1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 /* 31 * Ceph superblock operations 32 * 33 * Handle the basics of mounting, unmounting. 34 */ 35 36 /* 37 * super ops 38 */ 39 static void ceph_put_super(struct super_block *s) 40 { 41 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 42 43 dout("put_super\n"); 44 ceph_mdsc_close_sessions(fsc->mdsc); 45 } 46 47 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 48 { 49 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 50 struct ceph_mon_client *monc = &fsc->client->monc; 51 struct ceph_statfs st; 52 u64 fsid; 53 int err; 54 u64 data_pool; 55 56 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 57 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 58 } else { 59 data_pool = CEPH_NOPOOL; 60 } 61 62 dout("statfs\n"); 63 err = ceph_monc_do_statfs(monc, data_pool, &st); 64 if (err < 0) 65 return err; 66 67 /* fill in kstatfs */ 68 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 69 70 /* 71 * express utilization in terms of large blocks to avoid 72 * overflow on 32-bit machines. 73 * 74 * NOTE: for the time being, we make bsize == frsize to humor 75 * not-yet-ancient versions of glibc that are broken. 76 * Someday, we will probably want to report a real block 77 * size... 
whatever that may mean for a network file system! 78 */ 79 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 80 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 81 82 /* 83 * By default use root quota for stats; fallback to overall filesystem 84 * usage if using 'noquotadf' mount option or if the root dir doesn't 85 * have max_bytes quota set. 86 */ 87 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 88 !ceph_quota_update_statfs(fsc, buf)) { 89 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 92 } 93 94 buf->f_files = le64_to_cpu(st.num_objects); 95 buf->f_ffree = -1; 96 buf->f_namelen = NAME_MAX; 97 98 /* Must convert the fsid, for consistent values across arches */ 99 mutex_lock(&monc->mutex); 100 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 101 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 102 mutex_unlock(&monc->mutex); 103 104 buf->f_fsid.val[0] = fsid & 0xffffffff; 105 buf->f_fsid.val[1] = fsid >> 32; 106 107 return 0; 108 } 109 110 static int ceph_sync_fs(struct super_block *sb, int wait) 111 { 112 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 113 114 if (!wait) { 115 dout("sync_fs (non-blocking)\n"); 116 ceph_flush_dirty_caps(fsc->mdsc); 117 dout("sync_fs (non-blocking) done\n"); 118 return 0; 119 } 120 121 dout("sync_fs (blocking)\n"); 122 ceph_osdc_sync(&fsc->client->osdc); 123 ceph_mdsc_sync(fsc->mdsc); 124 dout("sync_fs (blocking) done\n"); 125 return 0; 126 } 127 128 /* 129 * mount options 130 */ 131 enum { 132 Opt_wsize, 133 Opt_rsize, 134 Opt_rasize, 135 Opt_caps_wanted_delay_min, 136 Opt_caps_wanted_delay_max, 137 Opt_caps_max, 138 Opt_readdir_max_entries, 139 Opt_readdir_max_bytes, 140 Opt_congestion_kb, 141 /* int args above */ 142 Opt_snapdirname, 143 Opt_mds_namespace, 144 Opt_recover_session, 145 Opt_source, 146 /* string args above */ 147 Opt_dirstat, 148 Opt_rbytes, 149 Opt_asyncreaddir, 150 
Opt_dcache, 151 Opt_ino32, 152 Opt_fscache, 153 Opt_poolperm, 154 Opt_require_active_mds, 155 Opt_acl, 156 Opt_quotadf, 157 Opt_copyfrom, 158 Opt_wsync, 159 }; 160 161 enum ceph_recover_session_mode { 162 ceph_recover_session_no, 163 ceph_recover_session_clean 164 }; 165 166 static const struct constant_table ceph_param_recover[] = { 167 { "no", ceph_recover_session_no }, 168 { "clean", ceph_recover_session_clean }, 169 {} 170 }; 171 172 static const struct fs_parameter_spec ceph_mount_parameters[] = { 173 fsparam_flag_no ("acl", Opt_acl), 174 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 175 fsparam_s32 ("caps_max", Opt_caps_max), 176 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 177 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 178 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 179 fsparam_flag_no ("copyfrom", Opt_copyfrom), 180 fsparam_flag_no ("dcache", Opt_dcache), 181 fsparam_flag_no ("dirstat", Opt_dirstat), 182 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 183 fsparam_string ("fsc", Opt_fscache), // fsc=... 
184 fsparam_flag_no ("ino32", Opt_ino32), 185 fsparam_string ("mds_namespace", Opt_mds_namespace), 186 fsparam_flag_no ("poolperm", Opt_poolperm), 187 fsparam_flag_no ("quotadf", Opt_quotadf), 188 fsparam_u32 ("rasize", Opt_rasize), 189 fsparam_flag_no ("rbytes", Opt_rbytes), 190 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 191 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 192 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 193 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 194 fsparam_u32 ("rsize", Opt_rsize), 195 fsparam_string ("snapdirname", Opt_snapdirname), 196 fsparam_string ("source", Opt_source), 197 fsparam_u32 ("wsize", Opt_wsize), 198 fsparam_flag_no ("wsync", Opt_wsync), 199 {} 200 }; 201 202 struct ceph_parse_opts_ctx { 203 struct ceph_options *copts; 204 struct ceph_mount_options *opts; 205 }; 206 207 /* 208 * Remove adjacent slashes and then the trailing slash, unless it is 209 * the only remaining character. 210 * 211 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 212 */ 213 static void canonicalize_path(char *path) 214 { 215 int i, j = 0; 216 217 for (i = 0; path[i] != '\0'; i++) { 218 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 219 path[j++] = path[i]; 220 } 221 222 if (j > 1 && path[j - 1] == '/') 223 j--; 224 path[j] = '\0'; 225 } 226 227 /* 228 * Parse the source parameter. Distinguish the server list from the path. 
229 * 230 * The source will look like: 231 * <server_spec>[,<server_spec>...]:[<path>] 232 * where 233 * <server_spec> is <ip>[:<port>] 234 * <path> is optional, but if present must begin with '/' 235 */ 236 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 237 { 238 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 239 struct ceph_mount_options *fsopt = pctx->opts; 240 char *dev_name = param->string, *dev_name_end; 241 int ret; 242 243 dout("%s '%s'\n", __func__, dev_name); 244 if (!dev_name || !*dev_name) 245 return invalfc(fc, "Empty source"); 246 247 dev_name_end = strchr(dev_name, '/'); 248 if (dev_name_end) { 249 /* 250 * The server_path will include the whole chars from userland 251 * including the leading '/'. 252 */ 253 kfree(fsopt->server_path); 254 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 255 if (!fsopt->server_path) 256 return -ENOMEM; 257 258 canonicalize_path(fsopt->server_path); 259 } else { 260 dev_name_end = dev_name + strlen(dev_name); 261 } 262 263 dev_name_end--; /* back up to ':' separator */ 264 if (dev_name_end < dev_name || *dev_name_end != ':') 265 return invalfc(fc, "No path or : separator in source"); 266 267 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 268 if (fsopt->server_path) 269 dout("server path '%s'\n", fsopt->server_path); 270 271 ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, 272 pctx->copts, fc->log.log); 273 if (ret) 274 return ret; 275 276 fc->source = param->string; 277 param->string = NULL; 278 return 0; 279 } 280 281 static int ceph_parse_mount_param(struct fs_context *fc, 282 struct fs_parameter *param) 283 { 284 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 285 struct ceph_mount_options *fsopt = pctx->opts; 286 struct fs_parse_result result; 287 unsigned int mode; 288 int token, ret; 289 290 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 291 if (ret != -ENOPARAM) 292 return ret; 293 294 token = fs_parse(fc, 
ceph_mount_parameters, param, &result); 295 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 296 if (token < 0) 297 return token; 298 299 switch (token) { 300 case Opt_snapdirname: 301 kfree(fsopt->snapdir_name); 302 fsopt->snapdir_name = param->string; 303 param->string = NULL; 304 break; 305 case Opt_mds_namespace: 306 kfree(fsopt->mds_namespace); 307 fsopt->mds_namespace = param->string; 308 param->string = NULL; 309 break; 310 case Opt_recover_session: 311 mode = result.uint_32; 312 if (mode == ceph_recover_session_no) 313 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 314 else if (mode == ceph_recover_session_clean) 315 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 316 else 317 BUG(); 318 break; 319 case Opt_source: 320 if (fc->source) 321 return invalfc(fc, "Multiple sources specified"); 322 return ceph_parse_source(param, fc); 323 case Opt_wsize: 324 if (result.uint_32 < PAGE_SIZE || 325 result.uint_32 > CEPH_MAX_WRITE_SIZE) 326 goto out_of_range; 327 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 328 break; 329 case Opt_rsize: 330 if (result.uint_32 < PAGE_SIZE || 331 result.uint_32 > CEPH_MAX_READ_SIZE) 332 goto out_of_range; 333 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 334 break; 335 case Opt_rasize: 336 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 337 break; 338 case Opt_caps_wanted_delay_min: 339 if (result.uint_32 < 1) 340 goto out_of_range; 341 fsopt->caps_wanted_delay_min = result.uint_32; 342 break; 343 case Opt_caps_wanted_delay_max: 344 if (result.uint_32 < 1) 345 goto out_of_range; 346 fsopt->caps_wanted_delay_max = result.uint_32; 347 break; 348 case Opt_caps_max: 349 if (result.int_32 < 0) 350 goto out_of_range; 351 fsopt->caps_max = result.int_32; 352 break; 353 case Opt_readdir_max_entries: 354 if (result.uint_32 < 1) 355 goto out_of_range; 356 fsopt->max_readdir = result.uint_32; 357 break; 358 case Opt_readdir_max_bytes: 359 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 360 goto out_of_range; 361 
fsopt->max_readdir_bytes = result.uint_32; 362 break; 363 case Opt_congestion_kb: 364 if (result.uint_32 < 1024) /* at least 1M */ 365 goto out_of_range; 366 fsopt->congestion_kb = result.uint_32; 367 break; 368 case Opt_dirstat: 369 if (!result.negated) 370 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 371 else 372 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 373 break; 374 case Opt_rbytes: 375 if (!result.negated) 376 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 377 else 378 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 379 break; 380 case Opt_asyncreaddir: 381 if (!result.negated) 382 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 383 else 384 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 385 break; 386 case Opt_dcache: 387 if (!result.negated) 388 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 389 else 390 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 391 break; 392 case Opt_ino32: 393 if (!result.negated) 394 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 395 else 396 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 397 break; 398 399 case Opt_fscache: 400 #ifdef CONFIG_CEPH_FSCACHE 401 kfree(fsopt->fscache_uniq); 402 fsopt->fscache_uniq = NULL; 403 if (result.negated) { 404 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 405 } else { 406 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 407 fsopt->fscache_uniq = param->string; 408 param->string = NULL; 409 } 410 break; 411 #else 412 return invalfc(fc, "fscache support is disabled"); 413 #endif 414 case Opt_poolperm: 415 if (!result.negated) 416 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 417 else 418 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 419 break; 420 case Opt_require_active_mds: 421 if (!result.negated) 422 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 423 else 424 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 425 break; 426 case Opt_quotadf: 427 if (!result.negated) 428 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 429 else 430 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 431 break; 432 case Opt_copyfrom: 433 if (!result.negated) 434 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 435 
else 436 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 437 break; 438 case Opt_acl: 439 if (!result.negated) { 440 #ifdef CONFIG_CEPH_FS_POSIX_ACL 441 fc->sb_flags |= SB_POSIXACL; 442 #else 443 return invalfc(fc, "POSIX ACL support is disabled"); 444 #endif 445 } else { 446 fc->sb_flags &= ~SB_POSIXACL; 447 } 448 break; 449 case Opt_wsync: 450 if (!result.negated) 451 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 452 else 453 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 454 break; 455 default: 456 BUG(); 457 } 458 return 0; 459 460 out_of_range: 461 return invalfc(fc, "%s out of range", param->key); 462 } 463 464 static void destroy_mount_options(struct ceph_mount_options *args) 465 { 466 dout("destroy_mount_options %p\n", args); 467 if (!args) 468 return; 469 470 kfree(args->snapdir_name); 471 kfree(args->mds_namespace); 472 kfree(args->server_path); 473 kfree(args->fscache_uniq); 474 kfree(args); 475 } 476 477 static int strcmp_null(const char *s1, const char *s2) 478 { 479 if (!s1 && !s2) 480 return 0; 481 if (s1 && !s2) 482 return -1; 483 if (!s1 && s2) 484 return 1; 485 return strcmp(s1, s2); 486 } 487 488 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 489 struct ceph_options *new_opt, 490 struct ceph_fs_client *fsc) 491 { 492 struct ceph_mount_options *fsopt1 = new_fsopt; 493 struct ceph_mount_options *fsopt2 = fsc->mount_options; 494 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 495 int ret; 496 497 ret = memcmp(fsopt1, fsopt2, ofs); 498 if (ret) 499 return ret; 500 501 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 502 if (ret) 503 return ret; 504 505 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 506 if (ret) 507 return ret; 508 509 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 510 if (ret) 511 return ret; 512 513 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 514 if (ret) 515 return ret; 516 517 return ceph_compare_options(new_opt, fsc->client); 518 } 519 
/**
 * ceph_show_options - Show mount options in /proc/mounts
 * @m: seq_file to write to
 * @root: root of that (sub)tree
 *
 * Emits the libceph client options first, then the ceph mount options.
 * Numeric options are printed only when they differ from the default
 * value they are compared against below.
 *
 * Return: 0, or the error from ceph_print_client_options().
 */
static int ceph_show_options(struct seq_file *m, struct dentry *root)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
	struct ceph_mount_options *fsopt = fsc->mount_options;
	size_t pos;
	int ret;

	/* a comma between MNT/MS and client options */
	seq_putc(m, ',');
	pos = m->count;

	ret = ceph_print_client_options(m, fsc->client, false);
	if (ret)
		return ret;

	/* retract our comma if no client options */
	if (m->count == pos)
		m->count--;

	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
		seq_puts(m, ",dirstat");
	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
		seq_puts(m, ",rbytes");
	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
		seq_puts(m, ",noasyncreaddir");
	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
		seq_puts(m, ",nodcache");
	if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
		seq_puts(m, ",ino32");
	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
		seq_show_option(m, "fsc", fsopt->fscache_uniq);
	}
	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
		seq_puts(m, ",nopoolperm");
	if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
		seq_puts(m, ",noquotadf");

#ifdef CONFIG_CEPH_FS_POSIX_ACL
	/* the ACL state is always shown, one way or the other */
	if (root->d_sb->s_flags & SB_POSIXACL)
		seq_puts(m, ",acl");
	else
		seq_puts(m, ",noacl");
#endif

	if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
		seq_puts(m, ",copyfrom");

	if (fsopt->mds_namespace)
		seq_show_option(m, "mds_namespace", fsopt->mds_namespace);

	if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
		seq_show_option(m, "recover_session", "clean");

	if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
		seq_puts(m, ",nowsync");

	if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
		seq_printf(m, ",wsize=%u", fsopt->wsize);
	if (fsopt->rsize != CEPH_MAX_READ_SIZE)
		seq_printf(m, ",rsize=%u", fsopt->rsize);
	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
		seq_printf(m, ",rasize=%u", fsopt->rasize);
	if (fsopt->congestion_kb != default_congestion_kb())
		seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
	if (fsopt->caps_max)
		seq_printf(m, ",caps_max=%d", fsopt->caps_max);
	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
		seq_printf(m, ",caps_wanted_delay_min=%u",
			 fsopt->caps_wanted_delay_min);
	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
		seq_printf(m, ",caps_wanted_delay_max=%u",
			   fsopt->caps_wanted_delay_max);
	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
		seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
		seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
		seq_show_option(m, "snapdirname", fsopt->snapdir_name);

	return 0;
}

/*
 * Handle any mon messages the standard library doesn't understand.
 * MDS map and fs map messages are passed to the MDS client and 0 is
 * returned; any other message type yields -1.
 */
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
{
	struct ceph_fs_client *fsc = client->private;
	int type = le16_to_cpu(msg->hdr.type);

	switch (type) {
	case CEPH_MSG_MDS_MAP:
		ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
		return 0;
	case CEPH_MSG_FS_MAP_USER:
		ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
		return 0;
	default:
		return -1;
	}
}

/*
 * Create a new fs client.
 *
 * Success or not, this function consumes @fsopt and @opt.
 *
 * Returns the new client, or an ERR_PTR() on failure.
 */
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
					struct ceph_options *opt)
{
	struct ceph_fs_client *fsc;
	int page_count;
	size_t size;
	int err;

	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
	if (!fsc) {
		err = -ENOMEM;
		goto fail;
	}

	fsc->client = ceph_create_client(opt, fsc);
	if (IS_ERR(fsc->client)) {
		err = PTR_ERR(fsc->client);
		goto fail;
	}
	opt = NULL; /* fsc->client now owns this */

	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
	ceph_set_opt(fsc->client, ABORT_ON_FULL);

	/*
	 * Ask the monitor client for the mdsmap when no mds_namespace
	 * was given, and for the fsmap otherwise.
	 */
	if (!fsopt->mds_namespace) {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
				   0, true);
	} else {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
				   0, false);
	}

	fsc->mount_options = fsopt;

	fsc->sb = NULL;
	fsc->mount_state = CEPH_MOUNT_MOUNTING;
	fsc->filp_gen = 1;
	fsc->have_copy_from2 = true;

	atomic_long_set(&fsc->writeback_count, 0);

	err = -ENOMEM;
	/*
	 * The number of concurrent works can be high but they don't need
	 * to be processed in parallel, limit concurrency.
	 */
	fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
	if (!fsc->inode_wq)
		goto fail_client;
	fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
	if (!fsc->cap_wq)
		goto fail_inode_wq;

	/*
	 * set up mempools: each element is sized to hold one page pointer
	 * per page of wsize, and at least one pointer
	 */
	err = -ENOMEM;
	page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
	size = sizeof (struct page *) * (page_count ? page_count : 1);
	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
	if (!fsc->wb_pagevec_pool)
		goto fail_cap_wq;

	return fsc;

	/* unwind in reverse order of setup */
fail_cap_wq:
	destroy_workqueue(fsc->cap_wq);
fail_inode_wq:
	destroy_workqueue(fsc->inode_wq);
fail_client:
	ceph_destroy_client(fsc->client);
fail:
	kfree(fsc);
	/* opt is non-NULL here only if ceph_create_client() did not take it */
	if (opt)
		ceph_destroy_options(opt);
	destroy_mount_options(fsopt);
	return ERR_PTR(err);
}

/* Flush both per-client workqueues (inode work, then cap work). */
static void flush_fs_workqueues(struct ceph_fs_client *fsc)
{
	flush_workqueue(fsc->inode_wq);
	flush_workqueue(fsc->cap_wq);
}

/*
 * Tear down a client built by create_fs_client(): the MDS client, both
 * workqueues, the writeback mempool, the mount options, the libceph
 * client and finally @fsc itself.
 */
static void destroy_fs_client(struct ceph_fs_client *fsc)
{
	dout("destroy_fs_client %p\n", fsc);

	ceph_mdsc_destroy(fsc);
	destroy_workqueue(fsc->inode_wq);
	destroy_workqueue(fsc->cap_wq);

	mempool_destroy(fsc->wb_pagevec_pool);

	destroy_mount_options(fsc->mount_options);

	ceph_destroy_client(fsc->client);

	kfree(fsc);
	/* only the pointer value is printed; fsc is not dereferenced */
	dout("destroy_fs_client %p done\n", fsc);
}

/*
 * caches
 */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
struct kmem_cache *ceph_dir_file_cachep;
struct kmem_cache *ceph_mds_request_cachep;

/* Slab constructor for ceph_inode_cachep: initialise the embedded VFS inode. */
static void ceph_inode_init_once(void *foo)
{
	struct ceph_inode_info *ci = foo;
	inode_init_once(&ci->vfs_inode);
}

/*
 * Create all slab caches and register with fscache.
 *
 * Returns 0 on success, -ENOMEM if a cache cannot be created, or the
 * error from ceph_fscache_register().  Caches created before a failure
 * are destroyed again.
 */
static int __init init_caches(void)
{
	int error = -ENOMEM;

	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
				      SLAB_ACCOUNT, ceph_inode_init_once);
	if (!ceph_inode_cachep)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
	if (!ceph_cap_cachep)
		goto bad_cap;
	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
					   SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_flush_cachep)
		goto bad_cap_flush;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_dentry_cachep)
		goto bad_dentry;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
	if (!ceph_file_cachep)
		goto bad_file;

	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
	if (!ceph_dir_file_cachep)
		goto bad_dir_file;

	ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD);
	if (!ceph_mds_request_cachep)
		goto bad_mds_req;

	error = ceph_fscache_register();
	if (error)
		goto bad_fscache;

	return 0;

	/* each label destroys the cache created just before the failing step */
bad_fscache:
	kmem_cache_destroy(ceph_mds_request_cachep);
bad_mds_req:
	kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file:
	kmem_cache_destroy(ceph_file_cachep);
bad_file:
	kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
	kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
	kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
	kmem_cache_destroy(ceph_inode_cachep);
	return error;
}

/* Destroy every cache created by init_caches() and unregister from fscache. */
static void destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();

	kmem_cache_destroy(ceph_inode_cachep);
	kmem_cache_destroy(ceph_cap_cachep);
	kmem_cache_destroy(ceph_cap_flush_cachep);
	kmem_cache_destroy(ceph_dentry_cachep);
	kmem_cache_destroy(ceph_file_cachep);
	kmem_cache_destroy(ceph_dir_file_cachep);
	kmem_cache_destroy(ceph_mds_request_cachep);

	ceph_fscache_unregister();
}

/*
 * ceph_umount_begin - initiate forced umount.  Tear down the
 * mount, skipping steps that may hang while waiting for server(s).
 *
 * Marks the client as shut down, aborts outstanding OSD requests with
 * -EIO and forces the MDS client to unmount.
 */
static void ceph_umount_begin(struct super_block *sb)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

	dout("ceph_umount_begin - starting forced umount\n");
	if (!fsc)
		return;
	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
	ceph_mdsc_force_umount(fsc->mdsc);
	fsc->filp_gen++; // invalidate open files
}

/* Superblock operations installed by ceph_set_super(). */
static const struct super_operations ceph_super_ops = {
	.alloc_inode	= ceph_alloc_inode,
	.free_inode	= ceph_free_inode,
	.write_inode    = ceph_write_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= ceph_evict_inode,
	.sync_fs        = ceph_sync_fs,
	.put_super	= ceph_put_super,
	.show_options   = ceph_show_options,
	.statfs		= ceph_statfs,
	.umount_begin   = ceph_umount_begin,
};

/*
 * Bootstrap mount by opening the root directory.  Note the mount
 * @started time from caller, and time out if this takes too long.
 *
 * Issues a GETATTR request for @path relative to CEPH_INO_ROOT.
 * Returns the new root dentry, or an ERR_PTR() on failure.
 */
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
				       const char *path,
				       unsigned long started)
{
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req = NULL;
	int err;
	struct dentry *root;

	/* open dir */
	dout("open_root_inode opening '%s'\n", path);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);
	req->r_path1 = kstrdup(path, GFP_NOFS);
	if (!req->r_path1) {
		root = ERR_PTR(-ENOMEM);
		goto out;
	}

	req->r_ino1.ino = CEPH_INO_ROOT;
	req->r_ino1.snap = CEPH_NOSNAP;
	req->r_started = started;
	/* the request timeout is the client's mount_timeout option */
	req->r_timeout = fsc->client->options->mount_timeout;
	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
	req->r_num_caps = 2;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	if (err == 0) {
		/* detach the target inode from the request before using it */
		struct inode *inode = req->r_target_inode;
		req->r_target_inode = NULL;
		dout("open_root_inode success\n");
		root = d_make_root(inode);
		if (!root) {
			root = ERR_PTR(-ENOMEM);
			goto out;
		}
		dout("open_root_inode success, root dentry is %p\n", root);
	} else {
		root = ERR_PTR(err);
	}
out:
	ceph_mdsc_put_request(req);
	return root;
}

/*
 * mount: join the ceph cluster, and open root directory.
 *
 * Runs under the client's mount_mutex.  If the superblock already has
 * a root, a new reference to it is returned and nothing else is done.
 * Returns the root dentry, or an ERR_PTR() on failure.
 */
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
				      struct fs_context *fc)
{
	int err;
	unsigned long started = jiffies;  /* note the start time */
	struct dentry *root;

	dout("mount start %p\n", fsc);
	mutex_lock(&fsc->client->mount_mutex);

	if (!fsc->sb->s_root) {
		/* server_path without its first character, or "" if unset */
		const char *path = fsc->mount_options->server_path ?
				     fsc->mount_options->server_path + 1 : "";

		err = __ceph_open_session(fsc->client, started);
		if (err < 0)
			goto out;

		/* setup fscache */
		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
			err = ceph_fscache_register_fs(fsc, fc);
			if (err < 0)
				goto out;
		}

		dout("mount opening path '%s'\n", path);

		ceph_fs_debugfs_init(fsc);

		root = open_root_dentry(fsc, path, started);
		if (IS_ERR(root)) {
			err = PTR_ERR(root);
			goto out;
		}
		/* s_root takes its own reference; the caller keeps root's */
		fsc->sb->s_root = dget(root);
	} else {
		root = dget(fsc->sb->s_root);
	}

	fsc->mount_state = CEPH_MOUNT_MOUNTED;
	dout("mount success\n");
	mutex_unlock(&fsc->client->mount_mutex);
	return root;

out:
	mutex_unlock(&fsc->client->mount_mutex);
	return ERR_PTR(err);
}

/*
 * sget_fc() "set" callback: initialise a new superblock for the client
 * found in s->s_fs_info.  Returns the result of set_anon_super_fc();
 * on failure the client's sb back-pointer is cleared again.
 */
static int ceph_set_super(struct super_block *s, struct fs_context *fc)
{
	struct ceph_fs_client *fsc = s->s_fs_info;
	int ret;

	dout("set_super %p\n", s);

	s->s_maxbytes = MAX_LFS_FILESIZE;

	s->s_xattr = ceph_xattr_handlers;
	fsc->sb = s;
	fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */

	s->s_op = &ceph_super_ops;
	s->s_d_op = &ceph_dentry_ops;
	s->s_export_op = &ceph_export_ops;

	s->s_time_gran = 1;
	s->s_time_min = 0;
	s->s_time_max = U32_MAX;

	ret = set_anon_super_fc(s, fc);
	if (ret != 0)
		fsc->sb = NULL;
	return ret;
}

/*
 * share superblock if same fs AND options
 *
 * sget_fc() "test" callback.  Returns 1 when @sb can be shared with the
 * mount described by @fc, and 0 when the mount options, the fsid (only
 * checked if one was given) or the superblock flags differ.
 */
static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
{
	struct ceph_fs_client *new = fc->s_fs_info;
	struct ceph_mount_options *fsopt = new->mount_options;
	struct ceph_options *opt = new->client->options;
	struct ceph_fs_client *other = ceph_sb_to_client(sb);

	dout("ceph_compare_super %p\n", sb);

	if (compare_mount_options(fsopt, opt, other)) {
		dout("monitor(s)/mount options don't match\n");
		return 0;
	}
	if ((opt->flags & CEPH_OPT_FSID) &&
	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
		dout("fsid doesn't match\n");
		return 0;
	}
	/* SB_BORN is masked out of the existing flags before comparing */
	if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) {
		dout("flags differ\n");
		return 0;
	}
	return 1;
}

/*
 * construct our own bdi so we can control readahead, etc.
 */
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

/*
 * Give @sb a bdi named "ceph-<seq>" and size its readahead and I/O
 * windows from the rasize and rsize mount options.  Returns 0, or the
 * error from super_setup_bdi_name().
 */
static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
{
	int err;

	err = super_setup_bdi_name(sb, "ceph-%ld",
				   atomic_long_inc_return(&bdi_seq));
	if (err)
		return err;

	/* set ra_pages based on rasize mount option? */
	sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;

	/* set io_pages based on max osd read size */
	sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;

	return 0;
}

/*
 * ->get_tree: build a client from the parsed options, find or create a
 * superblock for it, and mount.
 *
 * The parsed options are always consumed by create_fs_client().  If
 * sget_fc() hands back a superblock that belongs to another client, the
 * new client is destroyed and the existing one is used instead.
 * Returns 0 with fc->root set, or a negative error.
 */
static int ceph_get_tree(struct fs_context *fc)
{
	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
	struct super_block *sb;
	struct ceph_fs_client *fsc;
	struct dentry *res;
	int (*compare_super)(struct super_block *, struct fs_context *) =
		ceph_compare_super;
	int err;

	dout("ceph_get_tree\n");

	if (!fc->source)
		return invalfc(fc, "No source");

	/* create client (which we may/may not use) */
	fsc = create_fs_client(pctx->opts, pctx->copts);
	/* both were consumed above, so ceph_free_fc() must not see them */
	pctx->opts = NULL;
	pctx->copts = NULL;
	if (IS_ERR(fsc)) {
		err = PTR_ERR(fsc);
		goto out_final;
	}

	err = ceph_mdsc_init(fsc);
	if (err < 0)
		goto out;

	/* with NOSHARE no comparison callback is passed to sget_fc() */
	if (ceph_test_opt(fsc->client, NOSHARE))
		compare_super = NULL;

	/* the client is passed to the sget_fc() callbacks via s_fs_info */
	fc->s_fs_info = fsc;
	sb = sget_fc(fc, compare_super, ceph_set_super);
	fc->s_fs_info = NULL;
	if (IS_ERR(sb)) {
		err = PTR_ERR(sb);
		goto out;
	}

	if (ceph_sb_to_client(sb) != fsc) {
		destroy_fs_client(fsc);
		fsc = ceph_sb_to_client(sb);
		dout("get_sb got existing client %p\n", fsc);
	} else {
		dout("get_sb using new client %p\n", fsc);
		err = ceph_setup_bdi(sb, fsc);
		if (err < 0)
			goto out_splat;
	}

	res = ceph_real_mount(fsc, fc);
	if (IS_ERR(res)) {
		err = PTR_ERR(res);
		goto out_splat;
	}
	dout("root %p inode %p ino %llx.%llx\n", res,
	     d_inode(res), ceph_vinop(d_inode(res)));
	fc->root = fsc->sb->s_root;
	return 0;

out_splat:
	/* report an unavailable cluster as -EHOSTUNREACH instead */
	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
		pr_info("No mds server is up or the cluster is laggy\n");
		err = -EHOSTUNREACH;
	}

	ceph_mdsc_close_sessions(fsc->mdsc);
	deactivate_locked_super(sb);
	goto out_final;

out:
	destroy_fs_client(fsc);
out_final:
	dout("ceph_get_tree fail %d\n", err);
	return err;
}

/* ->free: release the parse context and any options it still owns. */
static void ceph_free_fc(struct fs_context *fc)
{
	struct ceph_parse_opts_ctx *pctx = fc->fs_private;

	if (pctx) {
		destroy_mount_options(pctx->opts);
		ceph_destroy_options(pctx->copts);
		kfree(pctx);
	}
}

/*
 * ->reconfigure: copy the ASYNC_DIROPS setting from the newly parsed
 * options to the live client, then sync the filesystem.  No other
 * option is applied here.  Always returns 0.
 */
static int ceph_reconfigure_fc(struct fs_context *fc)
{
	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
	struct ceph_mount_options *fsopt = pctx->opts;
	struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb);

	if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
		ceph_set_mount_opt(fsc, ASYNC_DIROPS);
	else
		ceph_clear_mount_opt(fsc, ASYNC_DIROPS);

	sync_filesystem(fc->root->d_sb);
	return 0;
}

static const struct fs_context_operations ceph_context_ops = {
	.free		= ceph_free_fc,
	.parse_param	= ceph_parse_mount_param,
	.get_tree	= ceph_get_tree,
	.reconfigure	= ceph_reconfigure_fc,
};

/*
 * Set up the filesystem mount context.
 *
 * Allocates the parse context with both option structures and fills in
 * the default mount options.  Returns 0, or -ENOMEM after freeing
 * whatever had been allocated.
 */
static int ceph_init_fs_context(struct fs_context *fc)
{
	struct ceph_parse_opts_ctx *pctx;
	struct ceph_mount_options *fsopt;

	pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
	if (!pctx)
		return -ENOMEM;

	pctx->copts = ceph_alloc_options();
	if (!pctx->copts)
		goto nomem;

	pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL);
	if (!pctx->opts)
		goto nomem;

	fsopt = pctx->opts;
	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;

	fsopt->wsize = CEPH_MAX_WRITE_SIZE;
	fsopt->rsize = CEPH_MAX_READ_SIZE;
	fsopt->rasize = CEPH_RASIZE_DEFAULT;
	/* heap copy, since the mount option code kfree()s this string */
	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
	if (!fsopt->snapdir_name)
		goto nomem;

	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
	fsopt->congestion_kb = default_congestion_kb();

#ifdef CONFIG_CEPH_FS_POSIX_ACL
	/* POSIX ACLs are on by default when support is built in */
	fc->sb_flags |= SB_POSIXACL;
#endif

	fc->fs_private = pctx;
	fc->ops = &ceph_context_ops;
	return 0;

nomem:
	destroy_mount_options(pctx->opts);
	ceph_destroy_options(pctx->copts);
	kfree(pctx);
	return -ENOMEM;
}

/*
 * ->kill_sb: shut the superblock down and destroy its client.
 *
 * s->s_dev is saved on entry and released with free_anon_bdev() as the
 * very last step.
 */
static void ceph_kill_sb(struct super_block *s)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
	dev_t dev = s->s_dev;

	dout("kill_sb %p\n", s);

	ceph_mdsc_pre_umount(fsc->mdsc);
	flush_fs_workqueues(fsc);

	generic_shutdown_super(s);

	/* stop routing monitor messages to this client before teardown */
	fsc->client->extra_mon_dispatch = NULL;
	ceph_fs_debugfs_cleanup(fsc);

	ceph_fscache_unregister_fs(fsc);

	destroy_fs_client(fsc);
	free_anon_bdev(dev);
}

static struct file_system_type ceph_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ceph",
	.init_fs_context = ceph_init_fs_context,
	.kill_sb	= ceph_kill_sb,
	.fs_flags	= FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("ceph");

/*
 * Force the client behind @sb to reconnect: run the forced-umount
 * steps, reset the client address, then mark the mount as MOUNTED
 * again.
 *
 * Returns 0, or the result of the getattr issued on the root inode
 * when the superblock has a root.
 */
int ceph_force_reconnect(struct super_block *sb)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
	int err = 0;

	ceph_umount_begin(sb);

	/* Make sure all page caches get invalidated.
	 * see remove_session_caps_cb() */
	flush_workqueue(fsc->inode_wq);

	/* In case that we were blacklisted. This also reset
	 * all mon/osd connections */
	ceph_reset_client_addr(fsc->client);

	ceph_osdc_clear_abort_err(&fsc->client->osdc);

	fsc->blacklisted = false;
	fsc->mount_state = CEPH_MOUNT_MOUNTED;

	if (sb->s_root) {
		err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
					CEPH_STAT_CAP_INODE, true);
	}
	return err;
}

/*
 * Module init: create the caches, initialise flock support and
 * register the filesystem type.  The caches are destroyed again if
 * registration fails.
 */
static int __init init_ceph(void)
{
	int ret = init_caches();
	if (ret)
		goto out;

	ceph_flock_init();
	ret = register_filesystem(&ceph_fs_type);
	if (ret)
		goto out_caches;

	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);

	return 0;

out_caches:
	destroy_caches();
out:
	return ret;
}

/* Module exit: unregister the filesystem type, then destroy the caches. */
static void __exit exit_ceph(void)
{
	dout("exit_ceph\n");
	unregister_filesystem(&ceph_fs_type);
	destroy_caches();
}

module_init(init_ceph);
module_exit(exit_ceph);

MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");