1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stackglue.c 5 * 6 * Code which implements an OCFS2 specific interface to underlying 7 * cluster stacks. 8 * 9 * Copyright (C) 2007 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation, version 2. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 */ 20 21 #include <linux/list.h> 22 #include <linux/spinlock.h> 23 #include <linux/module.h> 24 #include <linux/slab.h> 25 #include <linux/kmod.h> 26 #include <linux/fs.h> 27 #include <linux/kobject.h> 28 #include <linux/sysfs.h> 29 #include <linux/sysctl.h> 30 31 #include "ocfs2_fs.h" 32 33 #include "stackglue.h" 34 35 #define OCFS2_STACK_PLUGIN_O2CB "o2cb" 36 #define OCFS2_STACK_PLUGIN_USER "user" 37 #define OCFS2_MAX_HB_CTL_PATH 256 38 39 static struct ocfs2_locking_protocol *lproto; 40 static DEFINE_SPINLOCK(ocfs2_stack_lock); 41 static LIST_HEAD(ocfs2_stack_list); 42 static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; 43 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; 44 45 /* 46 * The stack currently in use. If not null, active_stack->sp_count > 0, 47 * the module is pinned, and the locking protocol cannot be changed. 48 */ 49 static struct ocfs2_stack_plugin *active_stack; 50 51 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) 52 { 53 struct ocfs2_stack_plugin *p; 54 55 assert_spin_locked(&ocfs2_stack_lock); 56 57 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 58 if (!strcmp(p->sp_name, name)) 59 return p; 60 } 61 62 return NULL; 63 } 64 65 static int ocfs2_stack_driver_request(const char *stack_name, 66 const char *plugin_name) 67 { 68 int rc; 69 struct ocfs2_stack_plugin *p; 70 71 spin_lock(&ocfs2_stack_lock); 72 73 /* 74 * If the stack passed by the filesystem isn't the selected one, 75 * we can't continue. 76 */ 77 if (strcmp(stack_name, cluster_stack_name)) { 78 rc = -EBUSY; 79 goto out; 80 } 81 82 if (active_stack) { 83 /* 84 * If the active stack isn't the one we want, it cannot 85 * be selected right now. 86 */ 87 if (!strcmp(active_stack->sp_name, plugin_name)) 88 rc = 0; 89 else 90 rc = -EBUSY; 91 goto out; 92 } 93 94 p = ocfs2_stack_lookup(plugin_name); 95 if (!p || !try_module_get(p->sp_owner)) { 96 rc = -ENOENT; 97 goto out; 98 } 99 100 /* Ok, the stack is pinned */ 101 p->sp_count++; 102 active_stack = p; 103 104 rc = 0; 105 106 out: 107 spin_unlock(&ocfs2_stack_lock); 108 return rc; 109 } 110 111 /* 112 * This function looks up the appropriate stack and makes it active. If 113 * there is no stack, it tries to load it. It will fail if the stack still 114 * cannot be found. It will also fail if a different stack is in use. 115 */ 116 static int ocfs2_stack_driver_get(const char *stack_name) 117 { 118 int rc; 119 char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; 120 121 /* 122 * Classic stack does not pass in a stack name. This is 123 * compatible with older tools as well. 124 */ 125 if (!stack_name || !*stack_name) 126 stack_name = OCFS2_STACK_PLUGIN_O2CB; 127 128 if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { 129 printk(KERN_ERR 130 "ocfs2 passed an invalid cluster stack label: \"%s\"\n", 131 stack_name); 132 return -EINVAL; 133 } 134 135 /* Anything that isn't the classic stack is a user stack */ 136 if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) 137 plugin_name = OCFS2_STACK_PLUGIN_USER; 138 139 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 140 if (rc == -ENOENT) { 141 request_module("ocfs2_stack_%s", plugin_name); 142 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 143 } 144 145 if (rc == -ENOENT) { 146 printk(KERN_ERR 147 "ocfs2: Cluster stack driver \"%s\" cannot be found\n", 148 plugin_name); 149 } else if (rc == -EBUSY) { 150 printk(KERN_ERR 151 "ocfs2: A different cluster stack is in use\n"); 152 } 153 154 return rc; 155 } 156 157 static void ocfs2_stack_driver_put(void) 158 { 159 spin_lock(&ocfs2_stack_lock); 160 BUG_ON(active_stack == NULL); 161 BUG_ON(active_stack->sp_count == 0); 162 163 active_stack->sp_count--; 164 if (!active_stack->sp_count) { 165 module_put(active_stack->sp_owner); 166 active_stack = NULL; 167 } 168 spin_unlock(&ocfs2_stack_lock); 169 } 170 171 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) 172 { 173 int rc; 174 175 spin_lock(&ocfs2_stack_lock); 176 if (!ocfs2_stack_lookup(plugin->sp_name)) { 177 plugin->sp_count = 0; 178 plugin->sp_proto = lproto; 179 list_add(&plugin->sp_list, &ocfs2_stack_list); 180 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", 181 plugin->sp_name); 182 rc = 0; 183 } else { 184 printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", 185 plugin->sp_name); 186 rc = -EEXIST; 187 } 188 spin_unlock(&ocfs2_stack_lock); 189 190 return rc; 191 } 192 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); 193 194 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) 195 { 196 struct ocfs2_stack_plugin *p; 197 198 spin_lock(&ocfs2_stack_lock); 199 p = ocfs2_stack_lookup(plugin->sp_name); 200 if (p) { 201 BUG_ON(p != plugin); 202 BUG_ON(plugin == active_stack); 203 BUG_ON(plugin->sp_count != 0); 204 list_del_init(&plugin->sp_list); 205 printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", 206 plugin->sp_name); 207 } else { 208 printk(KERN_ERR "Stack \"%s\" is not registered\n", 209 plugin->sp_name); 210 } 211 spin_unlock(&ocfs2_stack_lock); 212 } 213 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); 214 215 void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) 216 { 217 struct ocfs2_stack_plugin *p; 218 219 BUG_ON(proto == NULL); 220 221 spin_lock(&ocfs2_stack_lock); 222 BUG_ON(active_stack != NULL); 223 224 lproto = proto; 225 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 226 p->sp_proto = lproto; 227 } 228 229 spin_unlock(&ocfs2_stack_lock); 230 } 231 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); 232 233 234 /* 235 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take 236 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the 237 * underlying stack plugins need to pilfer the lksb off of the lock_res. 238 * If some other structure needs to be passed as an astarg, the plugins 239 * will need to be given a different avenue to the lksb. 240 */ 241 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 242 int mode, 243 union ocfs2_dlm_lksb *lksb, 244 u32 flags, 245 void *name, 246 unsigned int namelen, 247 struct ocfs2_lock_res *astarg) 248 { 249 BUG_ON(lproto == NULL); 250 251 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, 252 name, namelen, astarg); 253 } 254 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); 255 256 int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 257 union ocfs2_dlm_lksb *lksb, 258 u32 flags, 259 struct ocfs2_lock_res *astarg) 260 { 261 BUG_ON(lproto == NULL); 262 263 return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); 264 } 265 EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); 266 267 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 268 { 269 return active_stack->sp_ops->lock_status(lksb); 270 } 271 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 272 273 /* 274 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we 275 * don't cast at the glue level. The real answer is that the header 276 * ordering is nigh impossible. 277 */ 278 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 279 { 280 return active_stack->sp_ops->lock_lvb(lksb); 281 } 282 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); 283 284 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 285 { 286 active_stack->sp_ops->dump_lksb(lksb); 287 } 288 EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 289 290 int ocfs2_cluster_connect(const char *stack_name, 291 const char *group, 292 int grouplen, 293 void (*recovery_handler)(int node_num, 294 void *recovery_data), 295 void *recovery_data, 296 struct ocfs2_cluster_connection **conn) 297 { 298 int rc = 0; 299 struct ocfs2_cluster_connection *new_conn; 300 301 BUG_ON(group == NULL); 302 BUG_ON(conn == NULL); 303 BUG_ON(recovery_handler == NULL); 304 305 if (grouplen > GROUP_NAME_MAX) { 306 rc = -EINVAL; 307 goto out; 308 } 309 310 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), 311 GFP_KERNEL); 312 if (!new_conn) { 313 rc = -ENOMEM; 314 goto out; 315 } 316 317 memcpy(new_conn->cc_name, group, grouplen); 318 new_conn->cc_namelen = grouplen; 319 new_conn->cc_recovery_handler = recovery_handler; 320 new_conn->cc_recovery_data = recovery_data; 321 322 /* Start the new connection at our maximum compatibility level */ 323 new_conn->cc_version = lproto->lp_max_version; 324 325 /* This will pin the stack driver if successful */ 326 rc = ocfs2_stack_driver_get(stack_name); 327 if (rc) 328 goto out_free; 329 330 rc = active_stack->sp_ops->connect(new_conn); 331 if (rc) { 332 ocfs2_stack_driver_put(); 333 goto out_free; 334 } 335 336 *conn = new_conn; 337 338 out_free: 339 if (rc) 340 kfree(new_conn); 341 342 out: 343 return rc; 344 } 345 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); 346 347 /* If hangup_pending is 0, the stack driver will be dropped */ 348 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 349 int hangup_pending) 350 { 351 int ret; 352 353 BUG_ON(conn == NULL); 354 355 ret = active_stack->sp_ops->disconnect(conn); 356 357 /* XXX Should we free it anyway? */ 358 if (!ret) { 359 kfree(conn); 360 if (!hangup_pending) 361 ocfs2_stack_driver_put(); 362 } 363 364 return ret; 365 } 366 EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); 367 368 /* 369 * Leave the group for this filesystem. This is executed by a userspace 370 * program (stored in ocfs2_hb_ctl_path). 371 */ 372 static void ocfs2_leave_group(const char *group) 373 { 374 int ret; 375 char *argv[5], *envp[3]; 376 377 argv[0] = ocfs2_hb_ctl_path; 378 argv[1] = "-K"; 379 argv[2] = "-u"; 380 argv[3] = (char *)group; 381 argv[4] = NULL; 382 383 /* minimal command environment taken from cpu_run_sbin_hotplug */ 384 envp[0] = "HOME=/"; 385 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 386 envp[2] = NULL; 387 388 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 389 if (ret < 0) { 390 printk(KERN_ERR 391 "ocfs2: Error %d running user helper " 392 "\"%s %s %s %s\"\n", 393 ret, argv[0], argv[1], argv[2], argv[3]); 394 } 395 } 396 397 /* 398 * Hangup is a required post-umount. ocfs2-tools software expects the 399 * filesystem to call "ocfs2_hb_ctl" during unmount. This happens 400 * regardless of whether the DLM got started, so we can't do it 401 * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does 402 * the actual work. 403 */ 404 void ocfs2_cluster_hangup(const char *group, int grouplen) 405 { 406 BUG_ON(group == NULL); 407 BUG_ON(group[grouplen] != '\0'); 408 409 ocfs2_leave_group(group); 410 411 /* cluster_disconnect() was called with hangup_pending==1 */ 412 ocfs2_stack_driver_put(); 413 } 414 EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); 415 416 int ocfs2_cluster_this_node(unsigned int *node) 417 { 418 return active_stack->sp_ops->this_node(node); 419 } 420 EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); 421 422 423 /* 424 * Sysfs bits 425 */ 426 427 static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, 428 struct kobj_attribute *attr, 429 char *buf) 430 { 431 ssize_t ret = 0; 432 433 spin_lock(&ocfs2_stack_lock); 434 if (lproto) 435 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", 436 lproto->lp_max_version.pv_major, 437 lproto->lp_max_version.pv_minor); 438 spin_unlock(&ocfs2_stack_lock); 439 440 return ret; 441 } 442 443 static struct kobj_attribute ocfs2_attr_max_locking_protocol = 444 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 445 ocfs2_max_locking_protocol_show, NULL); 446 447 static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 448 struct kobj_attribute *attr, 449 char *buf) 450 { 451 ssize_t ret = 0, total = 0, remain = PAGE_SIZE; 452 struct ocfs2_stack_plugin *p; 453 454 spin_lock(&ocfs2_stack_lock); 455 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 456 ret = snprintf(buf, remain, "%s\n", 457 p->sp_name); 458 if (ret < 0) { 459 total = ret; 460 break; 461 } 462 if (ret == remain) { 463 /* snprintf() didn't fit */ 464 total = -E2BIG; 465 break; 466 } 467 total += ret; 468 remain -= ret; 469 } 470 spin_unlock(&ocfs2_stack_lock); 471 472 return total; 473 } 474 475 static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 476 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 477 ocfs2_loaded_cluster_plugins_show, NULL); 478 479 static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 480 struct kobj_attribute *attr, 481 char *buf) 482 { 483 ssize_t ret = 0; 484 485 spin_lock(&ocfs2_stack_lock); 486 if (active_stack) { 487 ret = snprintf(buf, PAGE_SIZE, "%s\n", 488 active_stack->sp_name); 489 if (ret == PAGE_SIZE) 490 ret = -E2BIG; 491 } 492 spin_unlock(&ocfs2_stack_lock); 493 494 return ret; 495 } 496 497 static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 498 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 499 ocfs2_active_cluster_plugin_show, NULL); 500 501 static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 502 struct kobj_attribute *attr, 503 char *buf) 504 { 505 ssize_t ret; 506 spin_lock(&ocfs2_stack_lock); 507 ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); 508 spin_unlock(&ocfs2_stack_lock); 509 510 return ret; 511 } 512 513 static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, 514 struct kobj_attribute *attr, 515 const char *buf, size_t count) 516 { 517 size_t len = count; 518 ssize_t ret; 519 520 if (len == 0) 521 return len; 522 523 if (buf[len - 1] == '\n') 524 len--; 525 526 if ((len != OCFS2_STACK_LABEL_LEN) || 527 (strnlen(buf, len) != len)) 528 return -EINVAL; 529 530 spin_lock(&ocfs2_stack_lock); 531 if (active_stack) { 532 if (!strncmp(buf, cluster_stack_name, len)) 533 ret = count; 534 else 535 ret = -EBUSY; 536 } else { 537 memcpy(cluster_stack_name, buf, len); 538 ret = count; 539 } 540 spin_unlock(&ocfs2_stack_lock); 541 542 return ret; 543 } 544 545 546 static struct kobj_attribute ocfs2_attr_cluster_stack = 547 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 548 ocfs2_cluster_stack_show, 549 ocfs2_cluster_stack_store); 550 551 static struct attribute *ocfs2_attrs[] = { 552 &ocfs2_attr_max_locking_protocol.attr, 553 &ocfs2_attr_loaded_cluster_plugins.attr, 554 &ocfs2_attr_active_cluster_plugin.attr, 555 &ocfs2_attr_cluster_stack.attr, 556 NULL, 557 }; 558 559 static struct attribute_group ocfs2_attr_group = { 560 .attrs = ocfs2_attrs, 561 }; 562 563 static struct kset *ocfs2_kset; 564 565 static void ocfs2_sysfs_exit(void) 566 { 567 kset_unregister(ocfs2_kset); 568 } 569 570 static int ocfs2_sysfs_init(void) 571 { 572 int ret; 573 574 ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); 575 if (!ocfs2_kset) 576 return -ENOMEM; 577 578 ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); 579 if (ret) 580 goto error; 581 582 return 0; 583 584 error: 585 kset_unregister(ocfs2_kset); 586 return ret; 587 } 588 589 /* 590 * Sysctl bits 591 * 592 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't 593 * make as much sense in a multiple cluster stack world, but it's safer 594 * and easier to preserve the name. 595 */ 596 597 #define FS_OCFS2_NM 1 598 599 static ctl_table ocfs2_nm_table[] = { 600 { 601 .ctl_name = 1, 602 .procname = "hb_ctl_path", 603 .data = ocfs2_hb_ctl_path, 604 .maxlen = OCFS2_MAX_HB_CTL_PATH, 605 .mode = 0644, 606 .proc_handler = &proc_dostring, 607 .strategy = &sysctl_string, 608 }, 609 { .ctl_name = 0 } 610 }; 611 612 static ctl_table ocfs2_mod_table[] = { 613 { 614 .ctl_name = FS_OCFS2_NM, 615 .procname = "nm", 616 .data = NULL, 617 .maxlen = 0, 618 .mode = 0555, 619 .child = ocfs2_nm_table 620 }, 621 { .ctl_name = 0} 622 }; 623 624 static ctl_table ocfs2_kern_table[] = { 625 { 626 .ctl_name = FS_OCFS2, 627 .procname = "ocfs2", 628 .data = NULL, 629 .maxlen = 0, 630 .mode = 0555, 631 .child = ocfs2_mod_table 632 }, 633 { .ctl_name = 0} 634 }; 635 636 static ctl_table ocfs2_root_table[] = { 637 { 638 .ctl_name = CTL_FS, 639 .procname = "fs", 640 .data = NULL, 641 .maxlen = 0, 642 .mode = 0555, 643 .child = ocfs2_kern_table 644 }, 645 { .ctl_name = 0 } 646 }; 647 648 static struct ctl_table_header *ocfs2_table_header = NULL; 649 650 651 /* 652 * Initialization 653 */ 654 655 static int __init ocfs2_stack_glue_init(void) 656 { 657 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); 658 659 ocfs2_table_header = register_sysctl_table(ocfs2_root_table); 660 if (!ocfs2_table_header) { 661 printk(KERN_ERR 662 "ocfs2 stack glue: unable to register sysctl\n"); 663 return -ENOMEM; /* or something. */ 664 } 665 666 return ocfs2_sysfs_init(); 667 } 668 669 static void __exit ocfs2_stack_glue_exit(void) 670 { 671 lproto = NULL; 672 ocfs2_sysfs_exit(); 673 if (ocfs2_table_header) 674 unregister_sysctl_table(ocfs2_table_header); 675 } 676 677 MODULE_AUTHOR("Oracle"); 678 MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); 679 MODULE_LICENSE("GPL"); 680 module_init(ocfs2_stack_glue_init); 681 module_exit(ocfs2_stack_glue_exit); 682