1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 drbd_nl.c 4 5 This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6 7 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 8 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 10 11 12 */ 13 14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 16 #include <linux/module.h> 17 #include <linux/drbd.h> 18 #include <linux/in.h> 19 #include <linux/fs.h> 20 #include <linux/file.h> 21 #include <linux/slab.h> 22 #include <linux/blkpg.h> 23 #include <linux/cpumask.h> 24 #include "drbd_int.h" 25 #include "drbd_protocol.h" 26 #include "drbd_req.h" 27 #include "drbd_state_change.h" 28 #include <linux/unaligned.h> 29 #include <linux/drbd_limits.h> 30 #include <linux/kthread.h> 31 32 #include <net/genetlink.h> 33 34 /* .doit */ 35 // int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info); 36 // int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info); 37 38 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info); 39 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info); 40 41 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info); 42 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info); 43 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); 44 45 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); 46 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); 47 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info); 48 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); 49 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); 50 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info); 51 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); 52 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); 53 int 
drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); 54 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info); 55 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info); 56 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info); 57 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info); 58 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); 59 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); 60 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); 61 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); 62 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info); 63 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); 64 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); 65 /* .dumpit */ 66 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); 67 int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb); 68 int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb); 69 int drbd_adm_dump_devices_done(struct netlink_callback *cb); 70 int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb); 71 int drbd_adm_dump_connections_done(struct netlink_callback *cb); 72 int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb); 73 int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb); 74 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb); 75 76 #include <linux/drbd_genl_api.h> 77 #include "drbd_nla.h" 78 79 static int drbd_pre_doit(const struct genl_split_ops *ops, 80 struct sk_buff *skb, struct genl_info *info); 81 static void drbd_post_doit(const struct genl_split_ops *ops, 82 struct sk_buff *skb, struct genl_info *info); 83 84 #define GENL_MAGIC_FAMILY_PRE_DOIT drbd_pre_doit 85 #define GENL_MAGIC_FAMILY_POST_DOIT drbd_post_doit 86 
#include <linux/genl_magic_func.h>

static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */

DEFINE_MUTEX(notification_mutex);

/* used bdev_open_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";

/*
 * Finalize the generic netlink message in @skb and send it as the reply
 * to the request described by @info.  A failure to send is only logged.
 */
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
	if (genlmsg_reply(skb, info))
		pr_err("error sending genl reply\n");
}

/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
 * reason it could fail was no space in skb, and there are 4k available.
 *
 * Adds @info as a T_info_text string inside a DRBD_NLA_CFG_REPLY nest.
 * A NULL or empty @info adds nothing and returns 0.
 * Returns 0 on success, -EMSGSIZE if the nest could not be started,
 * or the error returned by nla_put_string() (the nest is cancelled then). */
static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
{
	struct nlattr *nla;
	int err = -EMSGSIZE;

	if (!info || !info[0])
		return 0;

	nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	err = nla_put_string(skb, T_info_text, info);
	if (err) {
		nla_nest_cancel(skb, nla);
		return err;
	} else
		nla_nest_end(skb, nla);
	return 0;
}

/*
 * printf-style variant of drbd_msg_put_info().
 *
 * Reserves 256 bytes for the T_info_text attribute, formats into that
 * reservation with vscnprintf() (so longer text is truncated), then shrinks
 * the attribute and trims the message to the length actually used.
 * Returns 0 on success, -EMSGSIZE if the nest or the reservation failed.
 */
__printf(2, 3)
static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
{
	va_list args;
	struct nlattr *nla, *txt;
	int err = -EMSGSIZE;
	int len;

	nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	txt = nla_reserve(skb, T_info_text, 256);
	if (!txt) {
		nla_nest_cancel(skb, nla);
		return err;
	}
	va_start(args, fmt);
	len = vscnprintf(nla_data(txt), 256, fmt, args);
	va_end(args);

	/* maybe: retry with larger reserve, if truncated */
	/* len excludes the trailing NUL, hence len+1 bytes of payload */
	txt->nla_len = nla_attr_size(len+1);
	nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
	nla_nest_end(skb, nla);

	return 0;
}

/* Flags for drbd_adm_prepare() */
#define DRBD_ADM_NEED_MINOR        (1 << 0)
#define DRBD_ADM_NEED_RESOURCE     (1 << 1)
#define DRBD_ADM_NEED_CONNECTION   (1 << 2)

/* Per-command flags for drbd_pre_doit().
 * Indexed by genetlink command number; commands beyond the end of this
 * table are treated by drbd_pre_doit() as having no requirements (0). */
static const unsigned int drbd_genl_cmd_flags[] = {
	[DRBD_ADM_GET_STATUS]       = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_NEW_MINOR]        = DRBD_ADM_NEED_RESOURCE,
	[DRBD_ADM_DEL_MINOR]        = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_NEW_RESOURCE]     = 0,
	[DRBD_ADM_DEL_RESOURCE]     = DRBD_ADM_NEED_RESOURCE,
	[DRBD_ADM_RESOURCE_OPTS]    = DRBD_ADM_NEED_RESOURCE,
	[DRBD_ADM_CONNECT]          = DRBD_ADM_NEED_RESOURCE,
	[DRBD_ADM_CHG_NET_OPTS]     = DRBD_ADM_NEED_CONNECTION,
	[DRBD_ADM_DISCONNECT]       = DRBD_ADM_NEED_CONNECTION,
	[DRBD_ADM_ATTACH]           = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_CHG_DISK_OPTS]    = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_RESIZE]           = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_PRIMARY]          = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_SECONDARY]        = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_NEW_C_UUID]       = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_START_OV]         = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_DETACH]           = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_INVALIDATE]       = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_INVAL_PEER]       = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_PAUSE_SYNC]       = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_RESUME_SYNC]      = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_SUSPEND_IO]       = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_RESUME_IO]        = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_OUTDATE]          = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_GET_TIMEOUT_TYPE] = DRBD_ADM_NEED_MINOR,
	[DRBD_ADM_DOWN]             = DRBD_ADM_NEED_RESOURCE,
};

/*
 * At this point, we still rely on the global genl_lock().
 * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
 * to add additional synchronization against object destruction/modification.
 *
 * drbd_adm_prepare() - set up @adm_ctx for one admin request
 * @adm_ctx: context to fill in (reply skb/header, device, resource, connection)
 * @skb:     the request skb (not used in this function)
 * @info:    genetlink request info
 * @flags:   DRBD_ADM_NEED_* requirements of the command
 *
 * Allocates the reply skb, copies a valid DRBD_NLA_CFG_CONTEXT attribute into
 * it, and looks up the objects the request refers to.
 *
 * Return values, as visible here:
 *  - NO_ERROR when all requested objects were found and are consistent;
 *  - an ERR_* code when a lookup or consistency check failed; the reply skb
 *    is kept (with an info text) and references taken so far stay in
 *    @adm_ctx for the caller to drop;
 *  - a negative errno (-EPERM, -ENOMEM, -EINVAL, or a parse/put error);
 *    via the fail label the reply skb is freed and adm_ctx->reply_skb is
 *    set to NULL.  -EPERM is returned before reply_skb is touched at all.
 */
static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
	struct sk_buff *skb, struct genl_info *info, unsigned flags)
{
	struct drbd_genlmsghdr *d_in = genl_info_userhdr(info);
	const u8 cmd = info->genlhdr->cmd;
	int err;

	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
	if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
		return -EPERM;

	adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!adm_ctx->reply_skb) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
					info, &drbd_genl_family, 0, cmd);
	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
	 * but anyways */
	if (!adm_ctx->reply_dh) {
		err = -ENOMEM;
		goto fail;
	}

	adm_ctx->reply_dh->minor = d_in->minor;
	adm_ctx->reply_dh->ret_code = NO_ERROR;

	adm_ctx->volume = VOLUME_UNSPECIFIED;
	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
		struct nlattr *nla;
		/* parse and validate only */
		err = drbd_cfg_context_from_attrs(NULL, info);
		if (err)
			goto fail;

		/* It was present, and valid,
		 * copy it over to the reply skb. */
		err = nla_put_nohdr(adm_ctx->reply_skb,
				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
				info->attrs[DRBD_NLA_CFG_CONTEXT]);
		if (err)
			goto fail;

		/* and assign stuff to the adm_ctx */
		/* NOTE(review): nested_attr_tb is not defined in this view;
		 * presumably filled by the parse call above -- confirm. */
		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
		if (nla)
			adm_ctx->volume = nla_get_u32(nla);
		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
		if (nla)
			adm_ctx->resource_name = nla_data(nla);
		adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
		adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
		/* reject addresses larger than the connection's storage */
		if ((adm_ctx->my_addr &&
		     nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
		    (adm_ctx->peer_addr &&
		     nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
			err = -EINVAL;
			goto fail;
		}
	}

	adm_ctx->minor = d_in->minor;
	adm_ctx->device = minor_to_device(d_in->minor);

	/* We are protected by the global genl_lock().
	 * But we may explicitly drop it/retake it in drbd_adm_set_role(),
	 * so make sure this object stays around. */
	if (adm_ctx->device)
		kref_get(&adm_ctx->device->kref);

	if (adm_ctx->resource_name) {
		adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
	}

	if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
		return ERR_MINOR_INVALID;
	}
	if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
		drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
		if (adm_ctx->resource_name)
			return ERR_RES_NOT_KNOWN;
		return ERR_INVALID_REQUEST;
	}

	if (flags & DRBD_ADM_NEED_CONNECTION) {
		/* a connection is addressed by its address pair only */
		if (adm_ctx->resource) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx->device) {
			drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
			return ERR_INVALID_REQUEST;
		}
		if (adm_ctx->my_addr && adm_ctx->peer_addr)
			adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
							  nla_len(adm_ctx->my_addr),
							  nla_data(adm_ctx->peer_addr),
							  nla_len(adm_ctx->peer_addr));
		if (!adm_ctx->connection) {
			drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
			return ERR_INVALID_REQUEST;
		}
	}

	/* some more paranoia, if the request was over-determined */
	if (adm_ctx->device && adm_ctx->resource &&
	    adm_ctx->device->resource != adm_ctx->resource) {
		pr_warn("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
			adm_ctx->minor, adm_ctx->resource->name,
			adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
		return ERR_INVALID_REQUEST;
	}
	if (adm_ctx->device &&
	    adm_ctx->volume != VOLUME_UNSPECIFIED &&
	    adm_ctx->volume != adm_ctx->device->vnr) {
		pr_warn("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
			adm_ctx->minor, adm_ctx->volume,
			adm_ctx->device->vnr, adm_ctx->device->resource->name);
		drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
		return ERR_INVALID_REQUEST;
	}

	/* still, provide adm_ctx->resource always, if possible. */
	if (!adm_ctx->resource) {
		adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
			: adm_ctx->connection ? adm_ctx->connection->resource : NULL;
		/* take our own reference; drbd_post_doit() drops it again */
		if (adm_ctx->resource)
			kref_get(&adm_ctx->resource->kref);
	}

	return NO_ERROR;

fail:
	nlmsg_free(adm_ctx->reply_skb);
	adm_ctx->reply_skb = NULL;
	return err;
}

/*
 * genetlink pre_doit hook: allocate a zeroed drbd_config_context, prepare it
 * according to the per-command flags, and hand it to the .doit handler via
 * info->user_ptr[0].
 *
 * Returns -ENOMEM if the context cannot be allocated, or the error from
 * drbd_adm_prepare() when that failed without leaving a reply skb.
 * Any other prepare error is stored in the reply header's ret_code and
 * 0 is returned, so the handler still runs and can send the reply.
 */
static int drbd_pre_doit(const struct genl_split_ops *ops,
			 struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context *adm_ctx;
	u8 cmd = info->genlhdr->cmd;
	unsigned int flags;
	int err;

	adm_ctx = kzalloc_obj(*adm_ctx);
	if (!adm_ctx)
		return -ENOMEM;

	flags = (cmd < ARRAY_SIZE(drbd_genl_cmd_flags))
		? drbd_genl_cmd_flags[cmd] : 0;

	err = drbd_adm_prepare(adm_ctx, skb, info, flags);
	if (err && !adm_ctx->reply_skb) {
		/* Fatal error before reply_skb was allocated. */
		kfree(adm_ctx);
		return err;
	}
	if (err)
		adm_ctx->reply_dh->ret_code = err;

	info->user_ptr[0] = adm_ctx;
	return 0;
}

/*
 * genetlink post_doit hook: send the reply (if there is one), drop the
 * device, connection and resource references held in the context, and
 * free the context stored in info->user_ptr[0].
 */
static void drbd_post_doit(const struct genl_split_ops *ops,
			   struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context *adm_ctx = info->user_ptr[0];

	if (!adm_ctx)
		return;

	if (adm_ctx->reply_skb)
		drbd_adm_send_reply(adm_ctx->reply_skb, info);

	if (adm_ctx->device) {
		kref_put(&adm_ctx->device->kref, drbd_destroy_device);
		adm_ctx->device = NULL;
	}
	if (adm_ctx->connection) {
		kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
		adm_ctx->connection = NULL;
	}
	if (adm_ctx->resource) {
		kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
		adm_ctx->resource = NULL;
	}

	kfree(adm_ctx);
}

/*
 * Fill the two writable slots of a helper environment:
 * envp[3] (20 bytes) gets "DRBD_PEER_AF=...", envp[4] (60 bytes) gets
 * "DRBD_PEER_ADDRESS=...".  Both are left untouched if either address
 * length of @connection is zero.
 */
static void setup_khelper_env(struct drbd_connection *connection, char **envp)
{
	char *afs;

	/* FIXME: A future version will not allow this case. */
	if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
		return;

	switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
	case AF_INET6:
		afs = "ipv6";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
			 &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
		break;
	case AF_INET:
		afs = "ipv4";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
		break;
	default:
		/* NOTE(review): any other family is reported as "ssocks" and
		 * its address is formatted as if it were IPv4 -- confirm
		 * this is intended. */
		afs = "ssocks";
		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
			 &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
	}
	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
}

/*
 * Run the user mode helper as "<drbd_usermode_helper> <cmd> minor-<N>" for
 * @device and wait for it to finish (UMH_WAIT_PROC).
 *
 * Meta data is synced before the call, and the call and its result are
 * announced via drbd_bcast_event() and notify_helper().  When invoked from
 * the connection's worker task, CALLBACK_PENDING is set for the duration.
 *
 * Returns the value of call_usermodehelper(), except that negative values
 * are mapped to 0.
 */
int drbd_khelper(struct drbd_device *device, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char mb[14];
	char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
	struct drbd_connection *connection = first_peer_device(device)->connection;
	struct sib_info sib;
	int ret;

	if (current == connection->worker.task)
		set_bit(CALLBACK_PENDING, &connection->flags);

	snprintf(mb, 14, "minor-%d", device_to_minor(device));
	setup_khelper_env(connection, envp);

	/* The helper may take some time.
	 * write out any unsynced meta data changes now */
	drbd_md_sync(device);

	drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
	sib.sib_reason = SIB_HELPER_PRE;
	sib.helper_name = cmd;
	drbd_bcast_event(device, &sib);
	notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
	ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
	/* same message either way; only the log level differs */
	if (ret)
		drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				drbd_usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	else
		drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
				drbd_usermode_helper, cmd, mb,
				(ret >> 8) & 0xff, ret);
	sib.sib_reason = SIB_HELPER_POST;
	sib.helper_exit_code = ret;
	drbd_bcast_event(device, &sib);
	notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);

	if (current == connection->worker.task)
		clear_bit(CALLBACK_PENDING, &connection->flags);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

/*
 * Connection-level counterpart of drbd_khelper(): runs
 * "<drbd_usermode_helper> <cmd> <resource name>" and waits for it.
 * Meta data is synced first via conn_md_sync(); only notify_helper() is
 * used to announce the call and its result.
 *
 * Returns the value of call_usermodehelper(), with negative values mapped
 * to 0; callers in this file decode the exit status as (ret >> 8) & 0xff.
 */
enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
{
	char *envp[] = { "HOME=/",
			"TERM=linux",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			 (char[20]) { }, /* address family */
			 (char[60]) { }, /* address */
			NULL };
	char *resource_name = connection->resource->name;
	char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
	int ret;

	setup_khelper_env(connection, envp);
	conn_md_sync(connection);

	drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
	/* TODO: conn_bcast_event() ?? */
	notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);

	ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
	if (ret)
		drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  drbd_usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	else
		drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
			  drbd_usermode_helper, cmd, resource_name,
			  (ret >> 8) & 0xff, ret);
	/* TODO: conn_bcast_event() ?? */
	notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);

	if (ret < 0) /* Ignore any ERRNOs we got. */
		ret = 0;

	return ret;
}

/*
 * Return the maximum disk_conf->fencing value over all peer devices of
 * @connection for which get_ldev_if_state(device, D_CONSISTENT) succeeds.
 * Returns FP_NOT_AVAIL if there is no such device.
 */
static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
{
	enum drbd_fencing_p fp = FP_NOT_AVAIL;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (get_ldev_if_state(device, D_CONSISTENT)) {
			struct disk_conf *disk_conf =
				rcu_dereference(peer_device->device->ldev->disk_conf);
			fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
			put_ldev(device);
		}
	}
	rcu_read_unlock();

	return fp;
}

/* True if any of the resource's suspend flags (susp, susp_fen, susp_nod)
 * is set. */
static bool resource_is_supended(struct drbd_resource *resource)
{
	return resource->susp || resource->susp_fen || resource->susp_nod;
}

/*
 * Try to fence the peer of @connection by running the "fence-peer" helper
 * and applying the state change that its exit code asks for.
 *
 * Returns false right away if the connection state is already
 * >= C_WF_REPORT_PARAMS, if the highest fencing policy is FP_NOT_AVAIL
 * (susp_fen is cleared in that case), or if the helper returned an unknown
 * exit code.  Returns true right away for FP_DONT_CARE.
 * Otherwise returns whether conn_highest_pdsk() is <= D_OUTDATED afterwards.
 *
 * The helper runs without req_lock held; connect_cnt is sampled before and
 * compared after, so that a result is ignored if the connection was
 * re-established and lost again in between.
 */
bool conn_try_outdate_peer(struct drbd_connection *connection)
{
	struct drbd_resource * const resource = connection->resource;
	unsigned int connect_cnt;
	union drbd_state mask = { };
	union drbd_state val = { };
	enum drbd_fencing_p fp;
	char *ex_to_string;
	int r;

	spin_lock_irq(&resource->req_lock);
	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
		spin_unlock_irq(&resource->req_lock);
		return false;
	}

	connect_cnt = connection->connect_cnt;
	spin_unlock_irq(&resource->req_lock);

	fp = highest_fencing_policy(connection);
	switch (fp) {
	case FP_NOT_AVAIL:
		drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
		spin_lock_irq(&resource->req_lock);
		if (connection->cstate < C_WF_REPORT_PARAMS) {
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE | CS_HARD | CS_DC_SUSP);
			/* We are no longer suspended due to the fencing policy.
			 * We may still be suspended due to the on-no-data-accessible policy.
			 * If that was OND_IO_ERROR, fail pending requests. */
			if (!resource_is_supended(resource))
				_tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
		}
		/* Else: in case we raced with a connection handshake,
		 * let the handshake figure out if we maybe can RESEND,
		 * and do not resume/fail pending requests here.
		 * Worst case is we stay suspended for now, which may be
		 * resolved by either re-establishing the replication link, or
		 * the next link failure, or eventually the administrator.  */
		spin_unlock_irq(&resource->req_lock);
		return false;

	case FP_DONT_CARE:
		return true;
	default: ;
	}

	r = conn_khelper(connection, "fence-peer");

	/* the helper's exit status is in bits 8..15 of the return value */
	switch ((r>>8) & 0xff) {
	case P_INCONSISTENT: /* peer is inconsistent */
		ex_to_string = "peer is inconsistent or worse";
		mask.pdsk = D_MASK;
		val.pdsk = D_INCONSISTENT;
		break;
	case P_OUTDATED: /* peer got outdated, or was already outdated */
		ex_to_string = "peer was fenced";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	case P_DOWN: /* peer was down */
		if (conn_highest_disk(connection) == D_UP_TO_DATE) {
			/* we will(have) create(d) a new UUID anyways... */
			ex_to_string = "peer is unreachable, assumed to be dead";
			mask.pdsk = D_MASK;
			val.pdsk = D_OUTDATED;
		} else {
			/* mask/val stay zero here: no state change is requested */
			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
		}
		break;
	case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
		 * This is useful when an unconnected R_SECONDARY is asked to
		 * become R_PRIMARY, but finds the other peer being active. */
		ex_to_string = "peer is active";
		drbd_warn(connection, "Peer is primary, outdating myself.\n");
		mask.disk = D_MASK;
		val.disk = D_OUTDATED;
		break;
	case P_FENCING:
		/* THINK: do we need to handle this
		 * like case 4, or more like case 5? */
		if (fp != FP_STONITH)
			drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
		ex_to_string = "peer was stonithed";
		mask.pdsk = D_MASK;
		val.pdsk = D_OUTDATED;
		break;
	default:
		/* The script is broken ... */
		drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
		return false; /* Eventually leave IO frozen */
	}

	drbd_info(connection, "fence-peer helper returned %d (%s)\n",
		  (r>>8) & 0xff, ex_to_string);

	/* Not using
	   conn_request_state(connection, mask, val, CS_VERBOSE);
	   here, because we might have been able to re-establish the
	   connection in the meantime. */
	spin_lock_irq(&resource->req_lock);
	if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
		if (connection->connect_cnt != connect_cnt)
			/* In case the connection was established and dropped
			   while the fence-peer handler was running, ignore it */
			drbd_info(connection, "Ignoring fence-peer exit code\n");
		else
			_conn_request_state(connection, mask, val, CS_VERBOSE);
	}
	spin_unlock_irq(&resource->req_lock);

	return conn_highest_pdsk(connection) <= D_OUTDATED;
}

/* Thread function for conn_try_outdate_peer_async(): run the fencing
 * attempt, then drop the connection reference the starter took for us. */
static int _try_outdate_peer_async(void *data)
{
	struct drbd_connection *connection = (struct drbd_connection *)data;

	conn_try_outdate_peer(connection);

	kref_put(&connection->kref, drbd_destroy_connection);
	return 0;
}

/*
 * Run conn_try_outdate_peer() for @connection in a new kernel thread
 * ("drbd_async_h").  A reference on the connection is taken for the
 * thread; it is dropped here again if the thread could not be started.
 */
void conn_try_outdate_peer_async(struct drbd_connection *connection)
{
	struct task_struct *opa;

	kref_get(&connection->kref);
	/* We may have just sent a signal to this thread
	 * to get it out of some blocking network function.
	 * Clear signals; otherwise kthread_run(), which internally uses
	 * wait_on_completion_killable(), will mistake our pending signal
	 * for a new fatal signal and fail. */
	flush_signals(current);
	opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
	if (IS_ERR(opa)) {
		drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
		kref_put(&connection->kref, drbd_destroy_connection);
	}
}

/*
 * drbd_set_role() - change the role of @device to @new_role
 * @device:   DRBD device
 * @new_role: R_PRIMARY or R_SECONDARY
 * @force:    if non-zero, allow treating local data that is at least
 *            D_INCONSISTENT as D_UP_TO_DATE, and allow assuming the peer
 *            is outdated when fencing did not succeed
 *
 * Requests the state change up to max_tries times under device->state_mutex,
 * adjusting mask/val between attempts depending on why the previous attempt
 * was refused.  On success it waits for ap_pending_cnt to reach zero,
 * updates bit 0 of the current UUID (cleared for secondary, set for
 * primary), sends uuids/state to the peer when connected, syncs meta data
 * and updates the read-only flag of the disk.
 *
 * Returns the drbd_state_rv of the last state request.
 *
 * NOTE(review): @connection is initialized to NULL when the device has no
 * first peer device, but is passed to conn_try_outdate_peer() and
 * dereferenced (connection->net_conf) below without a check -- confirm that
 * callers guarantee a peer device exists.
 */
enum drbd_state_rv
drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	const int max_tries = 4;
	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
	struct net_conf *nc;
	int try = 0;
	int forced = 0;
	union drbd_state mask, val;

	if (new_role == R_PRIMARY) {
		/* loop cursor; shadows the function-scope 'connection' */
		struct drbd_connection *connection;

		/* Detect dead peers as soon as possible.  */

		rcu_read_lock();
		for_each_connection(connection, device->resource)
			request_ping(connection);
		rcu_read_unlock();
	}

	mutex_lock(device->state_mutex);

	mask.i = 0; mask.role = R_MASK;
	val.i  = 0; val.role  = new_role;

	while (try++ < max_tries) {
		rv = _drbd_request_state_holding_state_mutex(device, mask, val, CS_WAIT_COMPLETE);

		/* in case we first succeeded to outdate,
		 * but now suddenly could establish a connection */
		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
			val.pdsk = 0;
			mask.pdsk = 0;
			continue;
		}

		/* forced promotion: retry claiming our disk is UpToDate */
		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
		    (device->state.disk < D_UP_TO_DATE &&
		     device->state.disk >= D_INCONSISTENT)) {
			mask.disk = D_MASK;
			val.disk  = D_UP_TO_DATE;
			forced = 1;
			continue;
		}

		if (rv == SS_NO_UP_TO_DATE_DISK &&
		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);

			if (conn_try_outdate_peer(connection)) {
				val.disk = D_UP_TO_DATE;
				mask.disk = D_MASK;
			}
			continue;
		}

		if (rv == SS_NOTHING_TO_DO)
			goto out;
		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
			if (!conn_try_outdate_peer(connection) && force) {
				drbd_warn(device, "Forced into split brain situation!\n");
				mask.pdsk = D_MASK;
				val.pdsk  = D_OUTDATED;

			}
			continue;
		}
		if (rv == SS_TWO_PRIMARIES) {
			/* Maybe the peer is detected as dead very soon...
			   retry at most once more in this case. */
			if (try < max_tries) {
				int timeo;
				try = max_tries - 1;
				rcu_read_lock();
				nc = rcu_dereference(connection->net_conf);
				timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
				rcu_read_unlock();
				schedule_timeout_interruptible(timeo);
			}
			continue;
		}
		if (rv < SS_SUCCESS) {
			/* repeat the request with CS_VERBOSE set */
			rv = _drbd_request_state(device, mask, val,
						CS_VERBOSE + CS_WAIT_COMPLETE);
			if (rv < SS_SUCCESS)
				goto out;
		}
		break;
	}

	if (rv < SS_SUCCESS)
		goto out;

	if (forced)
		drbd_warn(device, "Forced to consider local data as UpToDate!\n");

	/* Wait until nothing is on the fly :) */
	wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);

	/* FIXME also wait for all pending P_BARRIER_ACK? */

	if (new_role == R_SECONDARY) {
		if (get_ldev(device)) {
			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
			put_ldev(device);
		}
	} else {
		mutex_lock(&device->resource->conf_update);
		nc = connection->net_conf;
		if (nc)
			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
		mutex_unlock(&device->resource->conf_update);

		if (get_ldev(device)) {
			if (((device->state.conn < C_CONNECTED ||
			       device->state.pdsk <= D_FAILED)
			      && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
				drbd_uuid_new_current(device);

			device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
			put_ldev(device);
		}
	}

	/* writeout of activity log covered areas of the bitmap
	 * to stable storage done in after state change already */

	if (device->state.conn >= C_WF_REPORT_PARAMS) {
		/* if this was forced, we should consider sync */
		if (forced)
			drbd_send_uuids(peer_device);
		drbd_send_current_state(peer_device);
	}

	drbd_md_sync(device);
	set_disk_ro(device->vdisk, new_role == R_SECONDARY);
	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
out:
	mutex_unlock(device->state_mutex);
	return rv;
}

/* Map an error from a *_from_attrs() parser to a short text for the reply;
 * anything but -ENOMSG, -EOPNOTSUPP and -EEXIST yields the generic text. */
static const char *from_attrs_err_to_txt(int err)
{
	return	err == -ENOMSG ? "required attribute missing" :
		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
		err == -EEXIST ? "can not change invariant setting" :
		"invalid attribute value";
}

/*
 * .doit handler for DRBD_ADM_PRIMARY and DRBD_ADM_SECONDARY.
 *
 * Uses the context set up by drbd_pre_doit().  If that already recorded an
 * error in the reply header, the error is kept and nothing else happens.
 * Otherwise the optional DRBD_NLA_SET_ROLE_PARMS are parsed and
 * drbd_set_role() is called with genl_lock dropped and the resource's
 * adm_mutex held.  The result goes into the reply header's ret_code.
 *
 * Always returns 0.
 */
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
	struct drbd_config_context *adm_ctx = info->user_ptr[0];
	struct set_role_parms parms;
	int err;
	enum drbd_ret_code retcode;
	enum drbd_state_rv rv;

	if (!adm_ctx->reply_skb)
		return 0;
	retcode = adm_ctx->reply_dh->ret_code;
	if (retcode != NO_ERROR)
		goto out;

	memset(&parms, 0, sizeof(parms));
	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
		err = set_role_parms_from_attrs(&parms, info);
		if (err) {
			retcode = ERR_MANDATORY_TAG;
			drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err));
			goto out;
		}
	}
	genl_unlock();
	mutex_lock(&adm_ctx->resource->adm_mutex);

	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
		rv = drbd_set_role(adm_ctx->device, R_PRIMARY, parms.assume_uptodate);
	else
		rv = drbd_set_role(adm_ctx->device, R_SECONDARY, 0);

	mutex_unlock(&adm_ctx->resource->adm_mutex);
	genl_lock();
	adm_ctx->reply_dh->ret_code = rv;
	return 0;
out:
	adm_ctx->reply_dh->ret_code = retcode;
	return 0;
}

/* Initializes the md.*_offset members, so we are able to find
 * the on disk meta data.
875 * 876 * We currently have two possible layouts: 877 * external: 878 * |----------- md_size_sect ------------------| 879 * [ 4k superblock ][ activity log ][ Bitmap ] 880 * | al_offset == 8 | 881 * | bm_offset = al_offset + X | 882 * ==> bitmap sectors = md_size_sect - bm_offset 883 * 884 * internal: 885 * |----------- md_size_sect ------------------| 886 * [data.....][ Bitmap ][ activity log ][ 4k superblock ] 887 * | al_offset < 0 | 888 * | bm_offset = al_offset - Y | 889 * ==> bitmap sectors = Y = al_offset - bm_offset 890 * 891 * Activity log size used to be fixed 32kB, 892 * but is about to become configurable. 893 */ 894 static void drbd_md_set_sector_offsets(struct drbd_device *device, 895 struct drbd_backing_dev *bdev) 896 { 897 sector_t md_size_sect = 0; 898 unsigned int al_size_sect = bdev->md.al_size_4k * 8; 899 900 bdev->md.md_offset = drbd_md_ss(bdev); 901 902 switch (bdev->md.meta_dev_idx) { 903 default: 904 /* v07 style fixed size indexed meta data */ 905 bdev->md.md_size_sect = MD_128MB_SECT; 906 bdev->md.al_offset = MD_4kB_SECT; 907 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; 908 break; 909 case DRBD_MD_INDEX_FLEX_EXT: 910 /* just occupy the full device; unit: sectors */ 911 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); 912 bdev->md.al_offset = MD_4kB_SECT; 913 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect; 914 break; 915 case DRBD_MD_INDEX_INTERNAL: 916 case DRBD_MD_INDEX_FLEX_INT: 917 /* al size is still fixed */ 918 bdev->md.al_offset = -al_size_sect; 919 /* we need (slightly less than) ~ this much bitmap sectors: */ 920 md_size_sect = drbd_get_capacity(bdev->backing_bdev); 921 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); 922 md_size_sect = BM_SECT_TO_EXT(md_size_sect); 923 md_size_sect = ALIGN(md_size_sect, 8); 924 925 /* plus the "drbd meta data super block", 926 * and the activity log; */ 927 md_size_sect += MD_4kB_SECT + al_size_sect; 928 929 bdev->md.md_size_sect = md_size_sect; 930 /* bitmap offset is adjusted 
by 'super' block size */ 931 bdev->md.bm_offset = -md_size_sect + MD_4kB_SECT; 932 break; 933 } 934 } 935 936 /* input size is expected to be in KB */ 937 char *ppsize(char *buf, unsigned long long size) 938 { 939 /* Needs 9 bytes at max including trailing NUL: 940 * -1ULL ==> "16384 EB" */ 941 static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; 942 int base = 0; 943 while (size >= 10000 && base < sizeof(units)-1) { 944 /* shift + round */ 945 size = (size >> 10) + !!(size & (1<<9)); 946 base++; 947 } 948 sprintf(buf, "%u %cB", (unsigned)size, units[base]); 949 950 return buf; 951 } 952 953 /* there is still a theoretical deadlock when called from receiver 954 * on an D_INCONSISTENT R_PRIMARY: 955 * remote READ does inc_ap_bio, receiver would need to receive answer 956 * packet from remote to dec_ap_bio again. 957 * receiver receive_sizes(), comes here, 958 * waits for ap_bio_cnt == 0. -> deadlock. 959 * but this cannot happen, actually, because: 960 * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable 961 * (not connected, or bad/no disk on peer): 962 * see drbd_fail_request_early, ap_bio_cnt is zero. 963 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: 964 * peer may not initiate a resize. 965 */ 966 /* Note these are not to be confused with 967 * drbd_adm_suspend_io/drbd_adm_resume_io, 968 * which are (sub) state changes triggered by admin (drbdsetup), 969 * and can be long lived. 970 * This changes an device->flag, is triggered by drbd internals, 971 * and should be short-lived. */ 972 /* It needs to be a counter, since multiple threads might 973 independently suspend and resume IO. 
*/ 974 void drbd_suspend_io(struct drbd_device *device) 975 { 976 atomic_inc(&device->suspend_cnt); 977 if (drbd_suspended(device)) 978 return; 979 wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt)); 980 } 981 982 void drbd_resume_io(struct drbd_device *device) 983 { 984 if (atomic_dec_and_test(&device->suspend_cnt)) 985 wake_up(&device->misc_wait); 986 } 987 988 /* 989 * drbd_determine_dev_size() - Sets the right device size obeying all constraints 990 * @device: DRBD device. 991 * 992 * Returns 0 on success, negative return values indicate errors. 993 * You should call drbd_md_sync() after calling this function. 994 */ 995 enum determine_dev_size 996 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) 997 { 998 struct md_offsets_and_sizes { 999 u64 last_agreed_sect; 1000 u64 md_offset; 1001 s32 al_offset; 1002 s32 bm_offset; 1003 u32 md_size_sect; 1004 1005 u32 al_stripes; 1006 u32 al_stripe_size_4k; 1007 } prev; 1008 sector_t u_size, size; 1009 struct drbd_md *md = &device->ldev->md; 1010 void *buffer; 1011 1012 int md_moved, la_size_changed; 1013 enum determine_dev_size rv = DS_UNCHANGED; 1014 1015 /* We may change the on-disk offsets of our meta data below. Lock out 1016 * anything that may cause meta data IO, to avoid acting on incomplete 1017 * layout changes or scribbling over meta data that is in the process 1018 * of being moved. 1019 * 1020 * Move is not exactly correct, btw, currently we have all our meta 1021 * data in core memory, to "move" it we just write it all out, there 1022 * are no reads. 
*/ 1023 drbd_suspend_io(device); 1024 buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */ 1025 if (!buffer) { 1026 drbd_resume_io(device); 1027 return DS_ERROR; 1028 } 1029 1030 /* remember current offset and sizes */ 1031 prev.last_agreed_sect = md->la_size_sect; 1032 prev.md_offset = md->md_offset; 1033 prev.al_offset = md->al_offset; 1034 prev.bm_offset = md->bm_offset; 1035 prev.md_size_sect = md->md_size_sect; 1036 prev.al_stripes = md->al_stripes; 1037 prev.al_stripe_size_4k = md->al_stripe_size_4k; 1038 1039 if (rs) { 1040 /* rs is non NULL if we should change the AL layout only */ 1041 md->al_stripes = rs->al_stripes; 1042 md->al_stripe_size_4k = rs->al_stripe_size / 4; 1043 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4; 1044 } 1045 1046 drbd_md_set_sector_offsets(device, device->ldev); 1047 1048 rcu_read_lock(); 1049 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; 1050 rcu_read_unlock(); 1051 size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED); 1052 1053 if (size < prev.last_agreed_sect) { 1054 if (rs && u_size == 0) { 1055 /* Remove "rs &&" later. This check should always be active, but 1056 right now the receiver expects the permissive behavior */ 1057 drbd_warn(device, "Implicit shrink not allowed. " 1058 "Use --size=%llus for explicit shrink.\n", 1059 (unsigned long long)size); 1060 rv = DS_ERROR_SHRINK; 1061 } 1062 if (u_size > size) 1063 rv = DS_ERROR_SPACE_MD; 1064 if (rv != DS_UNCHANGED) 1065 goto err_out; 1066 } 1067 1068 if (get_capacity(device->vdisk) != size || 1069 drbd_bm_capacity(device) != size) { 1070 int err; 1071 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC)); 1072 if (unlikely(err)) { 1073 /* currently there is only one error: ENOMEM! */ 1074 size = drbd_bm_capacity(device); 1075 if (size == 0) { 1076 drbd_err(device, "OUT OF MEMORY! " 1077 "Could not allocate bitmap!\n"); 1078 } else { 1079 drbd_err(device, "BM resizing failed. 
" 1080 "Leaving size unchanged\n"); 1081 } 1082 rv = DS_ERROR; 1083 } 1084 /* racy, see comments above. */ 1085 drbd_set_my_capacity(device, size); 1086 md->la_size_sect = size; 1087 } 1088 if (rv <= DS_ERROR) 1089 goto err_out; 1090 1091 la_size_changed = (prev.last_agreed_sect != md->la_size_sect); 1092 1093 md_moved = prev.md_offset != md->md_offset 1094 || prev.md_size_sect != md->md_size_sect; 1095 1096 if (la_size_changed || md_moved || rs) { 1097 u32 prev_flags; 1098 1099 /* We do some synchronous IO below, which may take some time. 1100 * Clear the timer, to avoid scary "timer expired!" messages, 1101 * "Superblock" is written out at least twice below, anyways. */ 1102 timer_delete(&device->md_sync_timer); 1103 1104 /* We won't change the "al-extents" setting, we just may need 1105 * to move the on-disk location of the activity log ringbuffer. 1106 * Lock for transaction is good enough, it may well be "dirty" 1107 * or even "starving". */ 1108 wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log)); 1109 1110 /* mark current on-disk bitmap and activity log as unreliable */ 1111 prev_flags = md->flags; 1112 md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED; 1113 drbd_md_write(device, buffer); 1114 1115 drbd_al_initialize(device, buffer); 1116 1117 drbd_info(device, "Writing the whole bitmap, %s\n", 1118 la_size_changed && md_moved ? "size changed and md moved" : 1119 la_size_changed ? "size changed" : "md moved"); 1120 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ 1121 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, 1122 "size changed", BM_LOCKED_MASK, NULL); 1123 1124 /* on-disk bitmap and activity log is authoritative again 1125 * (unless there was an IO error meanwhile...) 
*/ 1126 md->flags = prev_flags; 1127 drbd_md_write(device, buffer); 1128 1129 if (rs) 1130 drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n", 1131 md->al_stripes, md->al_stripe_size_4k * 4); 1132 } 1133 1134 if (size > prev.last_agreed_sect) 1135 rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO; 1136 if (size < prev.last_agreed_sect) 1137 rv = DS_SHRUNK; 1138 1139 if (0) { 1140 err_out: 1141 /* restore previous offset and sizes */ 1142 md->la_size_sect = prev.last_agreed_sect; 1143 md->md_offset = prev.md_offset; 1144 md->al_offset = prev.al_offset; 1145 md->bm_offset = prev.bm_offset; 1146 md->md_size_sect = prev.md_size_sect; 1147 md->al_stripes = prev.al_stripes; 1148 md->al_stripe_size_4k = prev.al_stripe_size_4k; 1149 md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k; 1150 } 1151 lc_unlock(device->act_log); 1152 wake_up(&device->al_wait); 1153 drbd_md_put_buffer(device); 1154 drbd_resume_io(device); 1155 1156 return rv; 1157 } 1158 1159 sector_t 1160 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev, 1161 sector_t u_size, int assume_peer_has_space) 1162 { 1163 sector_t p_size = device->p_size; /* partner's disk size. */ 1164 sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. 
*/ 1165 sector_t m_size; /* my size */ 1166 sector_t size = 0; 1167 1168 m_size = drbd_get_max_capacity(bdev); 1169 1170 if (device->state.conn < C_CONNECTED && assume_peer_has_space) { 1171 drbd_warn(device, "Resize while not connected was forced by the user!\n"); 1172 p_size = m_size; 1173 } 1174 1175 if (p_size && m_size) { 1176 size = min_t(sector_t, p_size, m_size); 1177 } else { 1178 if (la_size_sect) { 1179 size = la_size_sect; 1180 if (m_size && m_size < size) 1181 size = m_size; 1182 if (p_size && p_size < size) 1183 size = p_size; 1184 } else { 1185 if (m_size) 1186 size = m_size; 1187 if (p_size) 1188 size = p_size; 1189 } 1190 } 1191 1192 if (size == 0) 1193 drbd_err(device, "Both nodes diskless!\n"); 1194 1195 if (u_size) { 1196 if (u_size > size) 1197 drbd_err(device, "Requested disk size is too big (%lu > %lu)\n", 1198 (unsigned long)u_size>>1, (unsigned long)size>>1); 1199 else 1200 size = u_size; 1201 } 1202 1203 return size; 1204 } 1205 1206 /* 1207 * drbd_check_al_size() - Ensures that the AL is of the right size 1208 * @device: DRBD device. 1209 * 1210 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation 1211 * failed, and 0 on success. You should call drbd_md_sync() after you called 1212 * this function. 
1213 */ 1214 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) 1215 { 1216 struct lru_cache *n, *t; 1217 struct lc_element *e; 1218 unsigned int in_use; 1219 int i; 1220 1221 if (device->act_log && 1222 device->act_log->nr_elements == dc->al_extents) 1223 return 0; 1224 1225 in_use = 0; 1226 t = device->act_log; 1227 n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, 1228 dc->al_extents, sizeof(struct lc_element), 0); 1229 1230 if (n == NULL) { 1231 drbd_err(device, "Cannot allocate act_log lru!\n"); 1232 return -ENOMEM; 1233 } 1234 spin_lock_irq(&device->al_lock); 1235 if (t) { 1236 for (i = 0; i < t->nr_elements; i++) { 1237 e = lc_element_by_index(t, i); 1238 if (e->refcnt) 1239 drbd_err(device, "refcnt(%d)==%d\n", 1240 e->lc_number, e->refcnt); 1241 in_use += e->refcnt; 1242 } 1243 } 1244 if (!in_use) 1245 device->act_log = n; 1246 spin_unlock_irq(&device->al_lock); 1247 if (in_use) { 1248 drbd_err(device, "Activity log still in use!\n"); 1249 lc_destroy(n); 1250 return -EBUSY; 1251 } else { 1252 lc_destroy(t); 1253 } 1254 drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */ 1255 return 0; 1256 } 1257 1258 static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) 1259 { 1260 /* 1261 * We may ignore peer limits if the peer is modern enough. From 8.3.8 1262 * onwards the peer can use multiple BIOs for a single peer_request. 1263 */ 1264 if (device->state.conn < C_WF_REPORT_PARAMS) 1265 return device->peer_max_bio_size; 1266 1267 if (first_peer_device(device)->connection->agreed_pro_version < 94) 1268 return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); 1269 1270 /* 1271 * Correct old drbd (up to 8.3.7) if it believes it can do more than 1272 * 32KiB. 
1273 */ 1274 if (first_peer_device(device)->connection->agreed_pro_version == 94) 1275 return DRBD_MAX_SIZE_H80_PACKET; 1276 1277 /* 1278 * drbd 8.3.8 onwards, before 8.4.0 1279 */ 1280 if (first_peer_device(device)->connection->agreed_pro_version < 100) 1281 return DRBD_MAX_BIO_SIZE_P95; 1282 return DRBD_MAX_BIO_SIZE; 1283 } 1284 1285 static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) 1286 { 1287 /* when we introduced REQ_WRITE_SAME support, we also bumped 1288 * our maximum supported batch bio size used for discards. */ 1289 if (connection->agreed_features & DRBD_FF_WSAME) 1290 return DRBD_MAX_BBIO_SECTORS; 1291 /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */ 1292 return AL_EXTENT_SIZE >> 9; 1293 } 1294 1295 static bool drbd_discard_supported(struct drbd_connection *connection, 1296 struct drbd_backing_dev *bdev) 1297 { 1298 if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) 1299 return false; 1300 1301 if (connection->cstate >= C_CONNECTED && 1302 !(connection->agreed_features & DRBD_FF_TRIM)) { 1303 drbd_info(connection, 1304 "peer DRBD too old, does not support TRIM: disabling discards\n"); 1305 return false; 1306 } 1307 1308 return true; 1309 } 1310 1311 /* This is the workaround for "bio would need to, but cannot, be split" */ 1312 static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) 1313 { 1314 unsigned int max_segments; 1315 1316 rcu_read_lock(); 1317 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; 1318 rcu_read_unlock(); 1319 1320 if (!max_segments) 1321 return BLK_MAX_SEGMENTS; 1322 return max_segments; 1323 } 1324 1325 void drbd_reconsider_queue_parameters(struct drbd_device *device, 1326 struct drbd_backing_dev *bdev, struct o_qlim *o) 1327 { 1328 struct drbd_connection *connection = 1329 first_peer_device(device)->connection; 1330 struct request_queue * const q = device->rq_queue; 1331 unsigned int now = queue_max_hw_sectors(q) << 9; 1332 
struct queue_limits lim; 1333 struct request_queue *b = NULL; 1334 unsigned int new; 1335 1336 if (bdev) { 1337 b = bdev->backing_bdev->bd_disk->queue; 1338 1339 device->local_max_bio_size = 1340 queue_max_hw_sectors(b) << SECTOR_SHIFT; 1341 } 1342 1343 /* 1344 * We may later detach and re-attach on a disconnected Primary. Avoid 1345 * decreasing the value in this case. 1346 * 1347 * We want to store what we know the peer DRBD can handle, not what the 1348 * peer IO backend can handle. 1349 */ 1350 new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size, 1351 max(drbd_max_peer_bio_size(device), device->peer_max_bio_size)); 1352 if (new != now) { 1353 if (device->state.role == R_PRIMARY && new < now) 1354 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", 1355 new, now); 1356 drbd_info(device, "max BIO size = %u\n", new); 1357 } 1358 1359 lim = queue_limits_start_update(q); 1360 if (bdev) { 1361 blk_set_stacking_limits(&lim); 1362 lim.max_segments = drbd_backing_dev_max_segments(device); 1363 } else { 1364 lim.max_segments = BLK_MAX_SEGMENTS; 1365 lim.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | 1366 BLK_FEAT_ROTATIONAL | BLK_FEAT_STABLE_WRITES; 1367 } 1368 1369 lim.max_hw_sectors = new >> SECTOR_SHIFT; 1370 lim.seg_boundary_mask = PAGE_SIZE - 1; 1371 1372 /* 1373 * We don't care for the granularity, really. 1374 * 1375 * Stacking limits below should fix it for the local device. Whether or 1376 * not it is a suitable granularity on the remote device is not our 1377 * problem, really. If you care, you need to use devices with similar 1378 * topology on all peers. 
1379 */ 1380 if (drbd_discard_supported(connection, bdev)) { 1381 lim.discard_granularity = 512; 1382 lim.max_hw_discard_sectors = 1383 drbd_max_discard_sectors(connection); 1384 } else { 1385 lim.discard_granularity = 0; 1386 lim.max_hw_discard_sectors = 0; 1387 } 1388 1389 if (bdev) { 1390 blk_stack_limits(&lim, &b->limits, 0); 1391 /* 1392 * blk_set_stacking_limits() cleared the features, and 1393 * blk_stack_limits() may or may not have inherited 1394 * BLK_FEAT_STABLE_WRITES from the backing device. 1395 * 1396 * DRBD always requires stable writes because: 1397 * 1. The same bio data is read for both local disk I/O and 1398 * network transmission. If the page changes mid-flight, 1399 * the local and remote copies could diverge. 1400 * 2. When data integrity is enabled, DRBD calculates a 1401 * checksum before sending the data. If the page changes 1402 * between checksum calculation and transmission, the 1403 * receiver will detect a checksum mismatch. 1404 */ 1405 lim.features |= BLK_FEAT_STABLE_WRITES; 1406 } 1407 1408 /* 1409 * If we can handle "zeroes" efficiently on the protocol, we want to do 1410 * that, even if our backend does not announce max_write_zeroes_sectors 1411 * itself. 1412 */ 1413 if (connection->agreed_features & DRBD_FF_WZEROES) 1414 lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; 1415 else 1416 lim.max_write_zeroes_sectors = 0; 1417 lim.max_hw_wzeroes_unmap_sectors = 0; 1418 1419 if ((lim.discard_granularity >> SECTOR_SHIFT) > 1420 lim.max_hw_discard_sectors) { 1421 lim.discard_granularity = 0; 1422 lim.max_hw_discard_sectors = 0; 1423 } 1424 1425 if (queue_limits_commit_update(q, &lim)) 1426 drbd_err(device, "setting new queue limits failed\n"); 1427 } 1428 1429 /* Starts the worker thread */ 1430 static void conn_reconfig_start(struct drbd_connection *connection) 1431 { 1432 drbd_thread_start(&connection->worker); 1433 drbd_flush_workqueue(&connection->sender_work); 1434 } 1435 1436 /* if still unconfigured, stops worker again. 
*/ 1437 static void conn_reconfig_done(struct drbd_connection *connection) 1438 { 1439 bool stop_threads; 1440 spin_lock_irq(&connection->resource->req_lock); 1441 stop_threads = conn_all_vols_unconf(connection) && 1442 connection->cstate == C_STANDALONE; 1443 spin_unlock_irq(&connection->resource->req_lock); 1444 if (stop_threads) { 1445 /* ack_receiver thread and ack_sender workqueue are implicitly 1446 * stopped by receiver in conn_disconnect() */ 1447 drbd_thread_stop(&connection->receiver); 1448 drbd_thread_stop(&connection->worker); 1449 } 1450 } 1451 1452 /* Make sure IO is suspended before calling this function(). */ 1453 static void drbd_suspend_al(struct drbd_device *device) 1454 { 1455 int s = 0; 1456 1457 if (!lc_try_lock(device->act_log)) { 1458 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n"); 1459 return; 1460 } 1461 1462 drbd_al_shrink(device); 1463 spin_lock_irq(&device->resource->req_lock); 1464 if (device->state.conn < C_CONNECTED) 1465 s = !test_and_set_bit(AL_SUSPENDED, &device->flags); 1466 spin_unlock_irq(&device->resource->req_lock); 1467 lc_unlock(device->act_log); 1468 1469 if (s) 1470 drbd_info(device, "Suspended AL updates\n"); 1471 } 1472 1473 1474 static bool should_set_defaults(struct genl_info *info) 1475 { 1476 struct drbd_genlmsghdr *dh = genl_info_userhdr(info); 1477 1478 return 0 != (dh->flags & DRBD_GENL_F_SET_DEFAULTS); 1479 } 1480 1481 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) 1482 { 1483 /* This is limited by 16 bit "slot" numbers, 1484 * and by available on-disk context storage. 1485 * 1486 * Also (u16)~0 is special (denotes a "free" extent). 1487 * 1488 * One transaction occupies one 4kB on-disk block, 1489 * we have n such blocks in the on disk ring buffer, 1490 * the "current" transaction may fail (n-1), 1491 * and there is 919 slot numbers context information per transaction. 1492 * 1493 * 72 transaction blocks amounts to more than 2**16 context slots, 1494 * so cap there first. 
1495 */ 1496 const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX; 1497 const unsigned int sufficient_on_disk = 1498 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1) 1499 /AL_CONTEXT_PER_TRANSACTION; 1500 1501 unsigned int al_size_4k = bdev->md.al_size_4k; 1502 1503 if (al_size_4k > sufficient_on_disk) 1504 return max_al_nr; 1505 1506 return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION; 1507 } 1508 1509 static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) 1510 { 1511 return a->disk_barrier != b->disk_barrier || 1512 a->disk_flushes != b->disk_flushes || 1513 a->disk_drain != b->disk_drain; 1514 } 1515 1516 static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf, 1517 struct drbd_backing_dev *nbc) 1518 { 1519 struct block_device *bdev = nbc->backing_bdev; 1520 1521 if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) 1522 disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; 1523 if (disk_conf->al_extents > drbd_al_extents_max(nbc)) 1524 disk_conf->al_extents = drbd_al_extents_max(nbc); 1525 1526 if (!bdev_max_discard_sectors(bdev)) { 1527 if (disk_conf->rs_discard_granularity) { 1528 disk_conf->rs_discard_granularity = 0; /* disable feature */ 1529 drbd_info(device, "rs_discard_granularity feature disabled\n"); 1530 } 1531 } 1532 1533 if (disk_conf->rs_discard_granularity) { 1534 int orig_value = disk_conf->rs_discard_granularity; 1535 sector_t discard_size = bdev_max_discard_sectors(bdev) << 9; 1536 unsigned int discard_granularity = bdev_discard_granularity(bdev); 1537 int remainder; 1538 1539 if (discard_granularity > disk_conf->rs_discard_granularity) 1540 disk_conf->rs_discard_granularity = discard_granularity; 1541 1542 remainder = disk_conf->rs_discard_granularity % 1543 discard_granularity; 1544 disk_conf->rs_discard_granularity += remainder; 1545 1546 if (disk_conf->rs_discard_granularity > discard_size) 1547 disk_conf->rs_discard_granularity = discard_size; 1548 1549 if (disk_conf->rs_discard_granularity != orig_value) 
1550 drbd_info(device, "rs_discard_granularity changed to %d\n", 1551 disk_conf->rs_discard_granularity); 1552 } 1553 } 1554 1555 static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc) 1556 { 1557 int err = -EBUSY; 1558 1559 if (device->act_log && 1560 device->act_log->nr_elements == dc->al_extents) 1561 return 0; 1562 1563 drbd_suspend_io(device); 1564 /* If IO completion is currently blocked, we would likely wait 1565 * "forever" for the activity log to become unused. So we don't. */ 1566 if (atomic_read(&device->ap_bio_cnt)) 1567 goto out; 1568 1569 wait_event(device->al_wait, lc_try_lock(device->act_log)); 1570 drbd_al_shrink(device); 1571 err = drbd_check_al_size(device, dc); 1572 lc_unlock(device->act_log); 1573 wake_up(&device->al_wait); 1574 out: 1575 drbd_resume_io(device); 1576 return err; 1577 } 1578 1579 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) 1580 { 1581 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 1582 enum drbd_ret_code retcode; 1583 struct drbd_device *device; 1584 struct disk_conf *new_disk_conf, *old_disk_conf; 1585 struct fifo_buffer *old_plan = NULL, *new_plan = NULL; 1586 int err; 1587 unsigned int fifo_size; 1588 1589 if (!adm_ctx->reply_skb) 1590 return 0; 1591 retcode = adm_ctx->reply_dh->ret_code; 1592 if (retcode != NO_ERROR) 1593 goto finish; 1594 1595 device = adm_ctx->device; 1596 mutex_lock(&adm_ctx->resource->adm_mutex); 1597 1598 /* we also need a disk 1599 * to change the options on */ 1600 if (!get_ldev(device)) { 1601 retcode = ERR_NO_DISK; 1602 goto out; 1603 } 1604 1605 new_disk_conf = kmalloc_obj(struct disk_conf); 1606 if (!new_disk_conf) { 1607 retcode = ERR_NOMEM; 1608 goto fail; 1609 } 1610 1611 mutex_lock(&device->resource->conf_update); 1612 old_disk_conf = device->ldev->disk_conf; 1613 *new_disk_conf = *old_disk_conf; 1614 if (should_set_defaults(info)) 1615 set_disk_conf_defaults(new_disk_conf); 1616 1617 err = 
disk_conf_from_attrs_for_change(new_disk_conf, info); 1618 if (err && err != -ENOMSG) { 1619 retcode = ERR_MANDATORY_TAG; 1620 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 1621 goto fail_unlock; 1622 } 1623 1624 if (!expect(device, new_disk_conf->resync_rate >= 1)) 1625 new_disk_conf->resync_rate = 1; 1626 1627 sanitize_disk_conf(device, new_disk_conf, device->ldev); 1628 1629 if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) 1630 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; 1631 1632 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; 1633 if (fifo_size != device->rs_plan_s->size) { 1634 new_plan = fifo_alloc(fifo_size); 1635 if (!new_plan) { 1636 drbd_err(device, "kmalloc of fifo_buffer failed"); 1637 retcode = ERR_NOMEM; 1638 goto fail_unlock; 1639 } 1640 } 1641 1642 err = disk_opts_check_al_size(device, new_disk_conf); 1643 if (err) { 1644 /* Could be just "busy". Ignore? 1645 * Introduce dedicated error code? */ 1646 drbd_msg_put_info(adm_ctx->reply_skb, 1647 "Try again without changing current al-extents setting"); 1648 retcode = ERR_NOMEM; 1649 goto fail_unlock; 1650 } 1651 1652 lock_all_resources(); 1653 retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); 1654 if (retcode == NO_ERROR) { 1655 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 1656 drbd_resync_after_changed(device); 1657 } 1658 unlock_all_resources(); 1659 1660 if (retcode != NO_ERROR) 1661 goto fail_unlock; 1662 1663 if (new_plan) { 1664 old_plan = device->rs_plan_s; 1665 rcu_assign_pointer(device->rs_plan_s, new_plan); 1666 } 1667 1668 mutex_unlock(&device->resource->conf_update); 1669 1670 if (new_disk_conf->al_updates) 1671 device->ldev->md.flags &= ~MDF_AL_DISABLED; 1672 else 1673 device->ldev->md.flags |= MDF_AL_DISABLED; 1674 1675 if (new_disk_conf->md_flushes) 1676 clear_bit(MD_NO_FUA, &device->flags); 1677 else 1678 set_bit(MD_NO_FUA, &device->flags); 1679 1680 if (write_ordering_changed(old_disk_conf, 
new_disk_conf)) 1681 drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); 1682 1683 if (old_disk_conf->discard_zeroes_if_aligned != 1684 new_disk_conf->discard_zeroes_if_aligned) 1685 drbd_reconsider_queue_parameters(device, device->ldev, NULL); 1686 1687 drbd_md_sync(device); 1688 1689 if (device->state.conn >= C_CONNECTED) { 1690 struct drbd_peer_device *peer_device; 1691 1692 for_each_peer_device(peer_device, device) 1693 drbd_send_sync_param(peer_device); 1694 } 1695 1696 kvfree_rcu_mightsleep(old_disk_conf); 1697 kfree(old_plan); 1698 mod_timer(&device->request_timer, jiffies + HZ); 1699 goto success; 1700 1701 fail_unlock: 1702 mutex_unlock(&device->resource->conf_update); 1703 fail: 1704 kfree(new_disk_conf); 1705 kfree(new_plan); 1706 success: 1707 put_ldev(device); 1708 out: 1709 mutex_unlock(&adm_ctx->resource->adm_mutex); 1710 finish: 1711 adm_ctx->reply_dh->ret_code = retcode; 1712 return 0; 1713 } 1714 1715 static struct file *open_backing_dev(struct drbd_device *device, 1716 const char *bdev_path, void *claim_ptr, bool do_bd_link) 1717 { 1718 struct file *file; 1719 int err = 0; 1720 1721 file = bdev_file_open_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE, 1722 claim_ptr, NULL); 1723 if (IS_ERR(file)) { 1724 drbd_err(device, "open(\"%s\") failed with %ld\n", 1725 bdev_path, PTR_ERR(file)); 1726 return file; 1727 } 1728 1729 if (!do_bd_link) 1730 return file; 1731 1732 err = bd_link_disk_holder(file_bdev(file), device->vdisk); 1733 if (err) { 1734 fput(file); 1735 drbd_err(device, "bd_link_disk_holder(\"%s\", ...) 
failed with %d\n", 1736 bdev_path, err); 1737 file = ERR_PTR(err); 1738 } 1739 return file; 1740 } 1741 1742 static int open_backing_devices(struct drbd_device *device, 1743 struct disk_conf *new_disk_conf, 1744 struct drbd_backing_dev *nbc) 1745 { 1746 struct file *file; 1747 1748 file = open_backing_dev(device, new_disk_conf->backing_dev, device, 1749 true); 1750 if (IS_ERR(file)) 1751 return ERR_OPEN_DISK; 1752 nbc->backing_bdev = file_bdev(file); 1753 nbc->backing_bdev_file = file; 1754 1755 /* 1756 * meta_dev_idx >= 0: external fixed size, possibly multiple 1757 * drbd sharing one meta device. TODO in that case, paranoia 1758 * check that [md_bdev, meta_dev_idx] is not yet used by some 1759 * other drbd minor! (if you use drbd.conf + drbdadm, that 1760 * should check it for you already; but if you don't, or 1761 * someone fooled it, we need to double check here) 1762 */ 1763 file = open_backing_dev(device, new_disk_conf->meta_dev, 1764 /* claim ptr: device, if claimed exclusively; shared drbd_m_holder, 1765 * if potentially shared with other drbd minors */ 1766 (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder, 1767 /* avoid double bd_claim_by_disk() for the same (source,target) tuple, 1768 * as would happen with internal metadata. 
*/ 1769 (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT && 1770 new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL)); 1771 if (IS_ERR(file)) 1772 return ERR_OPEN_MD_DISK; 1773 nbc->md_bdev = file_bdev(file); 1774 nbc->f_md_bdev = file; 1775 return NO_ERROR; 1776 } 1777 1778 static void close_backing_dev(struct drbd_device *device, 1779 struct file *bdev_file, bool do_bd_unlink) 1780 { 1781 if (!bdev_file) 1782 return; 1783 if (do_bd_unlink) 1784 bd_unlink_disk_holder(file_bdev(bdev_file), device->vdisk); 1785 fput(bdev_file); 1786 } 1787 1788 void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) 1789 { 1790 if (ldev == NULL) 1791 return; 1792 1793 close_backing_dev(device, ldev->f_md_bdev, 1794 ldev->md_bdev != ldev->backing_bdev); 1795 close_backing_dev(device, ldev->backing_bdev_file, true); 1796 1797 kfree(ldev->disk_conf); 1798 kfree(ldev); 1799 } 1800 1801 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) 1802 { 1803 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 1804 struct drbd_device *device; 1805 struct drbd_peer_device *peer_device; 1806 struct drbd_connection *connection; 1807 int err; 1808 enum drbd_ret_code retcode; 1809 enum determine_dev_size dd; 1810 sector_t max_possible_sectors; 1811 sector_t min_md_device_sectors; 1812 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ 1813 struct disk_conf *new_disk_conf = NULL; 1814 struct lru_cache *resync_lru = NULL; 1815 struct fifo_buffer *new_plan = NULL; 1816 union drbd_state ns, os; 1817 enum drbd_state_rv rv; 1818 struct net_conf *nc; 1819 1820 if (!adm_ctx->reply_skb) 1821 return 0; 1822 retcode = adm_ctx->reply_dh->ret_code; 1823 if (retcode != NO_ERROR) 1824 goto finish; 1825 1826 device = adm_ctx->device; 1827 mutex_lock(&adm_ctx->resource->adm_mutex); 1828 peer_device = first_peer_device(device); 1829 connection = peer_device->connection; 1830 conn_reconfig_start(connection); 1831 1832 /* if you want to reconfigure, please 
tear down first */ 1833 if (device->state.disk > D_DISKLESS) { 1834 retcode = ERR_DISK_CONFIGURED; 1835 goto fail; 1836 } 1837 /* It may just now have detached because of IO error. Make sure 1838 * drbd_ldev_destroy is done already, we may end up here very fast, 1839 * e.g. if someone calls attach from the on-io-error handler, 1840 * to realize a "hot spare" feature (not that I'd recommend that) */ 1841 wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags)); 1842 1843 /* make sure there is no leftover from previous force-detach attempts */ 1844 clear_bit(FORCE_DETACH, &device->flags); 1845 clear_bit(WAS_IO_ERROR, &device->flags); 1846 clear_bit(WAS_READ_ERROR, &device->flags); 1847 1848 /* and no leftover from previously aborted resync or verify, either */ 1849 device->rs_total = 0; 1850 device->rs_failed = 0; 1851 atomic_set(&device->rs_pending_cnt, 0); 1852 1853 /* allocation not in the IO path, drbdsetup context */ 1854 nbc = kzalloc_obj(struct drbd_backing_dev); 1855 if (!nbc) { 1856 retcode = ERR_NOMEM; 1857 goto fail; 1858 } 1859 spin_lock_init(&nbc->md.uuid_lock); 1860 1861 new_disk_conf = kzalloc_obj(struct disk_conf); 1862 if (!new_disk_conf) { 1863 retcode = ERR_NOMEM; 1864 goto fail; 1865 } 1866 nbc->disk_conf = new_disk_conf; 1867 1868 set_disk_conf_defaults(new_disk_conf); 1869 err = disk_conf_from_attrs(new_disk_conf, info); 1870 if (err) { 1871 retcode = ERR_MANDATORY_TAG; 1872 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 1873 goto fail; 1874 } 1875 1876 if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) 1877 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; 1878 1879 new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ); 1880 if (!new_plan) { 1881 retcode = ERR_NOMEM; 1882 goto fail; 1883 } 1884 1885 if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { 1886 retcode = ERR_MD_IDX_INVALID; 1887 goto fail; 1888 } 1889 1890 rcu_read_lock(); 1891 nc = 
rcu_dereference(connection->net_conf); 1892 if (nc) { 1893 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { 1894 rcu_read_unlock(); 1895 retcode = ERR_STONITH_AND_PROT_A; 1896 goto fail; 1897 } 1898 } 1899 rcu_read_unlock(); 1900 1901 retcode = open_backing_devices(device, new_disk_conf, nbc); 1902 if (retcode != NO_ERROR) 1903 goto fail; 1904 1905 if ((nbc->backing_bdev == nbc->md_bdev) != 1906 (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL || 1907 new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { 1908 retcode = ERR_MD_IDX_INVALID; 1909 goto fail; 1910 } 1911 1912 resync_lru = lc_create("resync", drbd_bm_ext_cache, 1913 1, 61, sizeof(struct bm_extent), 1914 offsetof(struct bm_extent, lce)); 1915 if (!resync_lru) { 1916 retcode = ERR_NOMEM; 1917 goto fail; 1918 } 1919 1920 /* Read our meta data super block early. 1921 * This also sets other on-disk offsets. */ 1922 retcode = drbd_md_read(device, nbc); 1923 if (retcode != NO_ERROR) 1924 goto fail; 1925 1926 sanitize_disk_conf(device, new_disk_conf, nbc); 1927 1928 if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { 1929 drbd_err(device, "max capacity %llu smaller than disk size %llu\n", 1930 (unsigned long long) drbd_get_max_capacity(nbc), 1931 (unsigned long long) new_disk_conf->disk_size); 1932 retcode = ERR_DISK_TOO_SMALL; 1933 goto fail; 1934 } 1935 1936 if (new_disk_conf->meta_dev_idx < 0) { 1937 max_possible_sectors = DRBD_MAX_SECTORS_FLEX; 1938 /* at least one MB, otherwise it does not make sense */ 1939 min_md_device_sectors = (2<<10); 1940 } else { 1941 max_possible_sectors = DRBD_MAX_SECTORS; 1942 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1); 1943 } 1944 1945 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { 1946 retcode = ERR_MD_DISK_TOO_SMALL; 1947 drbd_warn(device, "refusing attach: md-device too small, " 1948 "at least %llu sectors needed for this meta-disk type\n", 1949 (unsigned long long) 
min_md_device_sectors); 1950 goto fail; 1951 } 1952 1953 /* Make sure the new disk is big enough 1954 * (we may currently be R_PRIMARY with no local disk...) */ 1955 if (drbd_get_max_capacity(nbc) < get_capacity(device->vdisk)) { 1956 retcode = ERR_DISK_TOO_SMALL; 1957 goto fail; 1958 } 1959 1960 nbc->known_size = drbd_get_capacity(nbc->backing_bdev); 1961 1962 if (nbc->known_size > max_possible_sectors) { 1963 drbd_warn(device, "==> truncating very big lower level device " 1964 "to currently maximum possible %llu sectors <==\n", 1965 (unsigned long long) max_possible_sectors); 1966 if (new_disk_conf->meta_dev_idx >= 0) 1967 drbd_warn(device, "==>> using internal or flexible " 1968 "meta data may help <<==\n"); 1969 } 1970 1971 drbd_suspend_io(device); 1972 /* also wait for the last barrier ack. */ 1973 /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171 1974 * We need a way to either ignore barrier acks for barriers sent before a device 1975 * was attached, or a way to wait for all pending barrier acks to come in. 1976 * As barriers are counted per resource, 1977 * we'd need to suspend io on all devices of a resource. 
1978 */ 1979 wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device)); 1980 /* and for any other previously queued work */ 1981 drbd_flush_workqueue(&connection->sender_work); 1982 1983 rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); 1984 retcode = (enum drbd_ret_code)rv; 1985 drbd_resume_io(device); 1986 if (rv < SS_SUCCESS) 1987 goto fail; 1988 1989 if (!get_ldev_if_state(device, D_ATTACHING)) 1990 goto force_diskless; 1991 1992 if (!device->bitmap) { 1993 if (drbd_bm_init(device)) { 1994 retcode = ERR_NOMEM; 1995 goto force_diskless_dec; 1996 } 1997 } 1998 1999 if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid && 2000 (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) && 2001 (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { 2002 drbd_err(device, "Can only attach to data with current UUID=%016llX\n", 2003 (unsigned long long)device->ed_uuid); 2004 retcode = ERR_DATA_NOT_CURRENT; 2005 goto force_diskless_dec; 2006 } 2007 2008 /* Since we are diskless, fix the activity log first... */ 2009 if (drbd_check_al_size(device, new_disk_conf)) { 2010 retcode = ERR_NOMEM; 2011 goto force_diskless_dec; 2012 } 2013 2014 /* Prevent shrinking of consistent devices ! */ 2015 { 2016 unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0); 2017 unsigned long long eff = nbc->md.la_size_sect; 2018 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) { 2019 if (nsz == nbc->disk_conf->disk_size) { 2020 drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff); 2021 } else { 2022 drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff); 2023 drbd_msg_sprintf_info(adm_ctx->reply_skb, 2024 "To-be-attached device has last effective > current size, and is consistent\n" 2025 "(%llu > %llu sectors). 
Refusing to attach.", eff, nsz); 2026 retcode = ERR_IMPLICIT_SHRINK; 2027 goto force_diskless_dec; 2028 } 2029 } 2030 } 2031 2032 lock_all_resources(); 2033 retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); 2034 if (retcode != NO_ERROR) { 2035 unlock_all_resources(); 2036 goto force_diskless_dec; 2037 } 2038 2039 /* Reset the "barriers don't work" bits here, then force meta data to 2040 * be written, to ensure we determine if barriers are supported. */ 2041 if (new_disk_conf->md_flushes) 2042 clear_bit(MD_NO_FUA, &device->flags); 2043 else 2044 set_bit(MD_NO_FUA, &device->flags); 2045 2046 /* Point of no return reached. 2047 * Devices and memory are no longer released by error cleanup below. 2048 * now device takes over responsibility, and the state engine should 2049 * clean it up somewhere. */ 2050 D_ASSERT(device, device->ldev == NULL); 2051 device->ldev = nbc; 2052 device->resync = resync_lru; 2053 device->rs_plan_s = new_plan; 2054 nbc = NULL; 2055 resync_lru = NULL; 2056 new_disk_conf = NULL; 2057 new_plan = NULL; 2058 2059 drbd_resync_after_changed(device); 2060 drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH); 2061 unlock_all_resources(); 2062 2063 if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) 2064 set_bit(CRASHED_PRIMARY, &device->flags); 2065 else 2066 clear_bit(CRASHED_PRIMARY, &device->flags); 2067 2068 if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) && 2069 !(device->state.role == R_PRIMARY && device->resource->susp_nod)) 2070 set_bit(CRASHED_PRIMARY, &device->flags); 2071 2072 device->send_cnt = 0; 2073 device->recv_cnt = 0; 2074 device->read_cnt = 0; 2075 device->writ_cnt = 0; 2076 2077 drbd_reconsider_queue_parameters(device, device->ldev, NULL); 2078 2079 /* If I am currently not R_PRIMARY, 2080 * but meta data primary indicator is set, 2081 * I just now recover from a hard crash, 2082 * and have been R_PRIMARY before that crash. 
2083 * 2084 * Now, if I had no connection before that crash 2085 * (have been degraded R_PRIMARY), chances are that 2086 * I won't find my peer now either. 2087 * 2088 * In that case, and _only_ in that case, 2089 * we use the degr-wfc-timeout instead of the default, 2090 * so we can automatically recover from a crash of a 2091 * degraded but active "cluster" after a certain timeout. 2092 */ 2093 clear_bit(USE_DEGR_WFC_T, &device->flags); 2094 if (device->state.role != R_PRIMARY && 2095 drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) && 2096 !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND)) 2097 set_bit(USE_DEGR_WFC_T, &device->flags); 2098 2099 dd = drbd_determine_dev_size(device, 0, NULL); 2100 if (dd <= DS_ERROR) { 2101 retcode = ERR_NOMEM_BITMAP; 2102 goto force_diskless_dec; 2103 } else if (dd == DS_GREW) 2104 set_bit(RESYNC_AFTER_NEG, &device->flags); 2105 2106 if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) || 2107 (test_bit(CRASHED_PRIMARY, &device->flags) && 2108 drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) { 2109 drbd_info(device, "Assuming that all blocks are out of sync " 2110 "(aka FullSync)\n"); 2111 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, 2112 "set_n_write from attaching", BM_LOCKED_MASK, 2113 NULL)) { 2114 retcode = ERR_IO_MD_DISK; 2115 goto force_diskless_dec; 2116 } 2117 } else { 2118 if (drbd_bitmap_io(device, &drbd_bm_read, 2119 "read from attaching", BM_LOCKED_MASK, 2120 NULL)) { 2121 retcode = ERR_IO_MD_DISK; 2122 goto force_diskless_dec; 2123 } 2124 } 2125 2126 if (_drbd_bm_total_weight(device) == drbd_bm_bits(device)) 2127 drbd_suspend_al(device); /* IO is still suspended here... */ 2128 2129 spin_lock_irq(&device->resource->req_lock); 2130 os = drbd_read_state(device); 2131 ns = os; 2132 /* If MDF_CONSISTENT is not set go into inconsistent state, 2133 otherwise investigate MDF_WasUpToDate... 2134 If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, 2135 otherwise into D_CONSISTENT state. 
2136 */ 2137 if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) { 2138 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE)) 2139 ns.disk = D_CONSISTENT; 2140 else 2141 ns.disk = D_OUTDATED; 2142 } else { 2143 ns.disk = D_INCONSISTENT; 2144 } 2145 2146 if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED)) 2147 ns.pdsk = D_OUTDATED; 2148 2149 rcu_read_lock(); 2150 if (ns.disk == D_CONSISTENT && 2151 (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE)) 2152 ns.disk = D_UP_TO_DATE; 2153 2154 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, 2155 MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before 2156 this point, because drbd_request_state() modifies these 2157 flags. */ 2158 2159 if (rcu_dereference(device->ldev->disk_conf)->al_updates) 2160 device->ldev->md.flags &= ~MDF_AL_DISABLED; 2161 else 2162 device->ldev->md.flags |= MDF_AL_DISABLED; 2163 2164 rcu_read_unlock(); 2165 2166 /* In case we are C_CONNECTED postpone any decision on the new disk 2167 state after the negotiation phase. */ 2168 if (device->state.conn == C_CONNECTED) { 2169 device->new_state_tmp.i = ns.i; 2170 ns.i = os.i; 2171 ns.disk = D_NEGOTIATING; 2172 2173 /* We expect to receive up-to-date UUIDs soon. 2174 To avoid a race in receive_state, free p_uuid while 2175 holding req_lock. I.e. 
atomic with the state change */ 2176 kfree(device->p_uuid); 2177 device->p_uuid = NULL; 2178 } 2179 2180 rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL); 2181 spin_unlock_irq(&device->resource->req_lock); 2182 2183 if (rv < SS_SUCCESS) 2184 goto force_diskless_dec; 2185 2186 mod_timer(&device->request_timer, jiffies + HZ); 2187 2188 if (device->state.role == R_PRIMARY) 2189 device->ldev->md.uuid[UI_CURRENT] |= (u64)1; 2190 else 2191 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; 2192 2193 drbd_md_mark_dirty(device); 2194 drbd_md_sync(device); 2195 2196 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); 2197 put_ldev(device); 2198 conn_reconfig_done(connection); 2199 mutex_unlock(&adm_ctx->resource->adm_mutex); 2200 adm_ctx->reply_dh->ret_code = retcode; 2201 return 0; 2202 2203 force_diskless_dec: 2204 put_ldev(device); 2205 force_diskless: 2206 drbd_force_state(device, NS(disk, D_DISKLESS)); 2207 drbd_md_sync(device); 2208 fail: 2209 conn_reconfig_done(connection); 2210 if (nbc) { 2211 close_backing_dev(device, nbc->f_md_bdev, 2212 nbc->md_bdev != nbc->backing_bdev); 2213 close_backing_dev(device, nbc->backing_bdev_file, true); 2214 kfree(nbc); 2215 } 2216 kfree(new_disk_conf); 2217 lc_destroy(resync_lru); 2218 kfree(new_plan); 2219 mutex_unlock(&adm_ctx->resource->adm_mutex); 2220 finish: 2221 adm_ctx->reply_dh->ret_code = retcode; 2222 return 0; 2223 } 2224 2225 static int adm_detach(struct drbd_device *device, int force) 2226 { 2227 if (force) { 2228 set_bit(FORCE_DETACH, &device->flags); 2229 drbd_force_state(device, NS(disk, D_FAILED)); 2230 return SS_SUCCESS; 2231 } 2232 2233 return drbd_request_detach_interruptible(device); 2234 } 2235 2236 /* Detaching the disk is a process in multiple stages. First we need to lock 2237 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io. 2238 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all 2239 * internal references as well. 2240 * Only then we have finally detached. 
*/ 2241 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) 2242 { 2243 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 2244 enum drbd_ret_code retcode; 2245 struct detach_parms parms = { }; 2246 int err; 2247 2248 if (!adm_ctx->reply_skb) 2249 return 0; 2250 retcode = adm_ctx->reply_dh->ret_code; 2251 if (retcode != NO_ERROR) 2252 goto out; 2253 2254 if (info->attrs[DRBD_NLA_DETACH_PARMS]) { 2255 err = detach_parms_from_attrs(&parms, info); 2256 if (err) { 2257 retcode = ERR_MANDATORY_TAG; 2258 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 2259 goto out; 2260 } 2261 } 2262 2263 mutex_lock(&adm_ctx->resource->adm_mutex); 2264 retcode = adm_detach(adm_ctx->device, parms.force_detach); 2265 mutex_unlock(&adm_ctx->resource->adm_mutex); 2266 out: 2267 adm_ctx->reply_dh->ret_code = retcode; 2268 return 0; 2269 } 2270 2271 static bool conn_resync_running(struct drbd_connection *connection) 2272 { 2273 struct drbd_peer_device *peer_device; 2274 bool rv = false; 2275 int vnr; 2276 2277 rcu_read_lock(); 2278 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2279 struct drbd_device *device = peer_device->device; 2280 if (device->state.conn == C_SYNC_SOURCE || 2281 device->state.conn == C_SYNC_TARGET || 2282 device->state.conn == C_PAUSED_SYNC_S || 2283 device->state.conn == C_PAUSED_SYNC_T) { 2284 rv = true; 2285 break; 2286 } 2287 } 2288 rcu_read_unlock(); 2289 2290 return rv; 2291 } 2292 2293 static bool conn_ov_running(struct drbd_connection *connection) 2294 { 2295 struct drbd_peer_device *peer_device; 2296 bool rv = false; 2297 int vnr; 2298 2299 rcu_read_lock(); 2300 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2301 struct drbd_device *device = peer_device->device; 2302 if (device->state.conn == C_VERIFY_S || 2303 device->state.conn == C_VERIFY_T) { 2304 rv = true; 2305 break; 2306 } 2307 } 2308 rcu_read_unlock(); 2309 2310 return rv; 2311 } 2312 2313 static enum drbd_ret_code 2314 
_check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf) 2315 { 2316 struct drbd_peer_device *peer_device; 2317 int i; 2318 2319 if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) { 2320 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol) 2321 return ERR_NEED_APV_100; 2322 2323 if (new_net_conf->two_primaries != old_net_conf->two_primaries) 2324 return ERR_NEED_APV_100; 2325 2326 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg)) 2327 return ERR_NEED_APV_100; 2328 } 2329 2330 if (!new_net_conf->two_primaries && 2331 conn_highest_role(connection) == R_PRIMARY && 2332 conn_highest_peer(connection) == R_PRIMARY) 2333 return ERR_NEED_ALLOW_TWO_PRI; 2334 2335 if (new_net_conf->two_primaries && 2336 (new_net_conf->wire_protocol != DRBD_PROT_C)) 2337 return ERR_NOT_PROTO_C; 2338 2339 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 2340 struct drbd_device *device = peer_device->device; 2341 if (get_ldev(device)) { 2342 enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing; 2343 put_ldev(device); 2344 if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) 2345 return ERR_STONITH_AND_PROT_A; 2346 } 2347 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data) 2348 return ERR_DISCARD_IMPOSSIBLE; 2349 } 2350 2351 if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A) 2352 return ERR_CONG_NOT_PROTO_A; 2353 2354 return NO_ERROR; 2355 } 2356 2357 static enum drbd_ret_code 2358 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf) 2359 { 2360 enum drbd_ret_code rv; 2361 struct drbd_peer_device *peer_device; 2362 int i; 2363 2364 rcu_read_lock(); 2365 rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf); 2366 rcu_read_unlock(); 2367 2368 /* connection->peer_devices protected by 
genl_lock() here */ 2369 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 2370 struct drbd_device *device = peer_device->device; 2371 if (!device->bitmap) { 2372 if (drbd_bm_init(device)) 2373 return ERR_NOMEM; 2374 } 2375 } 2376 2377 return rv; 2378 } 2379 2380 struct crypto { 2381 struct crypto_shash *verify_tfm; 2382 struct crypto_shash *csums_tfm; 2383 struct crypto_shash *cram_hmac_tfm; 2384 struct crypto_shash *integrity_tfm; 2385 }; 2386 2387 static int 2388 alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg) 2389 { 2390 if (!tfm_name[0]) 2391 return NO_ERROR; 2392 2393 *tfm = crypto_alloc_shash(tfm_name, 0, 0); 2394 if (IS_ERR(*tfm)) { 2395 *tfm = NULL; 2396 return err_alg; 2397 } 2398 2399 return NO_ERROR; 2400 } 2401 2402 static enum drbd_ret_code 2403 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) 2404 { 2405 char hmac_name[CRYPTO_MAX_ALG_NAME]; 2406 enum drbd_ret_code rv; 2407 2408 rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg, 2409 ERR_CSUMS_ALG); 2410 if (rv != NO_ERROR) 2411 return rv; 2412 rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg, 2413 ERR_VERIFY_ALG); 2414 if (rv != NO_ERROR) 2415 return rv; 2416 rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg, 2417 ERR_INTEGRITY_ALG); 2418 if (rv != NO_ERROR) 2419 return rv; 2420 if (new_net_conf->cram_hmac_alg[0] != 0) { 2421 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", 2422 new_net_conf->cram_hmac_alg); 2423 2424 rv = alloc_shash(&crypto->cram_hmac_tfm, hmac_name, 2425 ERR_AUTH_ALG); 2426 } 2427 2428 return rv; 2429 } 2430 2431 static void free_crypto(struct crypto *crypto) 2432 { 2433 crypto_free_shash(crypto->cram_hmac_tfm); 2434 crypto_free_shash(crypto->integrity_tfm); 2435 crypto_free_shash(crypto->csums_tfm); 2436 crypto_free_shash(crypto->verify_tfm); 2437 } 2438 2439 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) 2440 { 2441 struct drbd_config_context *adm_ctx = 
info->user_ptr[0]; 2442 enum drbd_ret_code retcode; 2443 struct drbd_connection *connection; 2444 struct net_conf *old_net_conf, *new_net_conf = NULL; 2445 int err; 2446 int ovr; /* online verify running */ 2447 int rsr; /* re-sync running */ 2448 struct crypto crypto = { }; 2449 2450 if (!adm_ctx->reply_skb) 2451 return 0; 2452 retcode = adm_ctx->reply_dh->ret_code; 2453 if (retcode != NO_ERROR) 2454 goto finish; 2455 2456 connection = adm_ctx->connection; 2457 mutex_lock(&adm_ctx->resource->adm_mutex); 2458 2459 new_net_conf = kzalloc_obj(struct net_conf); 2460 if (!new_net_conf) { 2461 retcode = ERR_NOMEM; 2462 goto out; 2463 } 2464 2465 conn_reconfig_start(connection); 2466 2467 mutex_lock(&connection->data.mutex); 2468 mutex_lock(&connection->resource->conf_update); 2469 old_net_conf = connection->net_conf; 2470 2471 if (!old_net_conf) { 2472 drbd_msg_put_info(adm_ctx->reply_skb, "net conf missing, try connect"); 2473 retcode = ERR_INVALID_REQUEST; 2474 goto fail; 2475 } 2476 2477 *new_net_conf = *old_net_conf; 2478 if (should_set_defaults(info)) 2479 set_net_conf_defaults(new_net_conf); 2480 2481 err = net_conf_from_attrs_for_change(new_net_conf, info); 2482 if (err && err != -ENOMSG) { 2483 retcode = ERR_MANDATORY_TAG; 2484 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 2485 goto fail; 2486 } 2487 2488 retcode = check_net_options(connection, new_net_conf); 2489 if (retcode != NO_ERROR) 2490 goto fail; 2491 2492 /* re-sync running */ 2493 rsr = conn_resync_running(connection); 2494 if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) { 2495 retcode = ERR_CSUMS_RESYNC_RUNNING; 2496 goto fail; 2497 } 2498 2499 /* online verify running */ 2500 ovr = conn_ov_running(connection); 2501 if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) { 2502 retcode = ERR_VERIFY_RUNNING; 2503 goto fail; 2504 } 2505 2506 retcode = alloc_crypto(&crypto, new_net_conf); 2507 if (retcode != NO_ERROR) 2508 goto fail; 2509 2510 
rcu_assign_pointer(connection->net_conf, new_net_conf); 2511 2512 if (!rsr) { 2513 crypto_free_shash(connection->csums_tfm); 2514 connection->csums_tfm = crypto.csums_tfm; 2515 crypto.csums_tfm = NULL; 2516 } 2517 if (!ovr) { 2518 crypto_free_shash(connection->verify_tfm); 2519 connection->verify_tfm = crypto.verify_tfm; 2520 crypto.verify_tfm = NULL; 2521 } 2522 2523 crypto_free_shash(connection->integrity_tfm); 2524 connection->integrity_tfm = crypto.integrity_tfm; 2525 if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100) 2526 /* Do this without trying to take connection->data.mutex again. */ 2527 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE); 2528 2529 crypto_free_shash(connection->cram_hmac_tfm); 2530 connection->cram_hmac_tfm = crypto.cram_hmac_tfm; 2531 2532 mutex_unlock(&connection->resource->conf_update); 2533 mutex_unlock(&connection->data.mutex); 2534 kvfree_rcu_mightsleep(old_net_conf); 2535 2536 if (connection->cstate >= C_WF_REPORT_PARAMS) { 2537 struct drbd_peer_device *peer_device; 2538 int vnr; 2539 2540 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) 2541 drbd_send_sync_param(peer_device); 2542 } 2543 2544 goto done; 2545 2546 fail: 2547 mutex_unlock(&connection->resource->conf_update); 2548 mutex_unlock(&connection->data.mutex); 2549 free_crypto(&crypto); 2550 kfree(new_net_conf); 2551 done: 2552 conn_reconfig_done(connection); 2553 out: 2554 mutex_unlock(&adm_ctx->resource->adm_mutex); 2555 finish: 2556 adm_ctx->reply_dh->ret_code = retcode; 2557 return 0; 2558 } 2559 2560 static void connection_to_info(struct connection_info *info, 2561 struct drbd_connection *connection) 2562 { 2563 info->conn_connection_state = connection->cstate; 2564 info->conn_role = conn_highest_peer(connection); 2565 } 2566 2567 static void peer_device_to_info(struct peer_device_info *info, 2568 struct drbd_peer_device *peer_device) 2569 { 2570 struct drbd_device *device = peer_device->device; 2571 2572 
info->peer_repl_state = 2573 max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn); 2574 info->peer_disk_state = device->state.pdsk; 2575 info->peer_resync_susp_user = device->state.user_isp; 2576 info->peer_resync_susp_peer = device->state.peer_isp; 2577 info->peer_resync_susp_dependency = device->state.aftr_isp; 2578 } 2579 2580 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) 2581 { 2582 struct connection_info connection_info; 2583 enum drbd_notification_type flags; 2584 unsigned int peer_devices = 0; 2585 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 2586 struct drbd_peer_device *peer_device; 2587 struct net_conf *old_net_conf, *new_net_conf = NULL; 2588 struct crypto crypto = { }; 2589 struct drbd_resource *resource; 2590 struct drbd_connection *connection; 2591 enum drbd_ret_code retcode; 2592 enum drbd_state_rv rv; 2593 int i; 2594 int err; 2595 2596 if (!adm_ctx->reply_skb) 2597 return 0; 2598 retcode = adm_ctx->reply_dh->ret_code; 2599 if (retcode != NO_ERROR) 2600 goto out; 2601 if (!(adm_ctx->my_addr && adm_ctx->peer_addr)) { 2602 drbd_msg_put_info(adm_ctx->reply_skb, "connection endpoint(s) missing"); 2603 retcode = ERR_INVALID_REQUEST; 2604 goto out; 2605 } 2606 2607 /* No need for _rcu here. All reconfiguration is 2608 * strictly serialized on genl_lock(). 
We are protected against 2609 * concurrent reconfiguration/addition/deletion */ 2610 for_each_resource(resource, &drbd_resources) { 2611 for_each_connection(connection, resource) { 2612 if (nla_len(adm_ctx->my_addr) == connection->my_addr_len && 2613 !memcmp(nla_data(adm_ctx->my_addr), &connection->my_addr, 2614 connection->my_addr_len)) { 2615 retcode = ERR_LOCAL_ADDR; 2616 goto out; 2617 } 2618 2619 if (nla_len(adm_ctx->peer_addr) == connection->peer_addr_len && 2620 !memcmp(nla_data(adm_ctx->peer_addr), &connection->peer_addr, 2621 connection->peer_addr_len)) { 2622 retcode = ERR_PEER_ADDR; 2623 goto out; 2624 } 2625 } 2626 } 2627 2628 mutex_lock(&adm_ctx->resource->adm_mutex); 2629 connection = first_connection(adm_ctx->resource); 2630 conn_reconfig_start(connection); 2631 2632 if (connection->cstate > C_STANDALONE) { 2633 retcode = ERR_NET_CONFIGURED; 2634 goto fail; 2635 } 2636 2637 /* allocation not in the IO path, drbdsetup / netlink process context */ 2638 new_net_conf = kzalloc_obj(*new_net_conf); 2639 if (!new_net_conf) { 2640 retcode = ERR_NOMEM; 2641 goto fail; 2642 } 2643 2644 set_net_conf_defaults(new_net_conf); 2645 2646 err = net_conf_from_attrs(new_net_conf, info); 2647 if (err && err != -ENOMSG) { 2648 retcode = ERR_MANDATORY_TAG; 2649 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 2650 goto fail; 2651 } 2652 2653 retcode = check_net_options(connection, new_net_conf); 2654 if (retcode != NO_ERROR) 2655 goto fail; 2656 2657 retcode = alloc_crypto(&crypto, new_net_conf); 2658 if (retcode != NO_ERROR) 2659 goto fail; 2660 2661 ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; 2662 2663 drbd_flush_workqueue(&connection->sender_work); 2664 2665 mutex_lock(&adm_ctx->resource->conf_update); 2666 old_net_conf = connection->net_conf; 2667 if (old_net_conf) { 2668 retcode = ERR_NET_CONFIGURED; 2669 mutex_unlock(&adm_ctx->resource->conf_update); 2670 goto fail; 2671 } 2672 rcu_assign_pointer(connection->net_conf, 
new_net_conf); 2673 2674 conn_free_crypto(connection); 2675 connection->cram_hmac_tfm = crypto.cram_hmac_tfm; 2676 connection->integrity_tfm = crypto.integrity_tfm; 2677 connection->csums_tfm = crypto.csums_tfm; 2678 connection->verify_tfm = crypto.verify_tfm; 2679 2680 connection->my_addr_len = nla_len(adm_ctx->my_addr); 2681 memcpy(&connection->my_addr, nla_data(adm_ctx->my_addr), connection->my_addr_len); 2682 connection->peer_addr_len = nla_len(adm_ctx->peer_addr); 2683 memcpy(&connection->peer_addr, nla_data(adm_ctx->peer_addr), connection->peer_addr_len); 2684 2685 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 2686 peer_devices++; 2687 } 2688 2689 connection_to_info(&connection_info, connection); 2690 flags = (peer_devices--) ? NOTIFY_CONTINUES : 0; 2691 mutex_lock(¬ification_mutex); 2692 notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags); 2693 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 2694 struct peer_device_info peer_device_info; 2695 2696 peer_device_to_info(&peer_device_info, peer_device); 2697 flags = (peer_devices--) ? 
NOTIFY_CONTINUES : 0; 2698 notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags); 2699 } 2700 mutex_unlock(¬ification_mutex); 2701 mutex_unlock(&adm_ctx->resource->conf_update); 2702 2703 rcu_read_lock(); 2704 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 2705 struct drbd_device *device = peer_device->device; 2706 device->send_cnt = 0; 2707 device->recv_cnt = 0; 2708 } 2709 rcu_read_unlock(); 2710 2711 rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); 2712 2713 conn_reconfig_done(connection); 2714 mutex_unlock(&adm_ctx->resource->adm_mutex); 2715 adm_ctx->reply_dh->ret_code = rv; 2716 return 0; 2717 2718 fail: 2719 free_crypto(&crypto); 2720 kfree(new_net_conf); 2721 2722 conn_reconfig_done(connection); 2723 mutex_unlock(&adm_ctx->resource->adm_mutex); 2724 out: 2725 adm_ctx->reply_dh->ret_code = retcode; 2726 return 0; 2727 } 2728 2729 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) 2730 { 2731 enum drbd_conns cstate; 2732 enum drbd_state_rv rv; 2733 2734 repeat: 2735 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), 2736 force ? CS_HARD : 0); 2737 2738 switch (rv) { 2739 case SS_NOTHING_TO_DO: 2740 break; 2741 case SS_ALREADY_STANDALONE: 2742 return SS_SUCCESS; 2743 case SS_PRIMARY_NOP: 2744 /* Our state checking code wants to see the peer outdated. */ 2745 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0); 2746 2747 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */ 2748 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE); 2749 2750 break; 2751 case SS_CW_FAILED_BY_PEER: 2752 spin_lock_irq(&connection->resource->req_lock); 2753 cstate = connection->cstate; 2754 spin_unlock_irq(&connection->resource->req_lock); 2755 if (cstate <= C_WF_CONNECTION) 2756 goto repeat; 2757 /* The peer probably wants to see us outdated. 
*/ 2758 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, 2759 disk, D_OUTDATED), 0); 2760 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { 2761 rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), 2762 CS_HARD); 2763 } 2764 break; 2765 default:; 2766 /* no special handling necessary */ 2767 } 2768 2769 if (rv >= SS_SUCCESS) { 2770 enum drbd_state_rv rv2; 2771 /* No one else can reconfigure the network while I am here. 2772 * The state handling only uses drbd_thread_stop_nowait(), 2773 * we want to really wait here until the receiver is no more. 2774 */ 2775 drbd_thread_stop(&connection->receiver); 2776 2777 /* Race breaker. This additional state change request may be 2778 * necessary, if this was a forced disconnect during a receiver 2779 * restart. We may have "killed" the receiver thread just 2780 * after drbd_receiver() returned. Typically, we should be 2781 * C_STANDALONE already, now, and this becomes a no-op. 2782 */ 2783 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE), 2784 CS_VERBOSE | CS_HARD); 2785 if (rv2 < SS_SUCCESS) 2786 drbd_err(connection, 2787 "unexpected rv2=%d in conn_try_disconnect()\n", 2788 rv2); 2789 /* Unlike in DRBD 9, the state engine has generated 2790 * NOTIFY_DESTROY events before clearing connection->net_conf. 
*/ 2791 } 2792 return rv; 2793 } 2794 2795 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) 2796 { 2797 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 2798 struct disconnect_parms parms; 2799 struct drbd_connection *connection; 2800 enum drbd_state_rv rv; 2801 enum drbd_ret_code retcode; 2802 int err; 2803 2804 if (!adm_ctx->reply_skb) 2805 return 0; 2806 retcode = adm_ctx->reply_dh->ret_code; 2807 if (retcode != NO_ERROR) 2808 goto fail; 2809 2810 connection = adm_ctx->connection; 2811 memset(&parms, 0, sizeof(parms)); 2812 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { 2813 err = disconnect_parms_from_attrs(&parms, info); 2814 if (err) { 2815 retcode = ERR_MANDATORY_TAG; 2816 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 2817 goto fail; 2818 } 2819 } 2820 2821 mutex_lock(&adm_ctx->resource->adm_mutex); 2822 rv = conn_try_disconnect(connection, parms.force_disconnect); 2823 mutex_unlock(&adm_ctx->resource->adm_mutex); 2824 if (rv < SS_SUCCESS) { 2825 adm_ctx->reply_dh->ret_code = rv; 2826 return 0; 2827 } 2828 retcode = NO_ERROR; 2829 fail: 2830 adm_ctx->reply_dh->ret_code = retcode; 2831 return 0; 2832 } 2833 2834 void resync_after_online_grow(struct drbd_device *device) 2835 { 2836 int iass; /* I am sync source */ 2837 2838 drbd_info(device, "Resync of new storage after online grow\n"); 2839 if (device->state.role != device->state.peer) 2840 iass = (device->state.role == R_PRIMARY); 2841 else 2842 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); 2843 2844 if (iass) 2845 drbd_start_resync(device, C_SYNC_SOURCE); 2846 else 2847 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); 2848 } 2849 2850 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) 2851 { 2852 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 2853 struct disk_conf *old_disk_conf, *new_disk_conf = NULL; 2854 struct resize_parms rs; 2855 struct drbd_device *device; 2856 
enum drbd_ret_code retcode; 2857 enum determine_dev_size dd; 2858 bool change_al_layout = false; 2859 enum dds_flags ddsf; 2860 sector_t u_size; 2861 int err; 2862 2863 if (!adm_ctx->reply_skb) 2864 return 0; 2865 retcode = adm_ctx->reply_dh->ret_code; 2866 if (retcode != NO_ERROR) 2867 goto finish; 2868 2869 mutex_lock(&adm_ctx->resource->adm_mutex); 2870 device = adm_ctx->device; 2871 if (!get_ldev(device)) { 2872 retcode = ERR_NO_DISK; 2873 goto fail; 2874 } 2875 2876 memset(&rs, 0, sizeof(struct resize_parms)); 2877 rs.al_stripes = device->ldev->md.al_stripes; 2878 rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4; 2879 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { 2880 err = resize_parms_from_attrs(&rs, info); 2881 if (err) { 2882 retcode = ERR_MANDATORY_TAG; 2883 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 2884 goto fail_ldev; 2885 } 2886 } 2887 2888 if (device->state.conn > C_CONNECTED) { 2889 retcode = ERR_RESIZE_RESYNC; 2890 goto fail_ldev; 2891 } 2892 2893 if (device->state.role == R_SECONDARY && 2894 device->state.peer == R_SECONDARY) { 2895 retcode = ERR_NO_PRIMARY; 2896 goto fail_ldev; 2897 } 2898 2899 if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) { 2900 retcode = ERR_NEED_APV_93; 2901 goto fail_ldev; 2902 } 2903 2904 rcu_read_lock(); 2905 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; 2906 rcu_read_unlock(); 2907 if (u_size != (sector_t)rs.resize_size) { 2908 new_disk_conf = kmalloc_obj(struct disk_conf); 2909 if (!new_disk_conf) { 2910 retcode = ERR_NOMEM; 2911 goto fail_ldev; 2912 } 2913 } 2914 2915 if (device->ldev->md.al_stripes != rs.al_stripes || 2916 device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) { 2917 u32 al_size_k = rs.al_stripes * rs.al_stripe_size; 2918 2919 if (al_size_k > (16 * 1024 * 1024)) { 2920 retcode = ERR_MD_LAYOUT_TOO_BIG; 2921 goto fail_ldev; 2922 } 2923 2924 if (al_size_k < MD_32kB_SECT/2) { 2925 retcode = ERR_MD_LAYOUT_TOO_SMALL; 
2926 goto fail_ldev; 2927 } 2928 2929 if (device->state.conn != C_CONNECTED && !rs.resize_force) { 2930 retcode = ERR_MD_LAYOUT_CONNECTED; 2931 goto fail_ldev; 2932 } 2933 2934 change_al_layout = true; 2935 } 2936 2937 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) 2938 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); 2939 2940 if (new_disk_conf) { 2941 mutex_lock(&device->resource->conf_update); 2942 old_disk_conf = device->ldev->disk_conf; 2943 *new_disk_conf = *old_disk_conf; 2944 new_disk_conf->disk_size = (sector_t)rs.resize_size; 2945 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); 2946 mutex_unlock(&device->resource->conf_update); 2947 kvfree_rcu_mightsleep(old_disk_conf); 2948 new_disk_conf = NULL; 2949 } 2950 2951 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); 2952 dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL); 2953 drbd_md_sync(device); 2954 put_ldev(device); 2955 if (dd == DS_ERROR) { 2956 retcode = ERR_NOMEM_BITMAP; 2957 goto fail; 2958 } else if (dd == DS_ERROR_SPACE_MD) { 2959 retcode = ERR_MD_LAYOUT_NO_FIT; 2960 goto fail; 2961 } else if (dd == DS_ERROR_SHRINK) { 2962 retcode = ERR_IMPLICIT_SHRINK; 2963 goto fail; 2964 } 2965 2966 if (device->state.conn == C_CONNECTED) { 2967 if (dd == DS_GREW) 2968 set_bit(RESIZE_PENDING, &device->flags); 2969 2970 drbd_send_uuids(first_peer_device(device)); 2971 drbd_send_sizes(first_peer_device(device), 1, ddsf); 2972 } 2973 2974 fail: 2975 mutex_unlock(&adm_ctx->resource->adm_mutex); 2976 finish: 2977 adm_ctx->reply_dh->ret_code = retcode; 2978 return 0; 2979 2980 fail_ldev: 2981 put_ldev(device); 2982 kfree(new_disk_conf); 2983 goto fail; 2984 } 2985 2986 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) 2987 { 2988 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 2989 enum drbd_ret_code retcode; 2990 struct res_opts res_opts; 2991 int err; 2992 2993 if 
(!adm_ctx->reply_skb) 2994 return 0; 2995 retcode = adm_ctx->reply_dh->ret_code; 2996 if (retcode != NO_ERROR) 2997 goto fail; 2998 2999 res_opts = adm_ctx->resource->res_opts; 3000 if (should_set_defaults(info)) 3001 set_res_opts_defaults(&res_opts); 3002 3003 err = res_opts_from_attrs(&res_opts, info); 3004 if (err && err != -ENOMSG) { 3005 retcode = ERR_MANDATORY_TAG; 3006 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 3007 goto fail; 3008 } 3009 3010 mutex_lock(&adm_ctx->resource->adm_mutex); 3011 err = set_resource_options(adm_ctx->resource, &res_opts); 3012 if (err) { 3013 retcode = ERR_INVALID_REQUEST; 3014 if (err == -ENOMEM) 3015 retcode = ERR_NOMEM; 3016 } 3017 mutex_unlock(&adm_ctx->resource->adm_mutex); 3018 3019 fail: 3020 adm_ctx->reply_dh->ret_code = retcode; 3021 return 0; 3022 } 3023 3024 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) 3025 { 3026 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3027 struct drbd_device *device; 3028 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ 3029 3030 if (!adm_ctx->reply_skb) 3031 return 0; 3032 retcode = adm_ctx->reply_dh->ret_code; 3033 if (retcode != NO_ERROR) 3034 goto out; 3035 3036 device = adm_ctx->device; 3037 if (!get_ldev(device)) { 3038 retcode = ERR_NO_DISK; 3039 goto out; 3040 } 3041 3042 mutex_lock(&adm_ctx->resource->adm_mutex); 3043 3044 /* If there is still bitmap IO pending, probably because of a previous 3045 * resync just being finished, wait for it before requesting a new resync. 3046 * Also wait for it's after_state_ch(). */ 3047 drbd_suspend_io(device); 3048 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); 3049 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); 3050 3051 /* If we happen to be C_STANDALONE R_SECONDARY, just change to 3052 * D_INCONSISTENT, and set all bits in the bitmap. Otherwise, 3053 * try to start a resync handshake as sync target for full sync. 
3054 */ 3055 if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) { 3056 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT)); 3057 if (retcode >= SS_SUCCESS) { 3058 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, 3059 "set_n_write from invalidate", BM_LOCKED_MASK, NULL)) 3060 retcode = ERR_IO_MD_DISK; 3061 } 3062 } else 3063 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); 3064 drbd_resume_io(device); 3065 mutex_unlock(&adm_ctx->resource->adm_mutex); 3066 put_ldev(device); 3067 out: 3068 adm_ctx->reply_dh->ret_code = retcode; 3069 return 0; 3070 } 3071 3072 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, 3073 union drbd_state mask, union drbd_state val) 3074 { 3075 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3076 enum drbd_ret_code retcode; 3077 3078 if (!adm_ctx->reply_skb) 3079 return 0; 3080 retcode = adm_ctx->reply_dh->ret_code; 3081 if (retcode != NO_ERROR) 3082 goto out; 3083 3084 mutex_lock(&adm_ctx->resource->adm_mutex); 3085 retcode = drbd_request_state(adm_ctx->device, mask, val); 3086 mutex_unlock(&adm_ctx->resource->adm_mutex); 3087 out: 3088 adm_ctx->reply_dh->ret_code = retcode; 3089 return 0; 3090 } 3091 3092 static int drbd_bmio_set_susp_al(struct drbd_device *device, 3093 struct drbd_peer_device *peer_device) __must_hold(local) 3094 { 3095 int rv; 3096 3097 rv = drbd_bmio_set_n_write(device, peer_device); 3098 drbd_suspend_al(device); 3099 return rv; 3100 } 3101 3102 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) 3103 { 3104 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3105 int retcode; /* drbd_ret_code, drbd_state_rv */ 3106 struct drbd_device *device; 3107 3108 if (!adm_ctx->reply_skb) 3109 return 0; 3110 retcode = adm_ctx->reply_dh->ret_code; 3111 if (retcode != NO_ERROR) 3112 goto out; 3113 3114 device = adm_ctx->device; 3115 if (!get_ldev(device)) { 3116 retcode = ERR_NO_DISK; 3117 goto out; 3118 } 
3119 3120 mutex_lock(&adm_ctx->resource->adm_mutex); 3121 3122 /* If there is still bitmap IO pending, probably because of a previous 3123 * resync just being finished, wait for it before requesting a new resync. 3124 * Also wait for it's after_state_ch(). */ 3125 drbd_suspend_io(device); 3126 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); 3127 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); 3128 3129 /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits 3130 * in the bitmap. Otherwise, try to start a resync handshake 3131 * as sync source for full sync. 3132 */ 3133 if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) { 3134 /* The peer will get a resync upon connect anyways. Just make that 3135 into a full resync. */ 3136 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT)); 3137 if (retcode >= SS_SUCCESS) { 3138 if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, 3139 "set_n_write from invalidate_peer", 3140 BM_LOCKED_SET_ALLOWED, NULL)) 3141 retcode = ERR_IO_MD_DISK; 3142 } 3143 } else 3144 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); 3145 drbd_resume_io(device); 3146 mutex_unlock(&adm_ctx->resource->adm_mutex); 3147 put_ldev(device); 3148 out: 3149 adm_ctx->reply_dh->ret_code = retcode; 3150 return 0; 3151 } 3152 3153 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) 3154 { 3155 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3156 enum drbd_ret_code retcode; 3157 3158 if (!adm_ctx->reply_skb) 3159 return 0; 3160 retcode = adm_ctx->reply_dh->ret_code; 3161 if (retcode != NO_ERROR) 3162 goto out; 3163 3164 mutex_lock(&adm_ctx->resource->adm_mutex); 3165 if (drbd_request_state(adm_ctx->device, NS(user_isp, 1)) == SS_NOTHING_TO_DO) 3166 retcode = ERR_PAUSE_IS_SET; 3167 mutex_unlock(&adm_ctx->resource->adm_mutex); 3168 out: 3169 adm_ctx->reply_dh->ret_code = retcode; 3170 return 0; 3171 } 3172 3173 int 
drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) 3174 { 3175 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3176 union drbd_dev_state s; 3177 enum drbd_ret_code retcode; 3178 3179 if (!adm_ctx->reply_skb) 3180 return 0; 3181 retcode = adm_ctx->reply_dh->ret_code; 3182 if (retcode != NO_ERROR) 3183 goto out; 3184 3185 mutex_lock(&adm_ctx->resource->adm_mutex); 3186 if (drbd_request_state(adm_ctx->device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { 3187 s = adm_ctx->device->state; 3188 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { 3189 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : 3190 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; 3191 } else { 3192 retcode = ERR_PAUSE_IS_CLEAR; 3193 } 3194 } 3195 mutex_unlock(&adm_ctx->resource->adm_mutex); 3196 out: 3197 adm_ctx->reply_dh->ret_code = retcode; 3198 return 0; 3199 } 3200 3201 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) 3202 { 3203 return drbd_adm_simple_request_state(skb, info, NS(susp, 1)); 3204 } 3205 3206 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) 3207 { 3208 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3209 struct drbd_device *device; 3210 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ 3211 3212 if (!adm_ctx->reply_skb) 3213 return 0; 3214 retcode = adm_ctx->reply_dh->ret_code; 3215 if (retcode != NO_ERROR) 3216 goto out; 3217 3218 mutex_lock(&adm_ctx->resource->adm_mutex); 3219 device = adm_ctx->device; 3220 if (test_bit(NEW_CUR_UUID, &device->flags)) { 3221 if (get_ldev_if_state(device, D_ATTACHING)) { 3222 drbd_uuid_new_current(device); 3223 put_ldev(device); 3224 } else { 3225 /* This is effectively a multi-stage "forced down". 3226 * The NEW_CUR_UUID bit is supposedly only set, if we 3227 * lost the replication connection, and are configured 3228 * to freeze IO and wait for some fence-peer handler. 3229 * So we still don't have a replication connection. 
3230 * And now we don't have a local disk either. After 3231 * resume, we will fail all pending and new IO, because 3232 * we don't have any data anymore. Which means we will 3233 * eventually be able to terminate all users of this 3234 * device, and then take it down. By bumping the 3235 * "effective" data uuid, we make sure that you really 3236 * need to tear down before you reconfigure, we will 3237 * the refuse to re-connect or re-attach (because no 3238 * matching real data uuid exists). 3239 */ 3240 u64 val; 3241 get_random_bytes(&val, sizeof(u64)); 3242 drbd_set_ed_uuid(device, val); 3243 drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n"); 3244 } 3245 clear_bit(NEW_CUR_UUID, &device->flags); 3246 } 3247 drbd_suspend_io(device); 3248 retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); 3249 if (retcode == SS_SUCCESS) { 3250 if (device->state.conn < C_CONNECTED) 3251 tl_clear(first_peer_device(device)->connection); 3252 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED) 3253 tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO); 3254 } 3255 drbd_resume_io(device); 3256 mutex_unlock(&adm_ctx->resource->adm_mutex); 3257 out: 3258 adm_ctx->reply_dh->ret_code = retcode; 3259 return 0; 3260 } 3261 3262 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) 3263 { 3264 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); 3265 } 3266 3267 static int nla_put_drbd_cfg_context(struct sk_buff *skb, 3268 struct drbd_resource *resource, 3269 struct drbd_connection *connection, 3270 struct drbd_device *device) 3271 { 3272 struct nlattr *nla; 3273 nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_CONTEXT); 3274 if (!nla) 3275 goto nla_put_failure; 3276 if (device && 3277 nla_put_u32(skb, T_ctx_volume, device->vnr)) 3278 goto nla_put_failure; 3279 if (nla_put_string(skb, T_ctx_resource_name, resource->name)) 3280 goto nla_put_failure; 
3281 if (connection) { 3282 if (connection->my_addr_len && 3283 nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr)) 3284 goto nla_put_failure; 3285 if (connection->peer_addr_len && 3286 nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr)) 3287 goto nla_put_failure; 3288 } 3289 nla_nest_end(skb, nla); 3290 return 0; 3291 3292 nla_put_failure: 3293 if (nla) 3294 nla_nest_cancel(skb, nla); 3295 return -EMSGSIZE; 3296 } 3297 3298 /* 3299 * The generic netlink dump callbacks are called outside the genl_lock(), so 3300 * they cannot use the simple attribute parsing code which uses global 3301 * attribute tables. 3302 */ 3303 static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr) 3304 { 3305 const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; 3306 const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; 3307 struct nlattr *nla; 3308 3309 nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), 3310 DRBD_NLA_CFG_CONTEXT); 3311 if (!nla) 3312 return NULL; 3313 return drbd_nla_find_nested(maxtype, nla, __nla_type(attr)); 3314 } 3315 3316 static void resource_to_info(struct resource_info *, struct drbd_resource *); 3317 3318 int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb) 3319 { 3320 struct drbd_genlmsghdr *dh; 3321 struct drbd_resource *resource; 3322 struct resource_info resource_info; 3323 struct resource_statistics resource_statistics; 3324 int err; 3325 3326 rcu_read_lock(); 3327 if (cb->args[0]) { 3328 for_each_resource_rcu(resource, &drbd_resources) 3329 if (resource == (struct drbd_resource *)cb->args[0]) 3330 goto found_resource; 3331 err = 0; /* resource was probably deleted */ 3332 goto out; 3333 } 3334 resource = list_entry(&drbd_resources, 3335 struct drbd_resource, resources); 3336 3337 found_resource: 3338 list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) { 3339 goto put_result; 3340 } 3341 err = 0; 
3342 goto out; 3343 3344 put_result: 3345 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, 3346 cb->nlh->nlmsg_seq, &drbd_genl_family, 3347 NLM_F_MULTI, DRBD_ADM_GET_RESOURCES); 3348 err = -ENOMEM; 3349 if (!dh) 3350 goto out; 3351 dh->minor = -1U; 3352 dh->ret_code = NO_ERROR; 3353 err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL); 3354 if (err) 3355 goto out; 3356 err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN)); 3357 if (err) 3358 goto out; 3359 resource_to_info(&resource_info, resource); 3360 err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN)); 3361 if (err) 3362 goto out; 3363 resource_statistics.res_stat_write_ordering = resource->write_ordering; 3364 err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN)); 3365 if (err) 3366 goto out; 3367 cb->args[0] = (long)resource; 3368 genlmsg_end(skb, dh); 3369 err = 0; 3370 3371 out: 3372 rcu_read_unlock(); 3373 if (err) 3374 return err; 3375 return skb->len; 3376 } 3377 3378 static void device_to_statistics(struct device_statistics *s, 3379 struct drbd_device *device) 3380 { 3381 memset(s, 0, sizeof(*s)); 3382 s->dev_upper_blocked = !may_inc_ap_bio(device); 3383 if (get_ldev(device)) { 3384 struct drbd_md *md = &device->ldev->md; 3385 u64 *history_uuids = (u64 *)s->history_uuids; 3386 int n; 3387 3388 spin_lock_irq(&md->uuid_lock); 3389 s->dev_current_uuid = md->uuid[UI_CURRENT]; 3390 BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1); 3391 for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++) 3392 history_uuids[n] = md->uuid[UI_HISTORY_START + n]; 3393 for (; n < HISTORY_UUIDS; n++) 3394 history_uuids[n] = 0; 3395 s->history_uuids_len = HISTORY_UUIDS; 3396 spin_unlock_irq(&md->uuid_lock); 3397 3398 s->dev_disk_flags = md->flags; 3399 put_ldev(device); 3400 } 3401 s->dev_size = get_capacity(device->vdisk); 3402 s->dev_read = device->read_cnt; 3403 s->dev_write = device->writ_cnt; 3404 s->dev_al_writes 
= device->al_writ_cnt; 3405 s->dev_bm_writes = device->bm_writ_cnt; 3406 s->dev_upper_pending = atomic_read(&device->ap_bio_cnt); 3407 s->dev_lower_pending = atomic_read(&device->local_cnt); 3408 s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags); 3409 s->dev_exposed_data_uuid = device->ed_uuid; 3410 } 3411 3412 static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr) 3413 { 3414 if (cb->args[0]) { 3415 struct drbd_resource *resource = 3416 (struct drbd_resource *)cb->args[0]; 3417 kref_put(&resource->kref, drbd_destroy_resource); 3418 } 3419 3420 return 0; 3421 } 3422 3423 int drbd_adm_dump_devices_done(struct netlink_callback *cb) { 3424 return put_resource_in_arg0(cb, 7); 3425 } 3426 3427 static void device_to_info(struct device_info *, struct drbd_device *); 3428 3429 int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb) 3430 { 3431 struct nlattr *resource_filter; 3432 struct drbd_resource *resource; 3433 struct drbd_device *device; 3434 int minor, err, retcode; 3435 struct drbd_genlmsghdr *dh; 3436 struct device_info device_info; 3437 struct device_statistics device_statistics; 3438 struct idr *idr_to_search; 3439 3440 resource = (struct drbd_resource *)cb->args[0]; 3441 if (!cb->args[0] && !cb->args[1]) { 3442 resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name); 3443 if (resource_filter) { 3444 retcode = ERR_RES_NOT_KNOWN; 3445 resource = drbd_find_resource(nla_data(resource_filter)); 3446 if (!resource) { 3447 rcu_read_lock(); 3448 goto put_result; 3449 } 3450 cb->args[0] = (long)resource; 3451 } 3452 } 3453 3454 rcu_read_lock(); 3455 minor = cb->args[1]; 3456 idr_to_search = resource ? 
/*
 * drbd_adm_dump_connections() - netlink dump callback for connections.
 *
 * Emits at most one DRBD_ADM_GET_CONNECTIONS message per invocation.
 * The dump position is kept in cb->args:
 *   args[0]  the resource currently being walked,
 *   args[1]  SINGLE_RESOURCE (request named a resource) or ITERATE_RESOURCES,
 *   args[2]  the connection reported last within that resource.
 *
 * In ITERATE_RESOURCES mode a kref on the current resource is held across
 * invocations and moved along when stepping to the next resource.
 * NOTE(review): in SINGLE_RESOURCE mode the reference presumably comes from
 * drbd_find_resource(); the final reference is presumably dropped by
 * drbd_adm_dump_connections_done() -- confirm against the family's op table.
 *
 * While a resource is inspected, its conf_update mutex is held; it is
 * released at "out" or when moving on to the next resource.
 *
 * Returns a negative error code on failure, skb->len otherwise.
 */
int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *resource_filter;
	struct drbd_resource *resource = NULL, *next_resource;
	struct drbd_connection *connection;
	int err = 0, retcode;
	struct drbd_genlmsghdr *dh;
	struct connection_info connection_info;
	struct connection_statistics connection_statistics;

	rcu_read_lock();
	resource = (struct drbd_resource *)cb->args[0];
	if (!cb->args[0]) {
		/* First invocation: honor an optional resource name filter. */
		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
		if (resource_filter) {
			retcode = ERR_RES_NOT_KNOWN;
			resource = drbd_find_resource(nla_data(resource_filter));
			if (!resource)
				goto put_result; /* reports ERR_RES_NOT_KNOWN */
			cb->args[0] = (long)resource;
			cb->args[1] = SINGLE_RESOURCE;
		}
	}
	if (!resource) {
		/* No filter: start with the first resource on the global list. */
		if (list_empty(&drbd_resources))
			goto out;
		resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
		kref_get(&resource->kref);
		cb->args[0] = (long)resource;
		cb->args[1] = ITERATE_RESOURCES;
	}

    next_resource:
	/* The RCU read side is left before taking the mutex and re-entered
	 * afterwards. */
	rcu_read_unlock();
	mutex_lock(&resource->conf_update);
	rcu_read_lock();
	if (cb->args[2]) {
		/* Resume behind the connection reported last time. */
		for_each_connection_rcu(connection, resource)
			if (connection == (struct drbd_connection *)cb->args[2])
				goto found_connection;
		/* connection was probably deleted */
		goto no_more_connections;
	}
	/* Point at the list head itself, so that the "continue" iteration
	 * below starts with the first real entry. */
	connection = list_entry(&resource->connections, struct drbd_connection, connections);

found_connection:
	list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
		if (!has_net_conf(connection))
			continue;
		retcode = NO_ERROR;
		goto put_result;  /* only one iteration */
	}

no_more_connections:
	if (cb->args[1] == ITERATE_RESOURCES) {
		/* Locate the current resource on the global list again, in
		 * order to step to its successor. */
		for_each_resource_rcu(next_resource, &drbd_resources) {
			if (next_resource == resource)
				goto found_resource;
		}
		/* resource was probably deleted */
	}
	goto out;

found_resource:
	list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
		/* Hand over: release lock and reference of the old resource,
		 * take a reference on the new one, reset the connection
		 * cursor. */
		mutex_unlock(&resource->conf_update);
		kref_put(&resource->kref, drbd_destroy_resource);
		resource = next_resource;
		kref_get(&resource->kref);
		cb->args[0] = (long)resource;
		cb->args[2] = 0;
		goto next_resource;
	}
	goto out; /* no more resources */

put_result:
	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, &drbd_genl_family,
			NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
	err = -ENOMEM;
	if (!dh)
		goto out;
	dh->ret_code = retcode;
	dh->minor = -1U;
	if (retcode == NO_ERROR) {
		struct net_conf *net_conf;

		err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
		if (err)
			goto out;
		net_conf = rcu_dereference(connection->net_conf);
		if (net_conf) {
			err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
			if (err)
				goto out;
		}
		connection_to_info(&connection_info, connection);
		err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
		err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		/* Only a completely written message advances the cursor. */
		cb->args[2] = (long)connection;
	}
	genlmsg_end(skb, dh);
	err = 0;

out:
	rcu_read_unlock();
	/* resource is NULL only on the paths that never took the mutex
	 * (unknown resource name, empty resource list). */
	if (resource)
		mutex_unlock(&resource->conf_update);
	if (err)
		return err;
	return skb->len;
}
/*
 * drbd_adm_dump_peer_devices() - netlink dump callback for peer devices.
 *
 * Emits at most one DRBD_ADM_GET_PEER_DEVICES message per invocation.
 * The dump position is kept in cb->args:
 *   args[0]  the resource selected by the optional name filter (or 0),
 *   args[1]  the minor of the device currently being walked,
 *   args[2]  the peer device reported last for that device.
 *
 * NOTE(review): a resource found via drbd_find_resource() presumably carries
 * a reference that drbd_adm_dump_peer_devices_done() drops -- confirm against
 * the family's op table.
 *
 * Returns a negative error code on failure, skb->len otherwise.
 */
int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *resource_filter;
	struct drbd_resource *resource;
	struct drbd_device *device;
	struct drbd_peer_device *peer_device = NULL;
	int minor, err, retcode;
	struct drbd_genlmsghdr *dh;
	struct idr *idr_to_search;

	resource = (struct drbd_resource *)cb->args[0];
	if (!cb->args[0] && !cb->args[1]) {
		/* Nothing recorded yet: honor an optional resource name filter. */
		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
		if (resource_filter) {
			retcode = ERR_RES_NOT_KNOWN;
			resource = drbd_find_resource(nla_data(resource_filter));
			if (!resource) {
				/* "out" unconditionally drops the RCU read
				 * lock, so take it before jumping. */
				rcu_read_lock();
				goto put_result;
			}
		}
		cb->args[0] = (long)resource;
	}

	rcu_read_lock();
	minor = cb->args[1];
	/* With a filter only that resource's devices are searched, otherwise
	 * the global device idr. */
	idr_to_search = resource ? &resource->devices : &drbd_devices;
	device = idr_find(idr_to_search, minor);
	if (!device) {
next_device:
		/* Also entered by goto from below: advance to the next
		 * existing minor and forget the peer device cursor. */
		minor++;
		cb->args[2] = 0;
		device = idr_get_next(idr_to_search, &minor);
		if (!device) {
			err = 0;
			goto out;
		}
	}
	if (cb->args[2]) {
		/* Resume behind the peer device reported last time. */
		for_each_peer_device(peer_device, device)
			if (peer_device == (struct drbd_peer_device *)cb->args[2])
				goto found_peer_device;
		/* peer device was probably deleted */
		goto next_device;
	}
	/* Make peer_device point to the list head (not the first entry). */
	peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);

found_peer_device:
	list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
		if (!has_net_conf(peer_device->connection))
			continue;
		retcode = NO_ERROR;
		goto put_result;  /* only one iteration */
	}
	/* No further reportable peer device on this device. */
	goto next_device;

put_result:
	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, &drbd_genl_family,
			NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
	err = -ENOMEM;
	if (!dh)
		goto out;
	dh->ret_code = retcode;
	dh->minor = -1U;
	if (retcode == NO_ERROR) {
		struct peer_device_info peer_device_info;
		struct peer_device_statistics peer_device_statistics;

		dh->minor = minor;
		err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
		if (err)
			goto out;
		peer_device_to_info(&peer_device_info, peer_device);
		err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		peer_device_to_statistics(&peer_device_statistics, peer_device);
		err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
		if (err)
			goto out;
		/* Only a completely written message advances the cursor. */
		cb->args[1] = minor;
		cb->args[2] = (long)peer_device;
	}
	genlmsg_end(skb, dh);
	err = 0;

out:
	rcu_read_unlock();
	if (err)
		return err;
	return skb->len;
}
*/ 3815 if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device)) 3816 goto nla_put_failure; 3817 3818 if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive)) 3819 goto nla_put_failure; 3820 3821 rcu_read_lock(); 3822 if (got_ldev) { 3823 struct disk_conf *disk_conf; 3824 3825 disk_conf = rcu_dereference(device->ldev->disk_conf); 3826 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive); 3827 } 3828 if (!err) { 3829 struct net_conf *nc; 3830 3831 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 3832 if (nc) 3833 err = net_conf_to_skb(skb, nc, exclude_sensitive); 3834 } 3835 rcu_read_unlock(); 3836 if (err) 3837 goto nla_put_failure; 3838 3839 nla = nla_nest_start_noflag(skb, DRBD_NLA_STATE_INFO); 3840 if (!nla) 3841 goto nla_put_failure; 3842 if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || 3843 nla_put_u32(skb, T_current_state, device->state.i) || 3844 nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) || 3845 nla_put_u64_0pad(skb, T_capacity, get_capacity(device->vdisk)) || 3846 nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) || 3847 nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) || 3848 nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) || 3849 nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) || 3850 nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) || 3851 nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) || 3852 nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) || 3853 nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) || 3854 nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt))) 3855 goto nla_put_failure; 3856 3857 if (got_ldev) { 3858 int err; 3859 3860 spin_lock_irq(&device->ldev->md.uuid_lock); 3861 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid); 3862 spin_unlock_irq(&device->ldev->md.uuid_lock); 3863 3864 if (err) 3865 goto nla_put_failure; 3866 
3867 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) || 3868 nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) || 3869 nla_put_u64_0pad(skb, T_bits_oos, 3870 drbd_bm_total_weight(device))) 3871 goto nla_put_failure; 3872 if (C_SYNC_SOURCE <= device->state.conn && 3873 C_PAUSED_SYNC_T >= device->state.conn) { 3874 if (nla_put_u64_0pad(skb, T_bits_rs_total, 3875 device->rs_total) || 3876 nla_put_u64_0pad(skb, T_bits_rs_failed, 3877 device->rs_failed)) 3878 goto nla_put_failure; 3879 } 3880 } 3881 3882 if (sib) { 3883 switch(sib->sib_reason) { 3884 case SIB_SYNC_PROGRESS: 3885 case SIB_GET_STATUS_REPLY: 3886 break; 3887 case SIB_STATE_CHANGE: 3888 if (nla_put_u32(skb, T_prev_state, sib->os.i) || 3889 nla_put_u32(skb, T_new_state, sib->ns.i)) 3890 goto nla_put_failure; 3891 break; 3892 case SIB_HELPER_POST: 3893 if (nla_put_u32(skb, T_helper_exit_code, 3894 sib->helper_exit_code)) 3895 goto nla_put_failure; 3896 fallthrough; 3897 case SIB_HELPER_PRE: 3898 if (nla_put_string(skb, T_helper, sib->helper_name)) 3899 goto nla_put_failure; 3900 break; 3901 } 3902 } 3903 nla_nest_end(skb, nla); 3904 3905 if (0) 3906 nla_put_failure: 3907 err = -EMSGSIZE; 3908 if (got_ldev) 3909 put_ldev(device); 3910 return err; 3911 } 3912 3913 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) 3914 { 3915 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 3916 enum drbd_ret_code retcode; 3917 int err; 3918 3919 if (!adm_ctx->reply_skb) 3920 return 0; 3921 retcode = adm_ctx->reply_dh->ret_code; 3922 if (retcode != NO_ERROR) 3923 goto out; 3924 3925 err = nla_put_status_info(adm_ctx->reply_skb, adm_ctx->device, NULL); 3926 if (err) { 3927 nlmsg_free(adm_ctx->reply_skb); 3928 adm_ctx->reply_skb = NULL; 3929 return err; 3930 } 3931 out: 3932 adm_ctx->reply_dh->ret_code = retcode; 3933 return 0; 3934 } 3935 3936 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) 3937 { 3938 struct drbd_device *device; 3939 struct drbd_genlmsghdr 
*dh; 3940 struct drbd_resource *pos = (struct drbd_resource *)cb->args[0]; 3941 struct drbd_resource *resource = NULL; 3942 struct drbd_resource *tmp; 3943 unsigned volume = cb->args[1]; 3944 3945 /* Open coded, deferred, iteration: 3946 * for_each_resource_safe(resource, tmp, &drbd_resources) { 3947 * connection = "first connection of resource or undefined"; 3948 * idr_for_each_entry(&resource->devices, device, i) { 3949 * ... 3950 * } 3951 * } 3952 * where resource is cb->args[0]; 3953 * and i is cb->args[1]; 3954 * 3955 * cb->args[2] indicates if we shall loop over all resources, 3956 * or just dump all volumes of a single resource. 3957 * 3958 * This may miss entries inserted after this dump started, 3959 * or entries deleted before they are reached. 3960 * 3961 * We need to make sure the device won't disappear while 3962 * we are looking at it, and revalidate our iterators 3963 * on each iteration. 3964 */ 3965 3966 /* synchronize with conn_create()/drbd_destroy_connection() */ 3967 rcu_read_lock(); 3968 /* revalidate iterator position */ 3969 for_each_resource_rcu(tmp, &drbd_resources) { 3970 if (pos == NULL) { 3971 /* first iteration */ 3972 pos = tmp; 3973 resource = pos; 3974 break; 3975 } 3976 if (tmp == pos) { 3977 resource = pos; 3978 break; 3979 } 3980 } 3981 if (resource) { 3982 next_resource: 3983 device = idr_get_next(&resource->devices, &volume); 3984 if (!device) { 3985 /* No more volumes to dump on this resource. 3986 * Advance resource iterator. */ 3987 pos = list_entry_rcu(resource->resources.next, 3988 struct drbd_resource, resources); 3989 /* Did we dump any volume of this resource yet? */ 3990 if (volume != 0) { 3991 /* If we reached the end of the list, 3992 * or only a single resource dump was requested, 3993 * we are done. 
*/ 3994 if (&pos->resources == &drbd_resources || cb->args[2]) 3995 goto out; 3996 volume = 0; 3997 resource = pos; 3998 goto next_resource; 3999 } 4000 } 4001 4002 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, 4003 cb->nlh->nlmsg_seq, &drbd_genl_family, 4004 NLM_F_MULTI, DRBD_ADM_GET_STATUS); 4005 if (!dh) 4006 goto out; 4007 4008 if (!device) { 4009 /* This is a connection without a single volume. 4010 * Suprisingly enough, it may have a network 4011 * configuration. */ 4012 struct drbd_connection *connection; 4013 4014 dh->minor = -1U; 4015 dh->ret_code = NO_ERROR; 4016 connection = the_only_connection(resource); 4017 if (nla_put_drbd_cfg_context(skb, resource, connection, NULL)) 4018 goto cancel; 4019 if (connection) { 4020 struct net_conf *nc; 4021 4022 nc = rcu_dereference(connection->net_conf); 4023 if (nc && net_conf_to_skb(skb, nc, 1) != 0) 4024 goto cancel; 4025 } 4026 goto done; 4027 } 4028 4029 D_ASSERT(device, device->vnr == volume); 4030 D_ASSERT(device, device->resource == resource); 4031 4032 dh->minor = device_to_minor(device); 4033 dh->ret_code = NO_ERROR; 4034 4035 if (nla_put_status_info(skb, device, NULL)) { 4036 cancel: 4037 genlmsg_cancel(skb, dh); 4038 goto out; 4039 } 4040 done: 4041 genlmsg_end(skb, dh); 4042 } 4043 4044 out: 4045 rcu_read_unlock(); 4046 /* where to start the next iteration */ 4047 cb->args[0] = (long)pos; 4048 cb->args[1] = (pos == resource) ? volume + 1 : 0; 4049 4050 /* No more resources/volumes/minors found results in an empty skb. 4051 * Which will terminate the dump. */ 4052 return skb->len; 4053 } 4054 4055 /* 4056 * Request status of all resources, or of all volumes within a single resource. 4057 * 4058 * This is a dump, as the answer may not fit in a single reply skb otherwise. 4059 * Which means we cannot use the family->attrbuf or other such members, because 4060 * dump is NOT protected by the genl_lock(). 
During dump, we only have access 4061 * to the incoming skb, and need to opencode "parsing" of the nlattr payload. 4062 * 4063 * Once things are setup properly, we call into get_one_status(). 4064 */ 4065 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) 4066 { 4067 const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; 4068 struct nlattr *nla; 4069 const char *resource_name; 4070 struct drbd_resource *resource; 4071 int maxtype; 4072 4073 /* Is this a followup call? */ 4074 if (cb->args[0]) { 4075 /* ... of a single resource dump, 4076 * and the resource iterator has been advanced already? */ 4077 if (cb->args[2] && cb->args[2] != cb->args[0]) 4078 return 0; /* DONE. */ 4079 goto dump; 4080 } 4081 4082 /* First call (from netlink_dump_start). We need to figure out 4083 * which resource(s) the user wants us to dump. */ 4084 nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen), 4085 nlmsg_attrlen(cb->nlh, hdrlen), 4086 DRBD_NLA_CFG_CONTEXT); 4087 4088 /* No explicit context given. Dump all. */ 4089 if (!nla) 4090 goto dump; 4091 maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; 4092 nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); 4093 if (IS_ERR(nla)) 4094 return PTR_ERR(nla); 4095 /* context given, but no name present? */ 4096 if (!nla) 4097 return -EINVAL; 4098 resource_name = nla_data(nla); 4099 if (!*resource_name) 4100 return -ENODEV; 4101 resource = drbd_find_resource(resource_name); 4102 if (!resource) 4103 return -ENODEV; 4104 4105 kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */ 4106 4107 /* prime iterators, and set "filter" mode mark: 4108 * only dump this connection. */ 4109 cb->args[0] = (long)resource; 4110 /* cb->args[1] = 0; passed in this way. 
*/ 4111 cb->args[2] = (long)resource; 4112 4113 dump: 4114 return get_one_status(skb, cb); 4115 } 4116 4117 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) 4118 { 4119 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4120 enum drbd_ret_code retcode; 4121 struct timeout_parms tp; 4122 int err; 4123 4124 if (!adm_ctx->reply_skb) 4125 return 0; 4126 retcode = adm_ctx->reply_dh->ret_code; 4127 if (retcode != NO_ERROR) 4128 goto out; 4129 4130 tp.timeout_type = 4131 adm_ctx->device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : 4132 test_bit(USE_DEGR_WFC_T, &adm_ctx->device->flags) ? UT_DEGRADED : 4133 UT_DEFAULT; 4134 4135 err = timeout_parms_to_priv_skb(adm_ctx->reply_skb, &tp); 4136 if (err) { 4137 nlmsg_free(adm_ctx->reply_skb); 4138 adm_ctx->reply_skb = NULL; 4139 return err; 4140 } 4141 out: 4142 adm_ctx->reply_dh->ret_code = retcode; 4143 return 0; 4144 } 4145 4146 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) 4147 { 4148 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4149 struct drbd_device *device; 4150 enum drbd_ret_code retcode; 4151 struct start_ov_parms parms; 4152 4153 if (!adm_ctx->reply_skb) 4154 return 0; 4155 retcode = adm_ctx->reply_dh->ret_code; 4156 if (retcode != NO_ERROR) 4157 goto out; 4158 4159 device = adm_ctx->device; 4160 4161 /* resume from last known position, if possible */ 4162 parms.ov_start_sector = device->ov_start_sector; 4163 parms.ov_stop_sector = ULLONG_MAX; 4164 if (info->attrs[DRBD_NLA_START_OV_PARMS]) { 4165 int err = start_ov_parms_from_attrs(&parms, info); 4166 if (err) { 4167 retcode = ERR_MANDATORY_TAG; 4168 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 4169 goto out; 4170 } 4171 } 4172 mutex_lock(&adm_ctx->resource->adm_mutex); 4173 4174 /* w_make_ov_request expects position to be aligned */ 4175 device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); 4176 device->ov_stop_sector = parms.ov_stop_sector; 4177 4178 /* If 
there is still bitmap IO pending, e.g. previous resync or verify 4179 * just being finished, wait for it before requesting a new resync. */ 4180 drbd_suspend_io(device); 4181 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); 4182 retcode = drbd_request_state(device, NS(conn, C_VERIFY_S)); 4183 drbd_resume_io(device); 4184 4185 mutex_unlock(&adm_ctx->resource->adm_mutex); 4186 out: 4187 adm_ctx->reply_dh->ret_code = retcode; 4188 return 0; 4189 } 4190 4191 4192 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) 4193 { 4194 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4195 struct drbd_device *device; 4196 enum drbd_ret_code retcode; 4197 int skip_initial_sync = 0; 4198 int err; 4199 struct new_c_uuid_parms args; 4200 4201 if (!adm_ctx->reply_skb) 4202 return 0; 4203 retcode = adm_ctx->reply_dh->ret_code; 4204 if (retcode != NO_ERROR) 4205 goto out_nolock; 4206 4207 device = adm_ctx->device; 4208 memset(&args, 0, sizeof(args)); 4209 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { 4210 err = new_c_uuid_parms_from_attrs(&args, info); 4211 if (err) { 4212 retcode = ERR_MANDATORY_TAG; 4213 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 4214 goto out_nolock; 4215 } 4216 } 4217 4218 mutex_lock(&adm_ctx->resource->adm_mutex); 4219 mutex_lock(device->state_mutex); /* Protects us against serialized state changes. 
*/ 4220 4221 if (!get_ldev(device)) { 4222 retcode = ERR_NO_DISK; 4223 goto out; 4224 } 4225 4226 /* this is "skip initial sync", assume to be clean */ 4227 if (device->state.conn == C_CONNECTED && 4228 first_peer_device(device)->connection->agreed_pro_version >= 90 && 4229 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { 4230 drbd_info(device, "Preparing to skip initial sync\n"); 4231 skip_initial_sync = 1; 4232 } else if (device->state.conn != C_STANDALONE) { 4233 retcode = ERR_CONNECTED; 4234 goto out_dec; 4235 } 4236 4237 drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */ 4238 drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */ 4239 4240 if (args.clear_bm) { 4241 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, 4242 "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL); 4243 if (err) { 4244 drbd_err(device, "Writing bitmap failed with %d\n", err); 4245 retcode = ERR_IO_MD_DISK; 4246 } 4247 if (skip_initial_sync) { 4248 drbd_send_uuids_skip_initial_sync(first_peer_device(device)); 4249 _drbd_uuid_set(device, UI_BITMAP, 0); 4250 drbd_print_uuids(device, "cleared bitmap UUID"); 4251 spin_lock_irq(&device->resource->req_lock); 4252 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 4253 CS_VERBOSE, NULL); 4254 spin_unlock_irq(&device->resource->req_lock); 4255 } 4256 } 4257 4258 drbd_md_sync(device); 4259 out_dec: 4260 put_ldev(device); 4261 out: 4262 mutex_unlock(device->state_mutex); 4263 mutex_unlock(&adm_ctx->resource->adm_mutex); 4264 out_nolock: 4265 adm_ctx->reply_dh->ret_code = retcode; 4266 return 0; 4267 } 4268 4269 static enum drbd_ret_code 4270 drbd_check_resource_name(struct drbd_config_context *adm_ctx) 4271 { 4272 const char *name = adm_ctx->resource_name; 4273 if (!name || !name[0]) { 4274 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing"); 4275 return ERR_MANDATORY_TAG; 4276 } 4277 /* if we want to use these in sysfs/configfs/debugfs 
some day, 4278 * we must not allow slashes */ 4279 if (strchr(name, '/')) { 4280 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name"); 4281 return ERR_INVALID_REQUEST; 4282 } 4283 return NO_ERROR; 4284 } 4285 4286 static void resource_to_info(struct resource_info *info, 4287 struct drbd_resource *resource) 4288 { 4289 info->res_role = conn_highest_role(first_connection(resource)); 4290 info->res_susp = resource->susp; 4291 info->res_susp_nod = resource->susp_nod; 4292 info->res_susp_fen = resource->susp_fen; 4293 } 4294 4295 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) 4296 { 4297 struct drbd_connection *connection; 4298 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4299 enum drbd_ret_code retcode; 4300 struct res_opts res_opts; 4301 int err; 4302 4303 if (!adm_ctx->reply_skb) 4304 return 0; 4305 retcode = adm_ctx->reply_dh->ret_code; 4306 if (retcode != NO_ERROR) 4307 goto out; 4308 4309 set_res_opts_defaults(&res_opts); 4310 err = res_opts_from_attrs(&res_opts, info); 4311 if (err && err != -ENOMSG) { 4312 retcode = ERR_MANDATORY_TAG; 4313 drbd_msg_put_info(adm_ctx->reply_skb, from_attrs_err_to_txt(err)); 4314 goto out; 4315 } 4316 4317 retcode = drbd_check_resource_name(adm_ctx); 4318 if (retcode != NO_ERROR) 4319 goto out; 4320 4321 if (adm_ctx->resource) { 4322 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { 4323 retcode = ERR_INVALID_REQUEST; 4324 drbd_msg_put_info(adm_ctx->reply_skb, "resource exists"); 4325 } 4326 /* else: still NO_ERROR */ 4327 goto out; 4328 } 4329 4330 /* not yet safe for genl_family.parallel_ops */ 4331 mutex_lock(&resources_mutex); 4332 connection = conn_create(adm_ctx->resource_name, &res_opts); 4333 mutex_unlock(&resources_mutex); 4334 4335 if (connection) { 4336 struct resource_info resource_info; 4337 4338 mutex_lock(¬ification_mutex); 4339 resource_to_info(&resource_info, connection->resource); 4340 notify_resource_state(NULL, 0, connection->resource, 4341 &resource_info, NOTIFY_CREATE); 
4342 mutex_unlock(¬ification_mutex); 4343 } else 4344 retcode = ERR_NOMEM; 4345 4346 out: 4347 adm_ctx->reply_dh->ret_code = retcode; 4348 return 0; 4349 } 4350 4351 static void device_to_info(struct device_info *info, 4352 struct drbd_device *device) 4353 { 4354 info->dev_disk_state = device->state.disk; 4355 } 4356 4357 4358 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) 4359 { 4360 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4361 struct drbd_genlmsghdr *dh = genl_info_userhdr(info); 4362 enum drbd_ret_code retcode; 4363 4364 if (!adm_ctx->reply_skb) 4365 return 0; 4366 retcode = adm_ctx->reply_dh->ret_code; 4367 if (retcode != NO_ERROR) 4368 goto out; 4369 4370 if (dh->minor > MINORMASK) { 4371 drbd_msg_put_info(adm_ctx->reply_skb, "requested minor out of range"); 4372 retcode = ERR_INVALID_REQUEST; 4373 goto out; 4374 } 4375 if (adm_ctx->volume > DRBD_VOLUME_MAX) { 4376 drbd_msg_put_info(adm_ctx->reply_skb, "requested volume id out of range"); 4377 retcode = ERR_INVALID_REQUEST; 4378 goto out; 4379 } 4380 4381 /* drbd_adm_prepare made sure already 4382 * that first_peer_device(device)->connection and device->vnr match the request. */ 4383 if (adm_ctx->device) { 4384 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) 4385 retcode = ERR_MINOR_OR_VOLUME_EXISTS; 4386 /* else: still NO_ERROR */ 4387 goto out; 4388 } 4389 4390 mutex_lock(&adm_ctx->resource->adm_mutex); 4391 retcode = drbd_create_device(adm_ctx, dh->minor); 4392 if (retcode == NO_ERROR) { 4393 struct drbd_device *device; 4394 struct drbd_peer_device *peer_device; 4395 struct device_info info; 4396 unsigned int peer_devices = 0; 4397 enum drbd_notification_type flags; 4398 4399 device = minor_to_device(dh->minor); 4400 for_each_peer_device(peer_device, device) { 4401 if (!has_net_conf(peer_device->connection)) 4402 continue; 4403 peer_devices++; 4404 } 4405 4406 device_to_info(&info, device); 4407 mutex_lock(¬ification_mutex); 4408 flags = (peer_devices--) ? 
NOTIFY_CONTINUES : 0; 4409 notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags); 4410 for_each_peer_device(peer_device, device) { 4411 struct peer_device_info peer_device_info; 4412 4413 if (!has_net_conf(peer_device->connection)) 4414 continue; 4415 peer_device_to_info(&peer_device_info, peer_device); 4416 flags = (peer_devices--) ? NOTIFY_CONTINUES : 0; 4417 notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, 4418 NOTIFY_CREATE | flags); 4419 } 4420 mutex_unlock(¬ification_mutex); 4421 } 4422 mutex_unlock(&adm_ctx->resource->adm_mutex); 4423 out: 4424 adm_ctx->reply_dh->ret_code = retcode; 4425 return 0; 4426 } 4427 4428 static enum drbd_ret_code adm_del_minor(struct drbd_device *device) 4429 { 4430 struct drbd_peer_device *peer_device; 4431 4432 if (device->state.disk == D_DISKLESS && 4433 /* no need to be device->state.conn == C_STANDALONE && 4434 * we may want to delete a minor from a live replication group. 4435 */ 4436 device->state.role == R_SECONDARY) { 4437 struct drbd_connection *connection = 4438 first_connection(device->resource); 4439 4440 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS), 4441 CS_VERBOSE + CS_WAIT_COMPLETE); 4442 4443 /* If the state engine hasn't stopped the sender thread yet, we 4444 * need to flush the sender work queue before generating the 4445 * DESTROY events here. 
*/ 4446 if (get_t_state(&connection->worker) == RUNNING) 4447 drbd_flush_workqueue(&connection->sender_work); 4448 4449 mutex_lock(¬ification_mutex); 4450 for_each_peer_device(peer_device, device) { 4451 if (!has_net_conf(peer_device->connection)) 4452 continue; 4453 notify_peer_device_state(NULL, 0, peer_device, NULL, 4454 NOTIFY_DESTROY | NOTIFY_CONTINUES); 4455 } 4456 notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY); 4457 mutex_unlock(¬ification_mutex); 4458 4459 drbd_delete_device(device); 4460 return NO_ERROR; 4461 } else 4462 return ERR_MINOR_CONFIGURED; 4463 } 4464 4465 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info) 4466 { 4467 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4468 enum drbd_ret_code retcode; 4469 4470 if (!adm_ctx->reply_skb) 4471 return 0; 4472 retcode = adm_ctx->reply_dh->ret_code; 4473 if (retcode != NO_ERROR) 4474 goto out; 4475 4476 mutex_lock(&adm_ctx->resource->adm_mutex); 4477 retcode = adm_del_minor(adm_ctx->device); 4478 mutex_unlock(&adm_ctx->resource->adm_mutex); 4479 out: 4480 adm_ctx->reply_dh->ret_code = retcode; 4481 return 0; 4482 } 4483 4484 static int adm_del_resource(struct drbd_resource *resource) 4485 { 4486 struct drbd_connection *connection; 4487 4488 for_each_connection(connection, resource) { 4489 if (connection->cstate > C_STANDALONE) 4490 return ERR_NET_CONFIGURED; 4491 } 4492 if (!idr_is_empty(&resource->devices)) 4493 return ERR_RES_IN_USE; 4494 4495 /* The state engine has stopped the sender thread, so we don't 4496 * need to flush the sender work queue before generating the 4497 * DESTROY event here. */ 4498 mutex_lock(¬ification_mutex); 4499 notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY); 4500 mutex_unlock(¬ification_mutex); 4501 4502 mutex_lock(&resources_mutex); 4503 list_del_rcu(&resource->resources); 4504 mutex_unlock(&resources_mutex); 4505 /* Make sure all threads have actually stopped: state handling only 4506 * does drbd_thread_stop_nowait(). 
*/ 4507 list_for_each_entry(connection, &resource->connections, connections) 4508 drbd_thread_stop(&connection->worker); 4509 synchronize_rcu(); 4510 drbd_free_resource(resource); 4511 return NO_ERROR; 4512 } 4513 4514 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) 4515 { 4516 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4517 struct drbd_resource *resource; 4518 struct drbd_connection *connection; 4519 struct drbd_device *device; 4520 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ 4521 unsigned i; 4522 4523 if (!adm_ctx->reply_skb) 4524 return 0; 4525 retcode = adm_ctx->reply_dh->ret_code; 4526 if (retcode != NO_ERROR) 4527 goto finish; 4528 4529 resource = adm_ctx->resource; 4530 mutex_lock(&resource->adm_mutex); 4531 /* demote */ 4532 for_each_connection(connection, resource) { 4533 struct drbd_peer_device *peer_device; 4534 4535 idr_for_each_entry(&connection->peer_devices, peer_device, i) { 4536 retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0); 4537 if (retcode < SS_SUCCESS) { 4538 drbd_msg_put_info(adm_ctx->reply_skb, "failed to demote"); 4539 goto out; 4540 } 4541 } 4542 4543 retcode = conn_try_disconnect(connection, 0); 4544 if (retcode < SS_SUCCESS) { 4545 drbd_msg_put_info(adm_ctx->reply_skb, "failed to disconnect"); 4546 goto out; 4547 } 4548 } 4549 4550 /* detach */ 4551 idr_for_each_entry(&resource->devices, device, i) { 4552 retcode = adm_detach(device, 0); 4553 if (retcode < SS_SUCCESS || retcode > NO_ERROR) { 4554 drbd_msg_put_info(adm_ctx->reply_skb, "failed to detach"); 4555 goto out; 4556 } 4557 } 4558 4559 /* delete volumes */ 4560 idr_for_each_entry(&resource->devices, device, i) { 4561 retcode = adm_del_minor(device); 4562 if (retcode != NO_ERROR) { 4563 /* "can not happen" */ 4564 drbd_msg_put_info(adm_ctx->reply_skb, "failed to delete volume"); 4565 goto out; 4566 } 4567 } 4568 4569 retcode = adm_del_resource(resource); 4570 out: 4571 mutex_unlock(&resource->adm_mutex); 4572 finish: 4573 
adm_ctx->reply_dh->ret_code = retcode; 4574 return 0; 4575 } 4576 4577 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) 4578 { 4579 struct drbd_config_context *adm_ctx = info->user_ptr[0]; 4580 struct drbd_resource *resource; 4581 enum drbd_ret_code retcode; 4582 4583 if (!adm_ctx->reply_skb) 4584 return 0; 4585 retcode = adm_ctx->reply_dh->ret_code; 4586 if (retcode != NO_ERROR) 4587 goto finish; 4588 resource = adm_ctx->resource; 4589 4590 mutex_lock(&resource->adm_mutex); 4591 retcode = adm_del_resource(resource); 4592 mutex_unlock(&resource->adm_mutex); 4593 finish: 4594 adm_ctx->reply_dh->ret_code = retcode; 4595 return 0; 4596 } 4597 4598 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) 4599 { 4600 struct sk_buff *msg; 4601 struct drbd_genlmsghdr *d_out; 4602 unsigned seq; 4603 int err = -ENOMEM; 4604 4605 seq = atomic_inc_return(&drbd_genl_seq); 4606 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 4607 if (!msg) 4608 goto failed; 4609 4610 err = -EMSGSIZE; 4611 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); 4612 if (!d_out) /* cannot happen, but anyways. */ 4613 goto nla_put_failure; 4614 d_out->minor = device_to_minor(device); 4615 d_out->ret_code = NO_ERROR; 4616 4617 if (nla_put_status_info(msg, device, sib)) 4618 goto nla_put_failure; 4619 genlmsg_end(msg, d_out); 4620 err = drbd_genl_multicast_events(msg, GFP_NOWAIT); 4621 /* msg has been consumed or freed in netlink_broadcast() */ 4622 if (err && err != -ESRCH) 4623 goto failed; 4624 4625 return; 4626 4627 nla_put_failure: 4628 nlmsg_free(msg); 4629 failed: 4630 drbd_err(device, "Error %d while broadcasting event. 
" 4631 "Event seq:%u sib_reason:%u\n", 4632 err, seq, sib->sib_reason); 4633 } 4634 4635 static int nla_put_notification_header(struct sk_buff *msg, 4636 enum drbd_notification_type type) 4637 { 4638 struct drbd_notification_header nh = { 4639 .nh_type = type, 4640 }; 4641 4642 return drbd_notification_header_to_skb(msg, &nh, true); 4643 } 4644 4645 int notify_resource_state(struct sk_buff *skb, 4646 unsigned int seq, 4647 struct drbd_resource *resource, 4648 struct resource_info *resource_info, 4649 enum drbd_notification_type type) 4650 { 4651 struct resource_statistics resource_statistics; 4652 struct drbd_genlmsghdr *dh; 4653 bool multicast = false; 4654 int err; 4655 4656 if (!skb) { 4657 seq = atomic_inc_return(¬ify_genl_seq); 4658 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 4659 err = -ENOMEM; 4660 if (!skb) 4661 goto failed; 4662 multicast = true; 4663 } 4664 4665 err = -EMSGSIZE; 4666 dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE); 4667 if (!dh) 4668 goto nla_put_failure; 4669 dh->minor = -1U; 4670 dh->ret_code = NO_ERROR; 4671 if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) || 4672 nla_put_notification_header(skb, type) || 4673 ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && 4674 resource_info_to_skb(skb, resource_info, true))) 4675 goto nla_put_failure; 4676 resource_statistics.res_stat_write_ordering = resource->write_ordering; 4677 err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN)); 4678 if (err) 4679 goto nla_put_failure; 4680 genlmsg_end(skb, dh); 4681 if (multicast) { 4682 err = drbd_genl_multicast_events(skb, GFP_NOWAIT); 4683 /* skb has been consumed or freed in netlink_broadcast() */ 4684 if (err && err != -ESRCH) 4685 goto failed; 4686 } 4687 return 0; 4688 4689 nla_put_failure: 4690 nlmsg_free(skb); 4691 failed: 4692 drbd_err(resource, "Error %d while broadcasting event. 
Event seq:%u\n", 4693 err, seq); 4694 return err; 4695 } 4696 4697 int notify_device_state(struct sk_buff *skb, 4698 unsigned int seq, 4699 struct drbd_device *device, 4700 struct device_info *device_info, 4701 enum drbd_notification_type type) 4702 { 4703 struct device_statistics device_statistics; 4704 struct drbd_genlmsghdr *dh; 4705 bool multicast = false; 4706 int err; 4707 4708 if (!skb) { 4709 seq = atomic_inc_return(¬ify_genl_seq); 4710 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 4711 err = -ENOMEM; 4712 if (!skb) 4713 goto failed; 4714 multicast = true; 4715 } 4716 4717 err = -EMSGSIZE; 4718 dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE); 4719 if (!dh) 4720 goto nla_put_failure; 4721 dh->minor = device->minor; 4722 dh->ret_code = NO_ERROR; 4723 if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) || 4724 nla_put_notification_header(skb, type) || 4725 ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && 4726 device_info_to_skb(skb, device_info, true))) 4727 goto nla_put_failure; 4728 device_to_statistics(&device_statistics, device); 4729 device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN)); 4730 genlmsg_end(skb, dh); 4731 if (multicast) { 4732 err = drbd_genl_multicast_events(skb, GFP_NOWAIT); 4733 /* skb has been consumed or freed in netlink_broadcast() */ 4734 if (err && err != -ESRCH) 4735 goto failed; 4736 } 4737 return 0; 4738 4739 nla_put_failure: 4740 nlmsg_free(skb); 4741 failed: 4742 drbd_err(device, "Error %d while broadcasting event. 
Event seq:%u\n", 4743 err, seq); 4744 return err; 4745 } 4746 4747 int notify_connection_state(struct sk_buff *skb, 4748 unsigned int seq, 4749 struct drbd_connection *connection, 4750 struct connection_info *connection_info, 4751 enum drbd_notification_type type) 4752 { 4753 struct connection_statistics connection_statistics; 4754 struct drbd_genlmsghdr *dh; 4755 bool multicast = false; 4756 int err; 4757 4758 if (!skb) { 4759 seq = atomic_inc_return(¬ify_genl_seq); 4760 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 4761 err = -ENOMEM; 4762 if (!skb) 4763 goto failed; 4764 multicast = true; 4765 } 4766 4767 err = -EMSGSIZE; 4768 dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE); 4769 if (!dh) 4770 goto nla_put_failure; 4771 dh->minor = -1U; 4772 dh->ret_code = NO_ERROR; 4773 if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) || 4774 nla_put_notification_header(skb, type) || 4775 ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && 4776 connection_info_to_skb(skb, connection_info, true))) 4777 goto nla_put_failure; 4778 connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags); 4779 connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN)); 4780 genlmsg_end(skb, dh); 4781 if (multicast) { 4782 err = drbd_genl_multicast_events(skb, GFP_NOWAIT); 4783 /* skb has been consumed or freed in netlink_broadcast() */ 4784 if (err && err != -ESRCH) 4785 goto failed; 4786 } 4787 return 0; 4788 4789 nla_put_failure: 4790 nlmsg_free(skb); 4791 failed: 4792 drbd_err(connection, "Error %d while broadcasting event. 
Event seq:%u\n", 4793 err, seq); 4794 return err; 4795 } 4796 4797 int notify_peer_device_state(struct sk_buff *skb, 4798 unsigned int seq, 4799 struct drbd_peer_device *peer_device, 4800 struct peer_device_info *peer_device_info, 4801 enum drbd_notification_type type) 4802 { 4803 struct peer_device_statistics peer_device_statistics; 4804 struct drbd_resource *resource = peer_device->device->resource; 4805 struct drbd_genlmsghdr *dh; 4806 bool multicast = false; 4807 int err; 4808 4809 if (!skb) { 4810 seq = atomic_inc_return(¬ify_genl_seq); 4811 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 4812 err = -ENOMEM; 4813 if (!skb) 4814 goto failed; 4815 multicast = true; 4816 } 4817 4818 err = -EMSGSIZE; 4819 dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE); 4820 if (!dh) 4821 goto nla_put_failure; 4822 dh->minor = -1U; 4823 dh->ret_code = NO_ERROR; 4824 if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) || 4825 nla_put_notification_header(skb, type) || 4826 ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && 4827 peer_device_info_to_skb(skb, peer_device_info, true))) 4828 goto nla_put_failure; 4829 peer_device_to_statistics(&peer_device_statistics, peer_device); 4830 peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN)); 4831 genlmsg_end(skb, dh); 4832 if (multicast) { 4833 err = drbd_genl_multicast_events(skb, GFP_NOWAIT); 4834 /* skb has been consumed or freed in netlink_broadcast() */ 4835 if (err && err != -ESRCH) 4836 goto failed; 4837 } 4838 return 0; 4839 4840 nla_put_failure: 4841 nlmsg_free(skb); 4842 failed: 4843 drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", 4844 err, seq); 4845 return err; 4846 } 4847 4848 void notify_helper(enum drbd_notification_type type, 4849 struct drbd_device *device, struct drbd_connection *connection, 4850 const char *name, int status) 4851 { 4852 struct drbd_resource *resource = device ? 
device->resource : connection->resource; 4853 struct drbd_helper_info helper_info; 4854 unsigned int seq = atomic_inc_return(¬ify_genl_seq); 4855 struct sk_buff *skb = NULL; 4856 struct drbd_genlmsghdr *dh; 4857 int err; 4858 4859 strscpy(helper_info.helper_name, name, sizeof(helper_info.helper_name)); 4860 helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name)); 4861 helper_info.helper_status = status; 4862 4863 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 4864 err = -ENOMEM; 4865 if (!skb) 4866 goto fail; 4867 4868 err = -EMSGSIZE; 4869 dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER); 4870 if (!dh) 4871 goto fail; 4872 dh->minor = device ? device->minor : -1; 4873 dh->ret_code = NO_ERROR; 4874 mutex_lock(¬ification_mutex); 4875 if (nla_put_drbd_cfg_context(skb, resource, connection, device) || 4876 nla_put_notification_header(skb, type) || 4877 drbd_helper_info_to_skb(skb, &helper_info, true)) 4878 goto unlock_fail; 4879 genlmsg_end(skb, dh); 4880 err = drbd_genl_multicast_events(skb, GFP_NOWAIT); 4881 skb = NULL; 4882 /* skb has been consumed or freed in netlink_broadcast() */ 4883 if (err && err != -ESRCH) 4884 goto unlock_fail; 4885 mutex_unlock(¬ification_mutex); 4886 return; 4887 4888 unlock_fail: 4889 mutex_unlock(¬ification_mutex); 4890 fail: 4891 nlmsg_free(skb); 4892 drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", 4893 err, seq); 4894 } 4895 4896 static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) 4897 { 4898 struct drbd_genlmsghdr *dh; 4899 int err; 4900 4901 err = -EMSGSIZE; 4902 dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE); 4903 if (!dh) 4904 goto nla_put_failure; 4905 dh->minor = -1U; 4906 dh->ret_code = NO_ERROR; 4907 if (nla_put_notification_header(skb, NOTIFY_EXISTS)) 4908 goto nla_put_failure; 4909 genlmsg_end(skb, dh); 4910 return 0; 4911 4912 nla_put_failure: 4913 nlmsg_free(skb); 4914 pr_err("Error %d sending event. 
Event seq:%u\n", err, seq); 4915 return err; 4916 } 4917 4918 static void free_state_changes(struct list_head *list) 4919 { 4920 while (!list_empty(list)) { 4921 struct drbd_state_change *state_change = 4922 list_first_entry(list, struct drbd_state_change, list); 4923 list_del(&state_change->list); 4924 forget_state_change(state_change); 4925 } 4926 } 4927 4928 static unsigned int notifications_for_state_change(struct drbd_state_change *state_change) 4929 { 4930 return 1 + 4931 state_change->n_connections + 4932 state_change->n_devices + 4933 state_change->n_devices * state_change->n_connections; 4934 } 4935 4936 static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) 4937 { 4938 struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0]; 4939 unsigned int seq = cb->args[2]; 4940 unsigned int n; 4941 enum drbd_notification_type flags = 0; 4942 int err = 0; 4943 4944 /* There is no need for taking notification_mutex here: it doesn't 4945 matter if the initial state events mix with later state chage 4946 events; we can always tell the events apart by the NOTIFY_EXISTS 4947 flag. 
*/ 4948 4949 cb->args[5]--; 4950 if (cb->args[5] == 1) { 4951 err = notify_initial_state_done(skb, seq); 4952 goto out; 4953 } 4954 n = cb->args[4]++; 4955 if (cb->args[4] < cb->args[3]) 4956 flags |= NOTIFY_CONTINUES; 4957 if (n < 1) { 4958 err = notify_resource_state_change(skb, seq, state_change->resource, 4959 NOTIFY_EXISTS | flags); 4960 goto next; 4961 } 4962 n--; 4963 if (n < state_change->n_connections) { 4964 err = notify_connection_state_change(skb, seq, &state_change->connections[n], 4965 NOTIFY_EXISTS | flags); 4966 goto next; 4967 } 4968 n -= state_change->n_connections; 4969 if (n < state_change->n_devices) { 4970 err = notify_device_state_change(skb, seq, &state_change->devices[n], 4971 NOTIFY_EXISTS | flags); 4972 goto next; 4973 } 4974 n -= state_change->n_devices; 4975 if (n < state_change->n_devices * state_change->n_connections) { 4976 err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], 4977 NOTIFY_EXISTS | flags); 4978 goto next; 4979 } 4980 4981 next: 4982 if (cb->args[4] == cb->args[3]) { 4983 struct drbd_state_change *next_state_change = 4984 list_entry(state_change->list.next, 4985 struct drbd_state_change, list); 4986 cb->args[0] = (long)next_state_change; 4987 cb->args[3] = notifications_for_state_change(next_state_change); 4988 cb->args[4] = 0; 4989 } 4990 out: 4991 if (err) 4992 return err; 4993 else 4994 return skb->len; 4995 } 4996 4997 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) 4998 { 4999 struct drbd_resource *resource; 5000 LIST_HEAD(head); 5001 5002 if (cb->args[5] >= 1) { 5003 if (cb->args[5] > 1) 5004 return get_initial_state(skb, cb); 5005 if (cb->args[0]) { 5006 struct drbd_state_change *state_change = 5007 (struct drbd_state_change *)cb->args[0]; 5008 5009 /* connect list to head */ 5010 list_add(&head, &state_change->list); 5011 free_state_changes(&head); 5012 } 5013 return 0; 5014 } 5015 5016 cb->args[5] = 2; /* number of iterations */ 5017 
mutex_lock(&resources_mutex); 5018 for_each_resource(resource, &drbd_resources) { 5019 struct drbd_state_change *state_change; 5020 5021 state_change = remember_old_state(resource, GFP_KERNEL); 5022 if (!state_change) { 5023 if (!list_empty(&head)) 5024 free_state_changes(&head); 5025 mutex_unlock(&resources_mutex); 5026 return -ENOMEM; 5027 } 5028 copy_old_to_new_state_change(state_change); 5029 list_add_tail(&state_change->list, &head); 5030 cb->args[5] += notifications_for_state_change(state_change); 5031 } 5032 mutex_unlock(&resources_mutex); 5033 5034 if (!list_empty(&head)) { 5035 struct drbd_state_change *state_change = 5036 list_entry(head.next, struct drbd_state_change, list); 5037 cb->args[0] = (long)state_change; 5038 cb->args[3] = notifications_for_state_change(state_change); 5039 list_del(&head); /* detach list from head */ 5040 } 5041 5042 cb->args[2] = cb->nlh->nlmsg_seq; 5043 return get_initial_state(skb, cb); 5044 } 5045