/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * This file is released under GPLv2 or later.
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;

struct nbd_sock {
        struct socket *sock;
        struct mutex tx_lock;
        struct request *pending;
        int sent;
        bool dead;
        int fallback_index;
        int cookie;
};

struct recv_thread_args {
        struct work_struct work;
        struct nbd_device *nbd;
        int index;
};

struct link_dead_args {
        struct work_struct work;
        int index;
};

#define NBD_TIMEDOUT                    0
#define NBD_DISCONNECT_REQUESTED        1
#define NBD_DISCONNECTED                2
#define NBD_HAS_PID_FILE                3
#define NBD_HAS_CONFIG_REF              4
#define NBD_BOUND                       5
#define NBD_DESTROY_ON_DISCONNECT       6

struct nbd_config {
        u32 flags;
        unsigned long runtime_flags;
        u64 dead_conn_timeout;

        struct nbd_sock **socks;
        int num_connections;
        atomic_t live_connections;
        wait_queue_head_t conn_wait;

        atomic_t recv_threads;
        wait_queue_head_t recv_wq;
        loff_t blksize;
        loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
        struct dentry *dbg_dir;
#endif
};

struct nbd_device {
        struct blk_mq_tag_set tag_set;

        int index;
        refcount_t config_refs;
        refcount_t refs;
        struct nbd_config *config;
        struct mutex config_lock;
        struct gendisk *disk;

        struct list_head list;
        struct task_struct *task_recv;
        struct task_struct *task_setup;
};

struct nbd_cmd {
        struct nbd_device *nbd;
        int index;
        int cookie;
        struct completion send_complete;
        blk_status_t status;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

#define NBD_MAGIC 0x68797548

static unsigned int nbds_max = 16;
static int max_part = 16;
static struct workqueue_struct *recv_workqueue;
static int part_shift;

static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);

static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
        return disk_to_dev(nbd->disk);
}

static const char *nbdcmd_to_ascii(int cmd)
{
        switch (cmd) {
        case NBD_CMD_READ: return "read";
        case NBD_CMD_WRITE: return "write";
        case NBD_CMD_DISC: return "disconnect";
        case NBD_CMD_FLUSH: return "flush";
        case NBD_CMD_TRIM: return "trim/discard";
        }
        return "invalid";
}

static ssize_t pid_show(struct device *dev,
                        struct device_attribute *attr, char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

        return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}

static const struct device_attribute pid_attr = {
        .attr = { .name = "pid", .mode = S_IRUGO},
        .show = pid_show,
};

static void nbd_dev_remove(struct nbd_device *nbd)
{
        struct gendisk *disk = nbd->disk;
        if (disk) {
                del_gendisk(disk);
                blk_cleanup_queue(disk->queue);
                blk_mq_free_tag_set(&nbd->tag_set);
                disk->private_data = NULL;
                put_disk(disk);
        }
        kfree(nbd);
}

static void nbd_put(struct nbd_device *nbd)
{
        if (refcount_dec_and_mutex_lock(&nbd->refs,
                                        &nbd_index_mutex)) {
                idr_remove(&nbd_index_idr, nbd->index);
                mutex_unlock(&nbd_index_mutex);
                nbd_dev_remove(nbd);
        }
}

static int nbd_disconnected(struct nbd_config *config)
{
        return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
                test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
}

static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
                                int notify)
{
        if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
                struct link_dead_args *args;
                args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
                if (args) {
                        INIT_WORK(&args->work, nbd_dead_link_work);
                        args->index = nbd->index;
                        queue_work(system_wq, &args->work);
                }
        }
        if (!nsock->dead) {
                kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
                atomic_dec(&nbd->config->live_connections);
        }
        nsock->dead = true;
        nsock->pending = NULL;
        nsock->sent = 0;
}

static void nbd_size_clear(struct nbd_device *nbd)
{
        if (nbd->config->bytesize) {
                set_capacity(nbd->disk, 0);
                kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
        }
}

static void nbd_size_update(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;
        blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
        blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
        set_capacity(nbd->disk, config->bytesize >> 9);
        kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
                         loff_t nr_blocks)
{
        struct nbd_config *config = nbd->config;
        config->blksize = blocksize;
        config->bytesize = blocksize * nr_blocks;
}

static void nbd_complete_rq(struct request *req)
{
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

        dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd,
                cmd->status ? "failed" : "done");

        blk_mq_end_request(req, cmd->status);
}

/*
 * Forcibly shutdown the socket causing all listeners to error
 */
static void sock_shutdown(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;
        int i;

        if (config->num_connections == 0)
                return;
        if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
                return;

        for (i = 0; i < config->num_connections; i++) {
                struct nbd_sock *nsock = config->socks[i];
                mutex_lock(&nsock->tx_lock);
                nbd_mark_nsock_dead(nbd, nsock, 0);
                mutex_unlock(&nsock->tx_lock);
        }
        dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}

static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
                                                 bool reserved)
{
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
        struct nbd_device *nbd = cmd->nbd;
        struct nbd_config *config;

        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                cmd->status = BLK_STS_TIMEOUT;
                return BLK_EH_HANDLED;
        }

        /* If we are waiting on our dead timer then we could get timeout
         * callbacks for our request.  For this we just want to reset the timer
         * and let the queue side take care of everything.
         */
        if (!completion_done(&cmd->send_complete)) {
                nbd_config_put(nbd);
                return BLK_EH_RESET_TIMER;
        }
        config = nbd->config;

        if (config->num_connections > 1) {
                dev_err_ratelimited(nbd_to_dev(nbd),
                                    "Connection timed out, retrying\n");
                /*
                 * Hooray we have more connections, requeue this IO, the submit
                 * path will put it on a real connection.
                 */
                if (config->socks && config->num_connections > 1) {
                        if (cmd->index < config->num_connections) {
                                struct nbd_sock *nsock =
                                        config->socks[cmd->index];
                                mutex_lock(&nsock->tx_lock);
                                /* We can have multiple outstanding requests, so
                                 * we don't want to mark the nsock dead if we've
                                 * already reconnected with a new socket, so
                                 * only mark it dead if it's the same socket we
                                 * were sent out on.
                                 */
                                if (cmd->cookie == nsock->cookie)
                                        nbd_mark_nsock_dead(nbd, nsock, 1);
                                mutex_unlock(&nsock->tx_lock);
                        }
                        blk_mq_requeue_request(req, true);
                        nbd_config_put(nbd);
                        return BLK_EH_NOT_HANDLED;
                }
        } else {
                dev_err_ratelimited(nbd_to_dev(nbd),
                                    "Connection timed out\n");
        }
        set_bit(NBD_TIMEDOUT, &config->runtime_flags);
        cmd->status = BLK_STS_IOERR;
        sock_shutdown(nbd);
        nbd_config_put(nbd);

        return BLK_EH_HANDLED;
}

/*
 * Send or receive packet.
 */
static int sock_xmit(struct nbd_device *nbd, int index, int send,
                     struct iov_iter *iter, int msg_flags, int *sent)
{
        struct nbd_config *config = nbd->config;
        struct socket *sock = config->socks[index]->sock;
        int result;
        struct msghdr msg;
        unsigned int noreclaim_flag;

        if (unlikely(!sock)) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                        "Attempted %s on closed socket in sock_xmit\n",
                        (send ? "send" : "recv"));
                return -EINVAL;
        }

        msg.msg_iter = *iter;

        noreclaim_flag = memalloc_noreclaim_save();
        do {
                sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
                msg.msg_name = NULL;
                msg.msg_namelen = 0;
                msg.msg_control = NULL;
                msg.msg_controllen = 0;
                msg.msg_flags = msg_flags | MSG_NOSIGNAL;

                if (send)
                        result = sock_sendmsg(sock, &msg);
                else
                        result = sock_recvmsg(sock, &msg, msg.msg_flags);

                if (result <= 0) {
                        if (result == 0)
                                result = -EPIPE; /* short read */
                        break;
                }
                if (sent)
                        *sent += result;
        } while (msg_data_left(&msg));

        memalloc_noreclaim_restore(noreclaim_flag);

        return result;
}

/*
 * Different settings for sk->sk_sndtimeo can result in different return values
 * if there is a signal pending when we enter sendmsg, because reasons?
 */
static inline int was_interrupted(int result)
{
        return result == -ERESTARTSYS || result == -EINTR;
}

/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
        struct request *req = blk_mq_rq_from_pdu(cmd);
        struct nbd_config *config = nbd->config;
        struct nbd_sock *nsock = config->socks[index];
        int result;
        struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
        struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
        struct iov_iter from;
        unsigned long size = blk_rq_bytes(req);
        struct bio *bio;
        u32 type;
        u32 nbd_cmd_flags = 0;
        u32 tag = blk_mq_unique_tag(req);
        int sent = nsock->sent, skip = 0;

        iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));

        switch (req_op(req)) {
        case REQ_OP_DISCARD:
                type = NBD_CMD_TRIM;
                break;
        case REQ_OP_FLUSH:
                type = NBD_CMD_FLUSH;
                break;
        case REQ_OP_WRITE:
                type = NBD_CMD_WRITE;
                break;
        case REQ_OP_READ:
                type = NBD_CMD_READ;
                break;
        default:
                return -EIO;
        }

        if (rq_data_dir(req) == WRITE &&
            (config->flags & NBD_FLAG_READ_ONLY)) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Write on read-only\n");
                return -EIO;
        }

        if (req->cmd_flags & REQ_FUA)
                nbd_cmd_flags |= NBD_CMD_FLAG_FUA;

        /* We did a partial send previously, and we at least sent the whole
         * request struct, so just go and send the rest of the pages in the
         * request.
         */
        if (sent) {
                if (sent >= sizeof(request)) {
                        skip = sent - sizeof(request);
                        goto send_pages;
                }
                iov_iter_advance(&from, sent);
        }
        cmd->index = index;
        cmd->cookie = nsock->cookie;
        request.type = htonl(type | nbd_cmd_flags);
        if (type != NBD_CMD_FLUSH) {
                request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
                request.len = htonl(size);
        }
        memcpy(request.handle, &tag, sizeof(tag));

        dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
                cmd, nbdcmd_to_ascii(type),
                (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
        result = sock_xmit(nbd, index, 1, &from,
                        (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
        if (result <= 0) {
                if (was_interrupted(result)) {
                        /* If we haven't sent anything we can just return BUSY,
                         * however if we have sent something we need to make
                         * sure we only allow this req to be sent until we are
                         * completely done.
                         */
                        if (sent) {
                                nsock->pending = req;
                                nsock->sent = sent;
                        }
                        return BLK_STS_RESOURCE;
                }
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                        "Send control failed (result %d)\n", result);
                return -EAGAIN;
        }
send_pages:
        if (type != NBD_CMD_WRITE)
                goto out;

        bio = req->bio;
        while (bio) {
                struct bio *next = bio->bi_next;
                struct bvec_iter iter;
                struct bio_vec bvec;

                bio_for_each_segment(bvec, bio, iter) {
                        bool is_last = !next && bio_iter_last(bvec, iter);
                        int flags = is_last ? 0 : MSG_MORE;

                        dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
                                cmd, bvec.bv_len);
                        iov_iter_bvec(&from, ITER_BVEC | WRITE,
                                      &bvec, 1, bvec.bv_len);
                        if (skip) {
                                if (skip >= iov_iter_count(&from)) {
                                        skip -= iov_iter_count(&from);
                                        continue;
                                }
                                iov_iter_advance(&from, skip);
                                skip = 0;
                        }
                        result = sock_xmit(nbd, index, 1, &from, flags, &sent);
                        if (result <= 0) {
                                if (was_interrupted(result)) {
                                        /* We've already sent the header, we
                                         * have no choice but to set pending and
                                         * return BUSY.
                                         */
                                        nsock->pending = req;
                                        nsock->sent = sent;
                                        return BLK_STS_RESOURCE;
                                }
                                dev_err(disk_to_dev(nbd->disk),
                                        "Send data failed (result %d)\n",
                                        result);
                                return -EAGAIN;
                        }
                        /*
                         * The completion might already have come in,
                         * so break for the last one instead of letting
                         * the iterator do it. This prevents use-after-free
                         * of the bio.
                         */
                        if (is_last)
                                break;
                }
                bio = next;
        }
out:
        nsock->pending = NULL;
        nsock->sent = 0;
        return 0;
}

/* NULL returned = something went wrong, inform userspace */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
{
        struct nbd_config *config = nbd->config;
        int result;
        struct nbd_reply reply;
        struct nbd_cmd *cmd;
        struct request *req = NULL;
        u16 hwq;
        u32 tag;
        struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
        struct iov_iter to;

        reply.magic = 0;
        iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
        if (result <= 0) {
                if (!nbd_disconnected(config))
                        dev_err(disk_to_dev(nbd->disk),
                                "Receive control failed (result %d)\n", result);
                return ERR_PTR(result);
        }

        if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
                dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
                                (unsigned long)ntohl(reply.magic));
                return ERR_PTR(-EPROTO);
        }

        memcpy(&tag, reply.handle, sizeof(u32));

        hwq = blk_mq_unique_tag_to_hwq(tag);
        if (hwq < nbd->tag_set.nr_hw_queues)
                req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
                                       blk_mq_unique_tag_to_tag(tag));
        if (!req || !blk_mq_request_started(req)) {
                dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
                        tag, req);
                return ERR_PTR(-ENOENT);
        }
        cmd = blk_mq_rq_to_pdu(req);
        if (ntohl(reply.error)) {
                dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
                        ntohl(reply.error));
                cmd->status = BLK_STS_IOERR;
                return cmd;
        }

        dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
        if (rq_data_dir(req) != WRITE) {
                struct req_iterator iter;
                struct bio_vec bvec;

                rq_for_each_segment(bvec, req, iter) {
                        iov_iter_bvec(&to, ITER_BVEC | READ,
                                      &bvec, 1, bvec.bv_len);
                        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
                        if (result <= 0) {
                                dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
                                        result);
                                /*
                                 * If we've disconnected or we only have 1
                                 * connection then we need to make sure we
                                 * complete this request, otherwise error out
                                 * and let the timeout stuff handle resubmitting
                                 * this request onto another connection.
                                 */
                                if (nbd_disconnected(config) ||
                                    config->num_connections <= 1) {
                                        cmd->status = BLK_STS_IOERR;
                                        return cmd;
                                }
                                return ERR_PTR(-EIO);
                        }
                        dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
                                cmd, bvec.bv_len);
                }
        } else {
                /* See the comment in nbd_queue_rq. */
                wait_for_completion(&cmd->send_complete);
        }
        return cmd;
}

static void recv_work(struct work_struct *work)
{
        struct recv_thread_args *args = container_of(work,
                                                     struct recv_thread_args,
                                                     work);
        struct nbd_device *nbd = args->nbd;
        struct nbd_config *config = nbd->config;
        struct nbd_cmd *cmd;

        while (1) {
                cmd = nbd_read_stat(nbd, args->index);
                if (IS_ERR(cmd)) {
                        struct nbd_sock *nsock = config->socks[args->index];

                        mutex_lock(&nsock->tx_lock);
                        nbd_mark_nsock_dead(nbd, nsock, 1);
                        mutex_unlock(&nsock->tx_lock);
                        break;
                }

                blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
        }
        atomic_dec(&config->recv_threads);
        wake_up(&config->recv_wq);
        nbd_config_put(nbd);
        kfree(args);
}

static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
        struct nbd_cmd *cmd;

        if (!blk_mq_request_started(req))
                return;
        cmd = blk_mq_rq_to_pdu(req);
        cmd->status = BLK_STS_IOERR;
        blk_mq_complete_request(req);
}

static void nbd_clear_que(struct nbd_device *nbd)
{
        blk_mq_quiesce_queue(nbd->disk->queue);
        blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
        blk_mq_unquiesce_queue(nbd->disk->queue);
        dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}

static int find_fallback(struct nbd_device *nbd, int index)
{
        struct nbd_config *config = nbd->config;
        int new_index = -1;
        struct nbd_sock *nsock = config->socks[index];
        int fallback = nsock->fallback_index;

        if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
                return new_index;

        if (config->num_connections <= 1) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Attempted send on invalid socket\n");
                return new_index;
        }

        if (fallback >= 0 && fallback < config->num_connections &&
            !config->socks[fallback]->dead)
                return fallback;

        if (nsock->fallback_index < 0 ||
            nsock->fallback_index >= config->num_connections ||
            config->socks[nsock->fallback_index]->dead) {
                int i;
                for (i = 0; i < config->num_connections; i++) {
                        if (i == index)
                                continue;
                        if (!config->socks[i]->dead) {
                                new_index = i;
                                break;
                        }
                }
                nsock->fallback_index = new_index;
                if (new_index < 0) {
                        dev_err_ratelimited(disk_to_dev(nbd->disk),
                                            "Dead connection, failed to find a fallback\n");
                        return new_index;
                }
        }
        new_index = nsock->fallback_index;
        return new_index;
}

static int wait_for_reconnect(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;
        if (!config->dead_conn_timeout)
                return 0;
        if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
                return 0;
        wait_event_interruptible_timeout(config->conn_wait,
                                         atomic_read(&config->live_connections),
                                         config->dead_conn_timeout);
        return atomic_read(&config->live_connections);
}

static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
        struct request *req = blk_mq_rq_from_pdu(cmd);
        struct nbd_device *nbd = cmd->nbd;
        struct nbd_config *config;
        struct nbd_sock *nsock;
        int ret;

        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Socks array is empty\n");
                return -EINVAL;
        }
        config = nbd->config;

        if (index >= config->num_connections) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Attempted send on invalid socket\n");
                nbd_config_put(nbd);
                return -EINVAL;
        }
        cmd->status = BLK_STS_OK;
again:
        nsock = config->socks[index];
        mutex_lock(&nsock->tx_lock);
        if (nsock->dead) {
                int old_index = index;
                index = find_fallback(nbd, index);
                mutex_unlock(&nsock->tx_lock);
                if (index < 0) {
                        if (wait_for_reconnect(nbd)) {
                                index = old_index;
                                goto again;
                        }
                        /* All the sockets should already be down at this point,
                         * we just want to make sure that DISCONNECTED is set so
                         * any requests that come in that were queued waiting
                         * for the reconnect timer don't trigger the timer again
                         * and instead just error out.
                         */
                        sock_shutdown(nbd);
                        nbd_config_put(nbd);
                        return -EIO;
                }
                goto again;
        }

        /* Handle the case that we have a pending request that was partially
         * transmitted that _has_ to be serviced first.  We need to call requeue
         * here so that it gets put _after_ the request that is already on the
         * dispatch list.
         */
        if (unlikely(nsock->pending && nsock->pending != req)) {
                blk_mq_requeue_request(req, true);
                ret = 0;
                goto out;
        }
        /*
         * Some failures are related to the link going down, so anything that
         * returns EAGAIN can be retried on a different socket.
         */
        ret = nbd_send_cmd(nbd, cmd, index);
        if (ret == -EAGAIN) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Request send failed trying another connection\n");
                nbd_mark_nsock_dead(nbd, nsock, 1);
                mutex_unlock(&nsock->tx_lock);
                goto again;
        }
out:
        mutex_unlock(&nsock->tx_lock);
        nbd_config_put(nbd);
        return ret;
}

static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
                        const struct blk_mq_queue_data *bd)
{
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
        int ret;

        /*
         * Since we look at the bio's to send the request over the network we
         * need to make sure the completion work doesn't mark this request done
         * before we are done doing our send.  This keeps us from dereferencing
         * freed data if we have particularly fast completions (ie we get the
         * completion before we exit sock_xmit on the last bvec) or in the case
         * that the server is misbehaving (or there was an error) before we're
         * done sending everything over the wire.
         */
        init_completion(&cmd->send_complete);
        blk_mq_start_request(bd->rq);

        /* We can be called directly from the user space process, which means we
         * could possibly have signals pending so our sendmsg will fail.  In
         * this case we need to return that we are busy, otherwise error out as
         * appropriate.
         */
        ret = nbd_handle_cmd(cmd, hctx->queue_num);
        if (ret < 0)
                ret = BLK_STS_IOERR;
        else if (!ret)
                ret = BLK_STS_OK;
        complete(&cmd->send_complete);

        return ret;
}

static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
                          bool netlink)
{
        struct nbd_config *config = nbd->config;
        struct socket *sock;
        struct nbd_sock **socks;
        struct nbd_sock *nsock;
        int err;

        sock = sockfd_lookup(arg, &err);
        if (!sock)
                return err;

        if (!netlink && !nbd->task_setup &&
            !test_bit(NBD_BOUND, &config->runtime_flags))
                nbd->task_setup = current;

        if (!netlink &&
            (nbd->task_setup != current ||
             test_bit(NBD_BOUND, &config->runtime_flags))) {
                dev_err(disk_to_dev(nbd->disk),
                        "Device being setup by another task");
                sockfd_put(sock);
                return -EBUSY;
        }

        socks = krealloc(config->socks, (config->num_connections + 1) *
                         sizeof(struct nbd_sock *), GFP_KERNEL);
        if (!socks) {
                sockfd_put(sock);
                return -ENOMEM;
        }
        nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
        if (!nsock) {
                sockfd_put(sock);
                return -ENOMEM;
        }

        config->socks = socks;

        nsock->fallback_index = -1;
        nsock->dead = false;
        mutex_init(&nsock->tx_lock);
        nsock->sock = sock;
        nsock->pending = NULL;
        nsock->sent = 0;
        nsock->cookie = 0;
        socks[config->num_connections++] = nsock;
        atomic_inc(&config->live_connections);

        return 0;
}

static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
{
        struct nbd_config *config = nbd->config;
        struct socket *sock, *old;
        struct recv_thread_args *args;
        int i;
        int err;

        sock = sockfd_lookup(arg, &err);
        if (!sock)
                return err;

        args = kzalloc(sizeof(*args), GFP_KERNEL);
        if (!args) {
                sockfd_put(sock);
                return -ENOMEM;
        }

        for (i = 0; i < config->num_connections; i++) {
                struct nbd_sock *nsock = config->socks[i];

                if (!nsock->dead)
                        continue;

                mutex_lock(&nsock->tx_lock);
                if (!nsock->dead) {
                        mutex_unlock(&nsock->tx_lock);
                        continue;
                }
                sk_set_memalloc(sock->sk);
                if (nbd->tag_set.timeout)
                        sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
                atomic_inc(&config->recv_threads);
                refcount_inc(&nbd->config_refs);
                old = nsock->sock;
                nsock->fallback_index = -1;
                nsock->sock = sock;
                nsock->dead = false;
                INIT_WORK(&args->work, recv_work);
                args->index = i;
                args->nbd = nbd;
                nsock->cookie++;
                mutex_unlock(&nsock->tx_lock);
                sockfd_put(old);

                clear_bit(NBD_DISCONNECTED, &config->runtime_flags);

                /* We take the tx_mutex in an error path in the recv_work, so we
                 * need to queue_work outside of the tx_mutex.
                 */
                queue_work(recv_workqueue, &args->work);

                atomic_inc(&config->live_connections);
                wake_up(&config->conn_wait);
                return 0;
        }
        sockfd_put(sock);
        kfree(args);
        return -ENOSPC;
}

static void nbd_bdev_reset(struct block_device *bdev)
{
        if (bdev->bd_openers > 1)
                return;
        bd_set_size(bdev, 0);
        if (max_part > 0) {
                blkdev_reread_part(bdev);
                bdev->bd_invalidated = 1;
        }
}

static void nbd_parse_flags(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;
        if (config->flags & NBD_FLAG_READ_ONLY)
                set_disk_ro(nbd->disk, true);
        else
                set_disk_ro(nbd->disk, false);
        if (config->flags & NBD_FLAG_SEND_TRIM)
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
        if (config->flags & NBD_FLAG_SEND_FLUSH) {
                if (config->flags & NBD_FLAG_SEND_FUA)
                        blk_queue_write_cache(nbd->disk->queue, true, true);
                else
                        blk_queue_write_cache(nbd->disk->queue, true, false);
        }
        else
                blk_queue_write_cache(nbd->disk->queue, false, false);
}

static void send_disconnects(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;
        struct nbd_request request = {
                .magic = htonl(NBD_REQUEST_MAGIC),
                .type = htonl(NBD_CMD_DISC),
        };
        struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
        struct iov_iter from;
        int i, ret;

        for (i = 0; i < config->num_connections; i++) {
                struct nbd_sock *nsock = config->socks[i];

                iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
                mutex_lock(&nsock->tx_lock);
                ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
                if (ret <= 0)
                        dev_err(disk_to_dev(nbd->disk),
                                "Send disconnect failed %d\n", ret);
                mutex_unlock(&nsock->tx_lock);
        }
}

static int nbd_disconnect(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;

        dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
        set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
        send_disconnects(nbd);
        return 0;
}

static void nbd_clear_sock(struct nbd_device *nbd)
{
        sock_shutdown(nbd);
        nbd_clear_que(nbd);
        nbd->task_setup = NULL;
}

static void nbd_config_put(struct nbd_device *nbd)
{
        if (refcount_dec_and_mutex_lock(&nbd->config_refs,
                                        &nbd->config_lock)) {
                struct nbd_config *config = nbd->config;
                nbd_dev_dbg_close(nbd);
                nbd_size_clear(nbd);
                if (test_and_clear_bit(NBD_HAS_PID_FILE,
                                       &config->runtime_flags))
                        device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
                nbd->task_recv = NULL;
                nbd_clear_sock(nbd);
                if (config->num_connections) {
                        int i;
                        for (i = 0; i < config->num_connections; i++) {
                                sockfd_put(config->socks[i]->sock);
                                kfree(config->socks[i]);
                        }
                        kfree(config->socks);
                }
                kfree(nbd->config);
                nbd->config = NULL;

                nbd->tag_set.timeout = 0;
                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);

                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
                module_put(THIS_MODULE);
        }
}

static int nbd_start_device(struct nbd_device *nbd)
{
        struct nbd_config *config = nbd->config;
        int num_connections = config->num_connections;
        int error = 0, i;

        if (nbd->task_recv)
                return -EBUSY;
        if (!config->socks)
                return -EINVAL;
        if (num_connections > 1 &&
            !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
                dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
                return -EINVAL;
        }

        blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
        nbd->task_recv = current;

        nbd_parse_flags(nbd);

        error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
        if (error) {
                dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
                return error;
        }
        set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);

        nbd_dev_dbg_init(nbd);
        for (i = 0; i < num_connections; i++) {
                struct recv_thread_args *args;

                args = kzalloc(sizeof(*args), GFP_KERNEL);
                if (!args) {
                        sock_shutdown(nbd);
                        return -ENOMEM;
                }
                sk_set_memalloc(config->socks[i]->sock->sk);
                if (nbd->tag_set.timeout)
                        config->socks[i]->sock->sk->sk_sndtimeo =
                                nbd->tag_set.timeout;
                atomic_inc(&config->recv_threads);
                refcount_inc(&nbd->config_refs);
                INIT_WORK(&args->work, recv_work);
                args->nbd = nbd;
                args->index = i;
                queue_work(recv_workqueue, &args->work);
        }
        nbd_size_update(nbd);
        return error;
}

static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
{
        struct nbd_config *config = nbd->config;
        int ret;

        ret = nbd_start_device(nbd);
        if (ret)
                return ret;

        bd_set_size(bdev, config->bytesize);
        if (max_part)
                bdev->bd_invalidated = 1;
        mutex_unlock(&nbd->config_lock);
        ret = wait_event_interruptible(config->recv_wq,
                                       atomic_read(&config->recv_threads) == 0);
        if (ret)
                sock_shutdown(nbd);
        mutex_lock(&nbd->config_lock);
        bd_set_size(bdev, 0);
        /* user requested, ignore socket errors */
        if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
                ret = 0;
        if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
                ret = -ETIMEDOUT;
        return ret;
}

static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
                                 struct block_device *bdev)
{
        sock_shutdown(nbd);
        kill_bdev(bdev);
        nbd_bdev_reset(bdev);
        if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
}

/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                       unsigned int cmd, unsigned long arg)
{
        struct nbd_config *config = nbd->config;

        switch (cmd) {
        case NBD_DISCONNECT:
                return nbd_disconnect(nbd);
        case NBD_CLEAR_SOCK:
                nbd_clear_sock_ioctl(nbd, bdev);
                return 0;
        case NBD_SET_SOCK:
                return nbd_add_socket(nbd, arg, false);
        case NBD_SET_BLKSIZE:
                nbd_size_set(nbd, arg,
                             div_s64(config->bytesize, arg));
                return 0;
        case NBD_SET_SIZE:
                nbd_size_set(nbd, config->blksize,
                             div_s64(arg, config->blksize));
                return 0;
        case NBD_SET_SIZE_BLOCKS:
                nbd_size_set(nbd, config->blksize, arg);
                return 0;
        case NBD_SET_TIMEOUT:
                if (arg) {
                        nbd->tag_set.timeout = arg * HZ;
                        blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
                }
                return 0;

        case NBD_SET_FLAGS:
                config->flags = arg;
                return 0;
        case NBD_DO_IT:
                return nbd_start_device_ioctl(nbd, bdev);
        case NBD_CLEAR_QUE:
                /*
                 * This is for compatibility only.  The queue is always cleared
                 * by NBD_DO_IT or NBD_CLEAR_SOCK.
                 */
                return 0;
        case NBD_PRINT_DEBUG:
                /*
                 * For compatibility only, we no longer keep a list of
                 * outstanding requests.
                 */
                return 0;
        }
        return -ENOTTY;
}

static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
                     unsigned int cmd, unsigned long arg)
{
        struct nbd_device *nbd = bdev->bd_disk->private_data;
        struct nbd_config *config = nbd->config;
        int error = -EINVAL;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        /* The block layer will pass back some non-nbd ioctls in case we have
         * special handling for them, but we don't so just return an error.
         */
        if (_IOC_TYPE(cmd) != 0xab)
                return -EINVAL;

        mutex_lock(&nbd->config_lock);

        /* Don't allow ioctl operations on a nbd device that was created with
         * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
         */
        if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
            (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
                error = __nbd_ioctl(bdev, nbd, cmd, arg);
        else
                dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
        mutex_unlock(&nbd->config_lock);
        return error;
}

static struct nbd_config *nbd_alloc_config(void)
{
        struct nbd_config *config;

        config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
        if (!config)
                return NULL;
        atomic_set(&config->recv_threads, 0);
        init_waitqueue_head(&config->recv_wq);
        init_waitqueue_head(&config->conn_wait);
        config->blksize = 1024;
        atomic_set(&config->live_connections, 0);
        try_module_get(THIS_MODULE);
        return config;
}

static int nbd_open(struct block_device *bdev, fmode_t mode)
{
        struct nbd_device *nbd;
        int ret = 0;

        mutex_lock(&nbd_index_mutex);
        nbd = bdev->bd_disk->private_data;
        if (!nbd) {
                ret = -ENXIO;
                goto out;
        }
        if (!refcount_inc_not_zero(&nbd->refs)) {
                ret = -ENXIO;
                goto out;
        }
        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                struct nbd_config *config;

                mutex_lock(&nbd->config_lock);
                if (refcount_inc_not_zero(&nbd->config_refs)) {
                        mutex_unlock(&nbd->config_lock);
                        goto out;
                }
                config = nbd->config = nbd_alloc_config();
                if (!config) {
                        ret = -ENOMEM;
                        mutex_unlock(&nbd->config_lock);
                        goto out;
                }
                refcount_set(&nbd->config_refs, 1);
                refcount_inc(&nbd->refs);
                mutex_unlock(&nbd->config_lock);
        }
out:
        mutex_unlock(&nbd_index_mutex);
        return ret;
}

static void nbd_release(struct gendisk *disk, fmode_t mode)
{
        struct nbd_device *nbd = disk->private_data;
        nbd_config_put(nbd);
        nbd_put(nbd);
}

static const struct block_device_operations nbd_fops =
{
        .owner =        THIS_MODULE,
        .open =         nbd_open,
        .release =      nbd_release,
        .ioctl =        nbd_ioctl,
        .compat_ioctl = nbd_ioctl,
};

#if IS_ENABLED(CONFIG_DEBUG_FS)

static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
        struct nbd_device *nbd = s->private;

        if (nbd->task_recv)
                seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));

        return 0;
}

static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
        return single_open(file, nbd_dbg_tasks_show, inode->i_private);
}

static const struct file_operations nbd_dbg_tasks_ops = {
        .open = nbd_dbg_tasks_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
        struct nbd_device *nbd = s->private;
        u32 flags = nbd->config->flags;

        seq_printf(s, "Hex: 0x%08x\n\n", flags);

        seq_puts(s, "Known flags:\n");

        if (flags & NBD_FLAG_HAS_FLAGS)
                seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
        if (flags & NBD_FLAG_READ_ONLY)
                seq_puts(s, "NBD_FLAG_READ_ONLY\n");
        if (flags & NBD_FLAG_SEND_FLUSH)
                seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
        if (flags & NBD_FLAG_SEND_FUA)
                seq_puts(s, "NBD_FLAG_SEND_FUA\n");
        if (flags & NBD_FLAG_SEND_TRIM)
                seq_puts(s, "NBD_FLAG_SEND_TRIM\n");

        return 0;
}

static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
        return single_open(file, nbd_dbg_flags_show, inode->i_private);
}

static const struct file_operations nbd_dbg_flags_ops = {
        .open = nbd_dbg_flags_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
        struct dentry *dir;
        struct nbd_config *config = nbd->config;

        if (!nbd_dbg_dir)
                return -EIO;

        dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
        if (!dir) {
                dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
                        nbd_name(nbd));
                return -EIO;
        }
        config->dbg_dir = dir;

        debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
        debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
        debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
        debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
        debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);

        return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
        debugfs_remove_recursive(nbd->config->dbg_dir);
}

static int nbd_dbg_init(void)
{
        struct dentry *dbg_dir;

        dbg_dir = debugfs_create_dir("nbd", NULL);
        if (!dbg_dir)
                return -EIO;

        nbd_dbg_dir = dbg_dir;

        return 0;
}

static void nbd_dbg_close(void)
{
        debugfs_remove_recursive(nbd_dbg_dir);
}

#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
        return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
        return 0;
}

static void nbd_dbg_close(void)
{
}

#endif

static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
                            unsigned int hctx_idx, unsigned int numa_node)
{
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
        cmd->nbd = set->driver_data;
        return 0;
}

static const struct blk_mq_ops nbd_mq_ops = {
        .queue_rq       = nbd_queue_rq,
        .complete       = nbd_complete_rq,
        .init_request   = nbd_init_request,
        .timeout        = nbd_xmit_timeout,
};

static int nbd_dev_add(int index)
{
        struct nbd_device *nbd;
        struct gendisk *disk;
        struct request_queue *q;
        int err = -ENOMEM;

        nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
        if (!nbd)
                goto out;

        disk = alloc_disk(1 << part_shift);
        if (!disk)
                goto out_free_nbd;

        if (index >= 0) {
                err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
                                GFP_KERNEL);
                if (err == -ENOSPC)
                        err = -EEXIST;
        } else {
                err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
                if (err >= 0)
                        index = err;
        }
        if (err < 0)
                goto out_free_disk;

        nbd->index = index;
        nbd->disk = disk;
        nbd->tag_set.ops = &nbd_mq_ops;
        nbd->tag_set.nr_hw_queues = 1;
        nbd->tag_set.queue_depth = 128;
        nbd->tag_set.numa_node = NUMA_NO_NODE;
        nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
        nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
                BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
        nbd->tag_set.driver_data = nbd;

        err = blk_mq_alloc_tag_set(&nbd->tag_set);
        if (err)
                goto out_free_idr;

        q = blk_mq_init_queue(&nbd->tag_set);
        if (IS_ERR(q)) {
                err = PTR_ERR(q);
                goto out_free_tags;
        }
        disk->queue = q;

        /*
         * Tell the block layer that we are not a rotational device
         */
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
        disk->queue->limits.discard_granularity = 512;
        blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
        blk_queue_max_segment_size(disk->queue, UINT_MAX);
        blk_queue_max_segments(disk->queue, USHRT_MAX);
        blk_queue_max_hw_sectors(disk->queue, 65536);
        disk->queue->limits.max_sectors = 256;

        mutex_init(&nbd->config_lock);
        refcount_set(&nbd->config_refs, 0);
        refcount_set(&nbd->refs, 1);
        INIT_LIST_HEAD(&nbd->list);
        disk->major = NBD_MAJOR;
        disk->first_minor = index << part_shift;
        disk->fops = &nbd_fops;
        disk->private_data = nbd;
        sprintf(disk->disk_name, "nbd%d", index);
        add_disk(disk);
        nbd_total_devices++;
        return index;

out_free_tags:
        blk_mq_free_tag_set(&nbd->tag_set);
out_free_idr:
        idr_remove(&nbd_index_idr, index);
out_free_disk:
        put_disk(disk);
out_free_nbd:
        kfree(nbd);
out:
        return err;
}

static int find_free_cb(int id, void *ptr, void *data)
{
        struct nbd_device *nbd = ptr;
        struct nbd_device **found = data;

        if (!refcount_read(&nbd->config_refs)) {
                *found = nbd;
                return 1;
        }
        return 0;
}

/* Netlink interface. */
static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
        [NBD_ATTR_INDEX]                =       { .type = NLA_U32 },
        [NBD_ATTR_SIZE_BYTES]           =       { .type = NLA_U64 },
        [NBD_ATTR_BLOCK_SIZE_BYTES]     =       { .type = NLA_U64 },
        [NBD_ATTR_TIMEOUT]              =       { .type = NLA_U64 },
        [NBD_ATTR_SERVER_FLAGS]         =       { .type = NLA_U64 },
        [NBD_ATTR_CLIENT_FLAGS]         =       { .type = NLA_U64 },
        [NBD_ATTR_SOCKETS]              =       { .type = NLA_NESTED},
        [NBD_ATTR_DEAD_CONN_TIMEOUT]    =       { .type = NLA_U64 },
        [NBD_ATTR_DEVICE_LIST]          =       { .type = NLA_NESTED},
};

static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
        [NBD_SOCK_FD]                   =       { .type = NLA_U32 },
};

/* We don't use this right now since we don't parse the incoming list, but we
 * still want it here so userspace knows what to expect.
 */
static struct nla_policy __attribute__((unused))
nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
        [NBD_DEVICE_INDEX]              =       { .type = NLA_U32 },
        [NBD_DEVICE_CONNECTED]          =       { .type = NLA_U8 },
};

static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index = -1;
        int ret;
        bool put_dev = false;

        if (!netlink_capable(skb, CAP_SYS_ADMIN))
                return -EPERM;

        if (info->attrs[NBD_ATTR_INDEX])
                index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
        if (!info->attrs[NBD_ATTR_SOCKETS]) {
                printk(KERN_ERR "nbd: must specify at least one socket\n");
                return -EINVAL;
        }
        if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
                printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
                return -EINVAL;
        }
again:
        mutex_lock(&nbd_index_mutex);
        if (index == -1) {
                ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
                if (ret == 0) {
                        int new_index;
                        new_index = nbd_dev_add(-1);
                        if (new_index < 0) {
                                mutex_unlock(&nbd_index_mutex);
                                printk(KERN_ERR "nbd: failed to add new device\n");
                                return ret;
                        }
                        nbd = idr_find(&nbd_index_idr, new_index);
                }
        } else {
                nbd = idr_find(&nbd_index_idr, index);
                if (!nbd) {
                        ret = nbd_dev_add(index);
                        if (ret < 0) {
                                mutex_unlock(&nbd_index_mutex);
                                printk(KERN_ERR "nbd: failed to add new device\n");
                                return ret;
                        }
                        nbd = idr_find(&nbd_index_idr, index);
                }
        }
        if (!nbd) {
                printk(KERN_ERR "nbd: couldn't find device at index %d\n",
                       index);
                mutex_unlock(&nbd_index_mutex);
                return -EINVAL;
        }
        if (!refcount_inc_not_zero(&nbd->refs)) {
                mutex_unlock(&nbd_index_mutex);
                if (index == -1)
                        goto again;
                printk(KERN_ERR "nbd: device at index %d is going down\n",
                       index);
                return -EINVAL;
        }
        mutex_unlock(&nbd_index_mutex);

        mutex_lock(&nbd->config_lock);
        if (refcount_read(&nbd->config_refs)) {
                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
                if (index == -1)
                        goto again;
                printk(KERN_ERR "nbd: nbd%d already in use\n", index);
                return -EBUSY;
        }
        if (WARN_ON(nbd->config)) {
                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
                return -EINVAL;
        }
        config = nbd->config = nbd_alloc_config();
        if (!nbd->config) {
                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
                printk(KERN_ERR "nbd: couldn't allocate config\n");
                return -ENOMEM;
        }
        refcount_set(&nbd->config_refs, 1);
        set_bit(NBD_BOUND, &config->runtime_flags);

        if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
                u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
                nbd_size_set(nbd, config->blksize,
                             div64_u64(bytes, config->blksize));
        }
        if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
                u64 bsize =
                        nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
                nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
        }
        if (info->attrs[NBD_ATTR_TIMEOUT]) {
                u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
                nbd->tag_set.timeout = timeout * HZ;
                blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
        }
        if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
                config->dead_conn_timeout =
                        nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
                config->dead_conn_timeout *= HZ;
        }
        if (info->attrs[NBD_ATTR_SERVER_FLAGS])
                config->flags =
                        nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
                        set_bit(NBD_DESTROY_ON_DISCONNECT,
                                &config->runtime_flags);
                        put_dev = true;
                }
        }

        if (info->attrs[NBD_ATTR_SOCKETS]) {
                struct nlattr *attr;
                int rem, fd;

                nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
                                    rem) {
                        struct nlattr *socks[NBD_SOCK_MAX+1];

                        if (nla_type(attr) != NBD_SOCK_ITEM) {
                                printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
                                               nbd_sock_policy, info->extack);
                        if (ret != 0) {
                                printk(KERN_ERR "nbd: error processing sock list\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!socks[NBD_SOCK_FD])
                                continue;
                        fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
                        ret = nbd_add_socket(nbd, fd, true);
                        if (ret)
                                goto out;
                }
        }
        ret = nbd_start_device(nbd);
out:
        mutex_unlock(&nbd->config_lock);
        if (!ret) {
                set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
                refcount_inc(&nbd->config_refs);
                nbd_connect_reply(info, nbd->index);
        }
        nbd_config_put(nbd);
        if (put_dev)
                nbd_put(nbd);
        return ret;
}

static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
{
        struct nbd_device *nbd;
        int index;

        if (!netlink_capable(skb, CAP_SYS_ADMIN))
                return -EPERM;

        if (!info->attrs[NBD_ATTR_INDEX]) {
                printk(KERN_ERR "nbd: must specify an index to disconnect\n");
                return -EINVAL;
        }
        index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
        mutex_lock(&nbd_index_mutex);
        nbd = idr_find(&nbd_index_idr, index);
        if (!nbd) {
                mutex_unlock(&nbd_index_mutex);
                printk(KERN_ERR "nbd: couldn't find device at index %d\n",
                       index);
                return -EINVAL;
        }
        if (!refcount_inc_not_zero(&nbd->refs)) {
                mutex_unlock(&nbd_index_mutex);
                printk(KERN_ERR "nbd: device at index %d is going down\n",
                       index);
                return -EINVAL;
        }
        mutex_unlock(&nbd_index_mutex);
        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                nbd_put(nbd);
                return 0;
        }
        mutex_lock(&nbd->config_lock);
        nbd_disconnect(nbd);
        mutex_unlock(&nbd->config_lock);
        if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
        nbd_config_put(nbd);
        nbd_put(nbd);
        return 0;
}

static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
{
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index;
        int ret = -EINVAL;
        bool put_dev = false;

        if (!netlink_capable(skb, CAP_SYS_ADMIN))
                return -EPERM;

        if (!info->attrs[NBD_ATTR_INDEX]) {
                printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
                return -EINVAL;
        }
        index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
        mutex_lock(&nbd_index_mutex);
        nbd = idr_find(&nbd_index_idr, index);
        if (!nbd) {
                mutex_unlock(&nbd_index_mutex);
                printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
                       index);
                return -EINVAL;
        }
        if (!refcount_inc_not_zero(&nbd->refs)) {
                mutex_unlock(&nbd_index_mutex);
                printk(KERN_ERR "nbd: device at index %d is going down\n",
                       index);
                return -EINVAL;
        }
        mutex_unlock(&nbd_index_mutex);

        if (!refcount_inc_not_zero(&nbd->config_refs)) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
                nbd_put(nbd);
                return -EINVAL;
        }

        mutex_lock(&nbd->config_lock);
        config = nbd->config;
        if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
            !nbd->task_recv) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
                goto out;
        }

        if (info->attrs[NBD_ATTR_TIMEOUT]) {
                u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
                nbd->tag_set.timeout = timeout * HZ;
                blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
        }
        if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
                config->dead_conn_timeout =
                        nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
                config->dead_conn_timeout *= HZ;
        }
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
                        if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
                                              &config->runtime_flags))
                                put_dev = true;
                } else {
                        if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
                                               &config->runtime_flags))
                                refcount_inc(&nbd->refs);
                }
        }

        if (info->attrs[NBD_ATTR_SOCKETS]) {
                struct nlattr *attr;
                int rem, fd;

                nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
                                    rem) {
                        struct nlattr *socks[NBD_SOCK_MAX+1];

                        if (nla_type(attr) != NBD_SOCK_ITEM) {
                                printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
                                               nbd_sock_policy, info->extack);
                        if (ret != 0) {
                                printk(KERN_ERR "nbd: error processing sock list\n");
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!socks[NBD_SOCK_FD])
                                continue;
                        fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
                        ret = nbd_reconnect_socket(nbd, fd);
                        if (ret) {
                                if (ret == -ENOSPC)
                                        ret = 0;
                                goto out;
                        }
                        dev_info(nbd_to_dev(nbd), "reconnected socket\n");
                }
        }
out:
        mutex_unlock(&nbd->config_lock);
        nbd_config_put(nbd);
        nbd_put(nbd);
        if (put_dev)
                nbd_put(nbd);
        return ret;
}

static const struct genl_ops nbd_connect_genl_ops[] = {
        {
                .cmd    = NBD_CMD_CONNECT,
                .policy = nbd_attr_policy,
                .doit   = nbd_genl_connect,
        },
        {
                .cmd    = NBD_CMD_DISCONNECT,
                .policy = nbd_attr_policy,
                .doit   = nbd_genl_disconnect,
        },
        {
                .cmd    = NBD_CMD_RECONFIGURE,
                .policy = nbd_attr_policy,
                .doit   = nbd_genl_reconfigure,
        },
        {
                .cmd    = NBD_CMD_STATUS,
                .policy = nbd_attr_policy,
                .doit   = nbd_genl_status,
        },
};

static const struct genl_multicast_group nbd_mcast_grps[] = {
        { .name = NBD_GENL_MCAST_GROUP_NAME, },
};

static struct genl_family nbd_genl_family __ro_after_init = {
        .hdrsize        = 0,
        .name           = NBD_GENL_FAMILY_NAME,
        .version        = NBD_GENL_VERSION,
        .module         = THIS_MODULE,
        .ops            = nbd_connect_genl_ops,
        .n_ops          = ARRAY_SIZE(nbd_connect_genl_ops),
        .maxattr        = NBD_ATTR_MAX,
        .mcgrps         = nbd_mcast_grps,
        .n_mcgrps       = ARRAY_SIZE(nbd_mcast_grps),
};

static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
{
        struct nlattr *dev_opt;
        u8 connected = 0;
        int ret;

        /* This is a little racy, but for status it's ok.  The
         * reason we don't take a ref here is because we can't
         * take a ref in the index == -1 case as we would need
         * to put under the nbd_index_mutex, which could
         * deadlock if we are configured to remove ourselves
         * once we're disconnected.
         */
        if (refcount_read(&nbd->config_refs))
                connected = 1;
        dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM);
        if (!dev_opt)
                return -EMSGSIZE;
        ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
        if (ret)
                return -EMSGSIZE;
        ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
                         connected);
        if (ret)
                return -EMSGSIZE;
        nla_nest_end(reply, dev_opt);
        return 0;
}

static int status_cb(int id, void *ptr, void *data)
{
        struct nbd_device *nbd = ptr;
        return populate_nbd_status(nbd, (struct sk_buff *)data);
}

static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr *dev_list;
        struct sk_buff *reply;
        void *reply_head;
        size_t msg_size;
        int index = -1;
        int ret = -ENOMEM;

        if (info->attrs[NBD_ATTR_INDEX])
                index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

        mutex_lock(&nbd_index_mutex);

        msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
                                  nla_attr_size(sizeof(u8)));
        msg_size *= (index == -1) ? nbd_total_devices : 1;

        reply = genlmsg_new(msg_size, GFP_KERNEL);
        if (!reply)
                goto out;
        reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
                                       NBD_CMD_STATUS);
        if (!reply_head) {
                nlmsg_free(reply);
                goto out;
        }

        dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
        if (index == -1) {
                ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
                if (ret) {
                        nlmsg_free(reply);
                        goto out;
                }
        } else {
                struct nbd_device *nbd;
                nbd = idr_find(&nbd_index_idr, index);
                if (nbd) {
                        ret = populate_nbd_status(nbd, reply);
                        if (ret) {
                                nlmsg_free(reply);
                                goto out;
                        }
                }
        }
        nla_nest_end(reply, dev_list);
        genlmsg_end(reply, reply_head);
        genlmsg_reply(reply, info);
        ret = 0;
out:
        mutex_unlock(&nbd_index_mutex);
        return ret;
}

static void nbd_connect_reply(struct genl_info *info, int index)
{
        struct sk_buff *skb;
        void *msg_head;
        int ret;

        skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
        if (!skb)
                return;
        msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
                                     NBD_CMD_CONNECT);
        if (!msg_head) {
                nlmsg_free(skb);
                return;
        }
        ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
        if (ret) {
                nlmsg_free(skb);
                return;
        }
        genlmsg_end(skb, msg_head);
        genlmsg_reply(skb, info);
}

static void nbd_mcast_index(int index)
{
        struct sk_buff *skb;
        void *msg_head;
        int ret;

        skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
        if (!skb)
                return;
        msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
                               NBD_CMD_LINK_DEAD);
        if (!msg_head) {
                nlmsg_free(skb);
                return;
        }
        ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
        if (ret) {
                nlmsg_free(skb);
                return;
        }
        genlmsg_end(skb, msg_head);
        genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
}

static void nbd_dead_link_work(struct work_struct *work)
{
        struct link_dead_args *args = container_of(work, struct link_dead_args,
                                                   work);
        nbd_mcast_index(args->index);
        kfree(args);
}

static int __init nbd_init(void)
{
        int i;

        BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

        if (max_part < 0) {
                printk(KERN_ERR "nbd: max_part must be >= 0\n");
                return -EINVAL;
        }

        part_shift = 0;
        if (max_part > 0) {
                part_shift = fls(max_part);

                /*
                 * Adjust max_part according to part_shift as it is exported
                 * to user space so that user can know the max number of
                 * partition kernel should be able to manage.
                 *
                 * Note that -1 is required because partition 0 is reserved
                 * for the whole disk.
                 */
                max_part = (1UL << part_shift) - 1;
        }

        if ((1UL << part_shift) > DISK_MAX_PARTS)
                return -EINVAL;

        if (nbds_max > 1UL << (MINORBITS - part_shift))
                return -EINVAL;
        recv_workqueue = alloc_workqueue("knbd-recv",
                                         WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
        if (!recv_workqueue)
                return -ENOMEM;

        if (register_blkdev(NBD_MAJOR, "nbd")) {
                destroy_workqueue(recv_workqueue);
                return -EIO;
        }

        if (genl_register_family(&nbd_genl_family)) {
                unregister_blkdev(NBD_MAJOR, "nbd");
                destroy_workqueue(recv_workqueue);
                return -EINVAL;
        }
        nbd_dbg_init();

        mutex_lock(&nbd_index_mutex);
        for (i = 0; i < nbds_max; i++)
                nbd_dev_add(i);
        mutex_unlock(&nbd_index_mutex);
        return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
        struct list_head *list = (struct list_head *)data;
        struct nbd_device *nbd = ptr;

        list_add_tail(&nbd->list, list);
        return 0;
}

static void __exit nbd_cleanup(void)
{
        struct nbd_device *nbd;
        LIST_HEAD(del_list);

        nbd_dbg_close();

        mutex_lock(&nbd_index_mutex);
        idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
        mutex_unlock(&nbd_index_mutex);

        while (!list_empty(&del_list)) {
                nbd = list_first_entry(&del_list, struct nbd_device, list);
                list_del_init(&nbd->list);
                if (refcount_read(&nbd->refs) != 1)
                        printk(KERN_ERR "nbd: possibly leaking a device\n");
                nbd_put(nbd);
        }

        idr_destroy(&nbd_index_idr);
        genl_unregister_family(&nbd_genl_family);
        destroy_workqueue(recv_workqueue);
        unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");