1 /* 2 * Network block device - make block devices work over TCP 3 * 4 * Note that you can not swap over this thing, yet. Seems to work but 5 * deadlocks sometimes - you can not swap over TCP in general. 6 * 7 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz> 8 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com> 9 * 10 * This file is released under GPLv2 or later. 11 * 12 * (part of code stolen from loop.c) 13 */ 14 15 #include <linux/major.h> 16 17 #include <linux/blkdev.h> 18 #include <linux/module.h> 19 #include <linux/init.h> 20 #include <linux/sched.h> 21 #include <linux/sched/mm.h> 22 #include <linux/fs.h> 23 #include <linux/bio.h> 24 #include <linux/stat.h> 25 #include <linux/errno.h> 26 #include <linux/file.h> 27 #include <linux/ioctl.h> 28 #include <linux/mutex.h> 29 #include <linux/compiler.h> 30 #include <linux/err.h> 31 #include <linux/kernel.h> 32 #include <linux/slab.h> 33 #include <net/sock.h> 34 #include <linux/net.h> 35 #include <linux/kthread.h> 36 #include <linux/types.h> 37 #include <linux/debugfs.h> 38 #include <linux/blk-mq.h> 39 40 #include <linux/uaccess.h> 41 #include <asm/types.h> 42 43 #include <linux/nbd.h> 44 #include <linux/nbd-netlink.h> 45 #include <net/genetlink.h> 46 47 static DEFINE_IDR(nbd_index_idr); 48 static DEFINE_MUTEX(nbd_index_mutex); 49 static int nbd_total_devices = 0; 50 51 struct nbd_sock { 52 struct socket *sock; 53 struct mutex tx_lock; 54 struct request *pending; 55 int sent; 56 bool dead; 57 int fallback_index; 58 int cookie; 59 }; 60 61 struct recv_thread_args { 62 struct work_struct work; 63 struct nbd_device *nbd; 64 int index; 65 }; 66 67 struct link_dead_args { 68 struct work_struct work; 69 int index; 70 }; 71 72 #define NBD_TIMEDOUT 0 73 #define NBD_DISCONNECT_REQUESTED 1 74 #define NBD_DISCONNECTED 2 75 #define NBD_HAS_PID_FILE 3 76 #define NBD_HAS_CONFIG_REF 4 77 #define NBD_BOUND 5 78 #define NBD_DESTROY_ON_DISCONNECT 6 79 80 struct nbd_config { 81 u32 flags; 82 unsigned long runtime_flags; 83 u64 dead_conn_timeout; 84 85 struct nbd_sock **socks; 86 int num_connections; 87 atomic_t live_connections; 88 wait_queue_head_t conn_wait; 89 90 atomic_t recv_threads; 91 wait_queue_head_t recv_wq; 92 loff_t blksize; 93 loff_t bytesize; 94 #if IS_ENABLED(CONFIG_DEBUG_FS) 95 struct dentry *dbg_dir; 96 #endif 97 }; 98 99 struct nbd_device { 100 struct blk_mq_tag_set tag_set; 101 102 int index; 103 refcount_t config_refs; 104 refcount_t refs; 105 struct nbd_config *config; 106 struct mutex config_lock; 107 struct gendisk *disk; 108 109 struct list_head list; 110 struct task_struct *task_recv; 111 struct task_struct *task_setup; 112 }; 113 114 struct nbd_cmd { 115 struct nbd_device *nbd; 116 int index; 117 int cookie; 118 struct completion send_complete; 119 blk_status_t status; 120 }; 121 122 #if IS_ENABLED(CONFIG_DEBUG_FS) 123 static struct dentry *nbd_dbg_dir; 124 #endif 125 126 #define nbd_name(nbd) ((nbd)->disk->disk_name) 127 128 #define NBD_MAGIC 0x68797548 129 130 static unsigned int nbds_max = 16; 131 static int max_part = 16; 132 static struct workqueue_struct *recv_workqueue; 133 static int part_shift; 134 135 static int nbd_dev_dbg_init(struct nbd_device *nbd); 136 static void nbd_dev_dbg_close(struct nbd_device *nbd); 137 static void nbd_config_put(struct nbd_device *nbd); 138 static void nbd_connect_reply(struct genl_info *info, int index); 139 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); 140 static void nbd_dead_link_work(struct work_struct *work); 141 142 static inline struct device *nbd_to_dev(struct nbd_device *nbd) 143 { 144 return disk_to_dev(nbd->disk); 145 } 146 147 static const char *nbdcmd_to_ascii(int cmd) 148 { 149 switch (cmd) { 150 case NBD_CMD_READ: return "read"; 151 case NBD_CMD_WRITE: return "write"; 152 case NBD_CMD_DISC: return "disconnect"; 153 case NBD_CMD_FLUSH: return "flush"; 154 case NBD_CMD_TRIM: return "trim/discard"; 155 } 156 return "invalid"; 157 } 158 159 static ssize_t pid_show(struct device *dev, 160 struct device_attribute *attr, char *buf) 161 { 162 struct gendisk *disk = dev_to_disk(dev); 163 struct nbd_device *nbd = (struct nbd_device *)disk->private_data; 164 165 return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); 166 } 167 168 static const struct device_attribute pid_attr = { 169 .attr = { .name = "pid", .mode = 0444}, 170 .show = pid_show, 171 }; 172 173 static void nbd_dev_remove(struct nbd_device *nbd) 174 { 175 struct gendisk *disk = nbd->disk; 176 struct request_queue *q; 177 178 if (disk) { 179 q = disk->queue; 180 del_gendisk(disk); 181 blk_cleanup_queue(q); 182 blk_mq_free_tag_set(&nbd->tag_set); 183 disk->private_data = NULL; 184 put_disk(disk); 185 } 186 kfree(nbd); 187 } 188 189 static void nbd_put(struct nbd_device *nbd) 190 { 191 if (refcount_dec_and_mutex_lock(&nbd->refs, 192 &nbd_index_mutex)) { 193 idr_remove(&nbd_index_idr, nbd->index); 194 mutex_unlock(&nbd_index_mutex); 195 nbd_dev_remove(nbd); 196 } 197 } 198 199 static int nbd_disconnected(struct nbd_config *config) 200 { 201 return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || 202 test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 203 } 204 205 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, 206 int notify) 207 { 208 if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { 209 struct link_dead_args *args; 210 args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); 211 if (args) { 212 INIT_WORK(&args->work, nbd_dead_link_work); 213 args->index = nbd->index; 214 queue_work(system_wq, &args->work); 215 } 216 } 217 if (!nsock->dead) { 218 kernel_sock_shutdown(nsock->sock, SHUT_RDWR); 219 if (atomic_dec_return(&nbd->config->live_connections) == 0) { 220 if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED, 221 &nbd->config->runtime_flags)) { 222 set_bit(NBD_DISCONNECTED, 223 &nbd->config->runtime_flags); 224 dev_info(nbd_to_dev(nbd), 225 "Disconnected due to user request.\n"); 226 } 227 } 228 } 229 nsock->dead = true; 230 nsock->pending = NULL; 231 nsock->sent = 0; 232 } 233 234 static void nbd_size_clear(struct nbd_device *nbd) 235 { 236 if (nbd->config->bytesize) { 237 set_capacity(nbd->disk, 0); 238 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 239 } 240 } 241 242 static void nbd_size_update(struct nbd_device *nbd) 243 { 244 struct nbd_config *config = nbd->config; 245 struct block_device *bdev = bdget_disk(nbd->disk, 0); 246 247 if (config->flags & NBD_FLAG_SEND_TRIM) { 248 nbd->disk->queue->limits.discard_granularity = config->blksize; 249 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 250 } 251 blk_queue_logical_block_size(nbd->disk->queue, config->blksize); 252 blk_queue_physical_block_size(nbd->disk->queue, config->blksize); 253 set_capacity(nbd->disk, config->bytesize >> 9); 254 if (bdev) { 255 if (bdev->bd_disk) 256 bd_set_size(bdev, config->bytesize); 257 else 258 bdev->bd_invalidated = 1; 259 bdput(bdev); 260 } 261 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 262 } 263 264 static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, 265 loff_t nr_blocks) 266 { 267 struct nbd_config *config = nbd->config; 268 config->blksize = blocksize; 269 config->bytesize = blocksize * nr_blocks; 270 if (nbd->task_recv != NULL) 271 nbd_size_update(nbd); 272 } 273 274 static void nbd_complete_rq(struct request *req) 275 { 276 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 277 278 dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd, 279 cmd->status ? "failed" : "done"); 280 281 blk_mq_end_request(req, cmd->status); 282 } 283 284 /* 285 * Forcibly shutdown the socket causing all listeners to error 286 */ 287 static void sock_shutdown(struct nbd_device *nbd) 288 { 289 struct nbd_config *config = nbd->config; 290 int i; 291 292 if (config->num_connections == 0) 293 return; 294 if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags)) 295 return; 296 297 for (i = 0; i < config->num_connections; i++) { 298 struct nbd_sock *nsock = config->socks[i]; 299 mutex_lock(&nsock->tx_lock); 300 nbd_mark_nsock_dead(nbd, nsock, 0); 301 mutex_unlock(&nsock->tx_lock); 302 } 303 dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); 304 } 305 306 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, 307 bool reserved) 308 { 309 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 310 struct nbd_device *nbd = cmd->nbd; 311 struct nbd_config *config; 312 313 if (!refcount_inc_not_zero(&nbd->config_refs)) { 314 cmd->status = BLK_STS_TIMEOUT; 315 goto done; 316 } 317 config = nbd->config; 318 319 if (config->num_connections > 1) { 320 dev_err_ratelimited(nbd_to_dev(nbd), 321 "Connection timed out, retrying (%d/%d alive)\n", 322 atomic_read(&config->live_connections), 323 config->num_connections); 324 /* 325 * Hooray we have more connections, requeue this IO, the submit 326 * path will put it on a real connection. 327 */ 328 if (config->socks && config->num_connections > 1) { 329 if (cmd->index < config->num_connections) { 330 struct nbd_sock *nsock = 331 config->socks[cmd->index]; 332 mutex_lock(&nsock->tx_lock); 333 /* We can have multiple outstanding requests, so 334 * we don't want to mark the nsock dead if we've 335 * already reconnected with a new socket, so 336 * only mark it dead if its the same socket we 337 * were sent out on. 338 */ 339 if (cmd->cookie == nsock->cookie) 340 nbd_mark_nsock_dead(nbd, nsock, 1); 341 mutex_unlock(&nsock->tx_lock); 342 } 343 blk_mq_requeue_request(req, true); 344 nbd_config_put(nbd); 345 return BLK_EH_DONE; 346 } 347 } else { 348 dev_err_ratelimited(nbd_to_dev(nbd), 349 "Connection timed out\n"); 350 } 351 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 352 cmd->status = BLK_STS_IOERR; 353 sock_shutdown(nbd); 354 nbd_config_put(nbd); 355 done: 356 blk_mq_complete_request(req); 357 return BLK_EH_DONE; 358 } 359 360 /* 361 * Send or receive packet. 362 */ 363 static int sock_xmit(struct nbd_device *nbd, int index, int send, 364 struct iov_iter *iter, int msg_flags, int *sent) 365 { 366 struct nbd_config *config = nbd->config; 367 struct socket *sock = config->socks[index]->sock; 368 int result; 369 struct msghdr msg; 370 unsigned int noreclaim_flag; 371 372 if (unlikely(!sock)) { 373 dev_err_ratelimited(disk_to_dev(nbd->disk), 374 "Attempted %s on closed socket in sock_xmit\n", 375 (send ? "send" : "recv")); 376 return -EINVAL; 377 } 378 379 msg.msg_iter = *iter; 380 381 noreclaim_flag = memalloc_noreclaim_save(); 382 do { 383 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; 384 msg.msg_name = NULL; 385 msg.msg_namelen = 0; 386 msg.msg_control = NULL; 387 msg.msg_controllen = 0; 388 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 389 390 if (send) 391 result = sock_sendmsg(sock, &msg); 392 else 393 result = sock_recvmsg(sock, &msg, msg.msg_flags); 394 395 if (result <= 0) { 396 if (result == 0) 397 result = -EPIPE; /* short read */ 398 break; 399 } 400 if (sent) 401 *sent += result; 402 } while (msg_data_left(&msg)); 403 404 memalloc_noreclaim_restore(noreclaim_flag); 405 406 return result; 407 } 408 409 /* 410 * Different settings for sk->sk_sndtimeo can result in different return values 411 * if there is a signal pending when we enter sendmsg, because reasons? 412 */ 413 static inline int was_interrupted(int result) 414 { 415 return result == -ERESTARTSYS || result == -EINTR; 416 } 417 418 /* always call with the tx_lock held */ 419 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) 420 { 421 struct request *req = blk_mq_rq_from_pdu(cmd); 422 struct nbd_config *config = nbd->config; 423 struct nbd_sock *nsock = config->socks[index]; 424 int result; 425 struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; 426 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 427 struct iov_iter from; 428 unsigned long size = blk_rq_bytes(req); 429 struct bio *bio; 430 u32 type; 431 u32 nbd_cmd_flags = 0; 432 u32 tag = blk_mq_unique_tag(req); 433 int sent = nsock->sent, skip = 0; 434 435 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); 436 437 switch (req_op(req)) { 438 case REQ_OP_DISCARD: 439 type = NBD_CMD_TRIM; 440 break; 441 case REQ_OP_FLUSH: 442 type = NBD_CMD_FLUSH; 443 break; 444 case REQ_OP_WRITE: 445 type = NBD_CMD_WRITE; 446 break; 447 case REQ_OP_READ: 448 type = NBD_CMD_READ; 449 break; 450 default: 451 return -EIO; 452 } 453 454 if (rq_data_dir(req) == WRITE && 455 (config->flags & NBD_FLAG_READ_ONLY)) { 456 dev_err_ratelimited(disk_to_dev(nbd->disk), 457 "Write on read-only\n"); 458 return -EIO; 459 } 460 461 if (req->cmd_flags & REQ_FUA) 462 nbd_cmd_flags |= NBD_CMD_FLAG_FUA; 463 464 /* We did a partial send previously, and we at least sent the whole 465 * request struct, so just go and send the rest of the pages in the 466 * request. 467 */ 468 if (sent) { 469 if (sent >= sizeof(request)) { 470 skip = sent - sizeof(request); 471 goto send_pages; 472 } 473 iov_iter_advance(&from, sent); 474 } 475 cmd->index = index; 476 cmd->cookie = nsock->cookie; 477 request.type = htonl(type | nbd_cmd_flags); 478 if (type != NBD_CMD_FLUSH) { 479 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 480 request.len = htonl(size); 481 } 482 memcpy(request.handle, &tag, sizeof(tag)); 483 484 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", 485 cmd, nbdcmd_to_ascii(type), 486 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); 487 result = sock_xmit(nbd, index, 1, &from, 488 (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); 489 if (result <= 0) { 490 if (was_interrupted(result)) { 491 /* If we havne't sent anything we can just return BUSY, 492 * however if we have sent something we need to make 493 * sure we only allow this req to be sent until we are 494 * completely done. 495 */ 496 if (sent) { 497 nsock->pending = req; 498 nsock->sent = sent; 499 } 500 return BLK_STS_RESOURCE; 501 } 502 dev_err_ratelimited(disk_to_dev(nbd->disk), 503 "Send control failed (result %d)\n", result); 504 return -EAGAIN; 505 } 506 send_pages: 507 if (type != NBD_CMD_WRITE) 508 goto out; 509 510 bio = req->bio; 511 while (bio) { 512 struct bio *next = bio->bi_next; 513 struct bvec_iter iter; 514 struct bio_vec bvec; 515 516 bio_for_each_segment(bvec, bio, iter) { 517 bool is_last = !next && bio_iter_last(bvec, iter); 518 int flags = is_last ? 0 : MSG_MORE; 519 520 dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", 521 cmd, bvec.bv_len); 522 iov_iter_bvec(&from, ITER_BVEC | WRITE, 523 &bvec, 1, bvec.bv_len); 524 if (skip) { 525 if (skip >= iov_iter_count(&from)) { 526 skip -= iov_iter_count(&from); 527 continue; 528 } 529 iov_iter_advance(&from, skip); 530 skip = 0; 531 } 532 result = sock_xmit(nbd, index, 1, &from, flags, &sent); 533 if (result <= 0) { 534 if (was_interrupted(result)) { 535 /* We've already sent the header, we 536 * have no choice but to set pending and 537 * return BUSY. 538 */ 539 nsock->pending = req; 540 nsock->sent = sent; 541 return BLK_STS_RESOURCE; 542 } 543 dev_err(disk_to_dev(nbd->disk), 544 "Send data failed (result %d)\n", 545 result); 546 return -EAGAIN; 547 } 548 /* 549 * The completion might already have come in, 550 * so break for the last one instead of letting 551 * the iterator do it. This prevents use-after-free 552 * of the bio. 553 */ 554 if (is_last) 555 break; 556 } 557 bio = next; 558 } 559 out: 560 nsock->pending = NULL; 561 nsock->sent = 0; 562 return 0; 563 } 564 565 /* NULL returned = something went wrong, inform userspace */ 566 static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) 567 { 568 struct nbd_config *config = nbd->config; 569 int result; 570 struct nbd_reply reply; 571 struct nbd_cmd *cmd; 572 struct request *req = NULL; 573 u16 hwq; 574 u32 tag; 575 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; 576 struct iov_iter to; 577 578 reply.magic = 0; 579 iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); 580 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 581 if (result <= 0) { 582 if (!nbd_disconnected(config)) 583 dev_err(disk_to_dev(nbd->disk), 584 "Receive control failed (result %d)\n", result); 585 return ERR_PTR(result); 586 } 587 588 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 589 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", 590 (unsigned long)ntohl(reply.magic)); 591 return ERR_PTR(-EPROTO); 592 } 593 594 memcpy(&tag, reply.handle, sizeof(u32)); 595 596 hwq = blk_mq_unique_tag_to_hwq(tag); 597 if (hwq < nbd->tag_set.nr_hw_queues) 598 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], 599 blk_mq_unique_tag_to_tag(tag)); 600 if (!req || !blk_mq_request_started(req)) { 601 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n", 602 tag, req); 603 return ERR_PTR(-ENOENT); 604 } 605 cmd = blk_mq_rq_to_pdu(req); 606 if (ntohl(reply.error)) { 607 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 608 ntohl(reply.error)); 609 cmd->status = BLK_STS_IOERR; 610 return cmd; 611 } 612 613 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd); 614 if (rq_data_dir(req) != WRITE) { 615 struct req_iterator iter; 616 struct bio_vec bvec; 617 618 rq_for_each_segment(bvec, req, iter) { 619 iov_iter_bvec(&to, ITER_BVEC | READ, 620 &bvec, 1, bvec.bv_len); 621 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 622 if (result <= 0) { 623 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 624 result); 625 /* 626 * If we've disconnected or we only have 1 627 * connection then we need to make sure we 628 * complete this request, otherwise error out 629 * and let the timeout stuff handle resubmitting 630 * this request onto another connection. 631 */ 632 if (nbd_disconnected(config) || 633 config->num_connections <= 1) { 634 cmd->status = BLK_STS_IOERR; 635 return cmd; 636 } 637 return ERR_PTR(-EIO); 638 } 639 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", 640 cmd, bvec.bv_len); 641 } 642 } else { 643 /* See the comment in nbd_queue_rq. */ 644 wait_for_completion(&cmd->send_complete); 645 } 646 return cmd; 647 } 648 649 static void recv_work(struct work_struct *work) 650 { 651 struct recv_thread_args *args = container_of(work, 652 struct recv_thread_args, 653 work); 654 struct nbd_device *nbd = args->nbd; 655 struct nbd_config *config = nbd->config; 656 struct nbd_cmd *cmd; 657 658 while (1) { 659 cmd = nbd_read_stat(nbd, args->index); 660 if (IS_ERR(cmd)) { 661 struct nbd_sock *nsock = config->socks[args->index]; 662 663 mutex_lock(&nsock->tx_lock); 664 nbd_mark_nsock_dead(nbd, nsock, 1); 665 mutex_unlock(&nsock->tx_lock); 666 break; 667 } 668 669 blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); 670 } 671 atomic_dec(&config->recv_threads); 672 wake_up(&config->recv_wq); 673 nbd_config_put(nbd); 674 kfree(args); 675 } 676 677 static void nbd_clear_req(struct request *req, void *data, bool reserved) 678 { 679 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 680 681 cmd->status = BLK_STS_IOERR; 682 blk_mq_complete_request(req); 683 } 684 685 static void nbd_clear_que(struct nbd_device *nbd) 686 { 687 blk_mq_quiesce_queue(nbd->disk->queue); 688 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); 689 blk_mq_unquiesce_queue(nbd->disk->queue); 690 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); 691 } 692 693 static int find_fallback(struct nbd_device *nbd, int index) 694 { 695 struct nbd_config *config = nbd->config; 696 int new_index = -1; 697 struct nbd_sock *nsock = config->socks[index]; 698 int fallback = nsock->fallback_index; 699 700 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 701 return new_index; 702 703 if (config->num_connections <= 1) { 704 dev_err_ratelimited(disk_to_dev(nbd->disk), 705 "Attempted send on invalid socket\n"); 706 return new_index; 707 } 708 709 if (fallback >= 0 && fallback < config->num_connections && 710 !config->socks[fallback]->dead) 711 return fallback; 712 713 if (nsock->fallback_index < 0 || 714 nsock->fallback_index >= config->num_connections || 715 config->socks[nsock->fallback_index]->dead) { 716 int i; 717 for (i = 0; i < config->num_connections; i++) { 718 if (i == index) 719 continue; 720 if (!config->socks[i]->dead) { 721 new_index = i; 722 break; 723 } 724 } 725 nsock->fallback_index = new_index; 726 if (new_index < 0) { 727 dev_err_ratelimited(disk_to_dev(nbd->disk), 728 "Dead connection, failed to find a fallback\n"); 729 return new_index; 730 } 731 } 732 new_index = nsock->fallback_index; 733 return new_index; 734 } 735 736 static int wait_for_reconnect(struct nbd_device *nbd) 737 { 738 struct nbd_config *config = nbd->config; 739 if (!config->dead_conn_timeout) 740 return 0; 741 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 742 return 0; 743 return wait_event_timeout(config->conn_wait, 744 atomic_read(&config->live_connections) > 0, 745 config->dead_conn_timeout) > 0; 746 } 747 748 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) 749 { 750 struct request *req = blk_mq_rq_from_pdu(cmd); 751 struct nbd_device *nbd = cmd->nbd; 752 struct nbd_config *config; 753 struct nbd_sock *nsock; 754 int ret; 755 756 if (!refcount_inc_not_zero(&nbd->config_refs)) { 757 dev_err_ratelimited(disk_to_dev(nbd->disk), 758 "Socks array is empty\n"); 759 blk_mq_start_request(req); 760 return -EINVAL; 761 } 762 config = nbd->config; 763 764 if (index >= config->num_connections) { 765 dev_err_ratelimited(disk_to_dev(nbd->disk), 766 "Attempted send on invalid socket\n"); 767 nbd_config_put(nbd); 768 blk_mq_start_request(req); 769 return -EINVAL; 770 } 771 cmd->status = BLK_STS_OK; 772 again: 773 nsock = config->socks[index]; 774 mutex_lock(&nsock->tx_lock); 775 if (nsock->dead) { 776 int old_index = index; 777 index = find_fallback(nbd, index); 778 mutex_unlock(&nsock->tx_lock); 779 if (index < 0) { 780 if (wait_for_reconnect(nbd)) { 781 index = old_index; 782 goto again; 783 } 784 /* All the sockets should already be down at this point, 785 * we just want to make sure that DISCONNECTED is set so 786 * any requests that come in that were queue'ed waiting 787 * for the reconnect timer don't trigger the timer again 788 * and instead just error out. 789 */ 790 sock_shutdown(nbd); 791 nbd_config_put(nbd); 792 blk_mq_start_request(req); 793 return -EIO; 794 } 795 goto again; 796 } 797 798 /* Handle the case that we have a pending request that was partially 799 * transmitted that _has_ to be serviced first. We need to call requeue 800 * here so that it gets put _after_ the request that is already on the 801 * dispatch list. 802 */ 803 blk_mq_start_request(req); 804 if (unlikely(nsock->pending && nsock->pending != req)) { 805 blk_mq_requeue_request(req, true); 806 ret = 0; 807 goto out; 808 } 809 /* 810 * Some failures are related to the link going down, so anything that 811 * returns EAGAIN can be retried on a different socket. 812 */ 813 ret = nbd_send_cmd(nbd, cmd, index); 814 if (ret == -EAGAIN) { 815 dev_err_ratelimited(disk_to_dev(nbd->disk), 816 "Request send failed, requeueing\n"); 817 nbd_mark_nsock_dead(nbd, nsock, 1); 818 blk_mq_requeue_request(req, true); 819 ret = 0; 820 } 821 out: 822 mutex_unlock(&nsock->tx_lock); 823 nbd_config_put(nbd); 824 return ret; 825 } 826 827 static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, 828 const struct blk_mq_queue_data *bd) 829 { 830 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 831 int ret; 832 833 /* 834 * Since we look at the bio's to send the request over the network we 835 * need to make sure the completion work doesn't mark this request done 836 * before we are done doing our send. This keeps us from dereferencing 837 * freed data if we have particularly fast completions (ie we get the 838 * completion before we exit sock_xmit on the last bvec) or in the case 839 * that the server is misbehaving (or there was an error) before we're 840 * done sending everything over the wire. 841 */ 842 init_completion(&cmd->send_complete); 843 844 /* We can be called directly from the user space process, which means we 845 * could possibly have signals pending so our sendmsg will fail. In 846 * this case we need to return that we are busy, otherwise error out as 847 * appropriate. 848 */ 849 ret = nbd_handle_cmd(cmd, hctx->queue_num); 850 if (ret < 0) 851 ret = BLK_STS_IOERR; 852 else if (!ret) 853 ret = BLK_STS_OK; 854 complete(&cmd->send_complete); 855 856 return ret; 857 } 858 859 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 860 bool netlink) 861 { 862 struct nbd_config *config = nbd->config; 863 struct socket *sock; 864 struct nbd_sock **socks; 865 struct nbd_sock *nsock; 866 int err; 867 868 sock = sockfd_lookup(arg, &err); 869 if (!sock) 870 return err; 871 872 if (!netlink && !nbd->task_setup && 873 !test_bit(NBD_BOUND, &config->runtime_flags)) 874 nbd->task_setup = current; 875 876 if (!netlink && 877 (nbd->task_setup != current || 878 test_bit(NBD_BOUND, &config->runtime_flags))) { 879 dev_err(disk_to_dev(nbd->disk), 880 "Device being setup by another task"); 881 sockfd_put(sock); 882 return -EBUSY; 883 } 884 885 socks = krealloc(config->socks, (config->num_connections + 1) * 886 sizeof(struct nbd_sock *), GFP_KERNEL); 887 if (!socks) { 888 sockfd_put(sock); 889 return -ENOMEM; 890 } 891 nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); 892 if (!nsock) { 893 sockfd_put(sock); 894 return -ENOMEM; 895 } 896 897 config->socks = socks; 898 899 nsock->fallback_index = -1; 900 nsock->dead = false; 901 mutex_init(&nsock->tx_lock); 902 nsock->sock = sock; 903 nsock->pending = NULL; 904 nsock->sent = 0; 905 nsock->cookie = 0; 906 socks[config->num_connections++] = nsock; 907 atomic_inc(&config->live_connections); 908 909 return 0; 910 } 911 912 static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) 913 { 914 struct nbd_config *config = nbd->config; 915 struct socket *sock, *old; 916 struct recv_thread_args *args; 917 int i; 918 int err; 919 920 sock = sockfd_lookup(arg, &err); 921 if (!sock) 922 return err; 923 924 args = kzalloc(sizeof(*args), GFP_KERNEL); 925 if (!args) { 926 sockfd_put(sock); 927 return -ENOMEM; 928 } 929 930 for (i = 0; i < config->num_connections; i++) { 931 struct nbd_sock *nsock = config->socks[i]; 932 933 if (!nsock->dead) 934 continue; 935 936 mutex_lock(&nsock->tx_lock); 937 if (!nsock->dead) { 938 mutex_unlock(&nsock->tx_lock); 939 continue; 940 } 941 sk_set_memalloc(sock->sk); 942 if (nbd->tag_set.timeout) 943 sock->sk->sk_sndtimeo = nbd->tag_set.timeout; 944 atomic_inc(&config->recv_threads); 945 refcount_inc(&nbd->config_refs); 946 old = nsock->sock; 947 nsock->fallback_index = -1; 948 nsock->sock = sock; 949 nsock->dead = false; 950 INIT_WORK(&args->work, recv_work); 951 args->index = i; 952 args->nbd = nbd; 953 nsock->cookie++; 954 mutex_unlock(&nsock->tx_lock); 955 sockfd_put(old); 956 957 clear_bit(NBD_DISCONNECTED, &config->runtime_flags); 958 959 /* We take the tx_mutex in an error path in the recv_work, so we 960 * need to queue_work outside of the tx_mutex. 961 */ 962 queue_work(recv_workqueue, &args->work); 963 964 atomic_inc(&config->live_connections); 965 wake_up(&config->conn_wait); 966 return 0; 967 } 968 sockfd_put(sock); 969 kfree(args); 970 return -ENOSPC; 971 } 972 973 static void nbd_bdev_reset(struct block_device *bdev) 974 { 975 if (bdev->bd_openers > 1) 976 return; 977 bd_set_size(bdev, 0); 978 } 979 980 static void nbd_parse_flags(struct nbd_device *nbd) 981 { 982 struct nbd_config *config = nbd->config; 983 if (config->flags & NBD_FLAG_READ_ONLY) 984 set_disk_ro(nbd->disk, true); 985 else 986 set_disk_ro(nbd->disk, false); 987 if (config->flags & NBD_FLAG_SEND_TRIM) 988 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); 989 if (config->flags & NBD_FLAG_SEND_FLUSH) { 990 if (config->flags & NBD_FLAG_SEND_FUA) 991 blk_queue_write_cache(nbd->disk->queue, true, true); 992 else 993 blk_queue_write_cache(nbd->disk->queue, true, false); 994 } 995 else 996 blk_queue_write_cache(nbd->disk->queue, false, false); 997 } 998 999 static void send_disconnects(struct nbd_device *nbd) 1000 { 1001 struct nbd_config *config = nbd->config; 1002 struct nbd_request request = { 1003 .magic = htonl(NBD_REQUEST_MAGIC), 1004 .type = htonl(NBD_CMD_DISC), 1005 }; 1006 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 1007 struct iov_iter from; 1008 int i, ret; 1009 1010 for (i = 0; i < config->num_connections; i++) { 1011 struct nbd_sock *nsock = config->socks[i]; 1012 1013 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); 1014 mutex_lock(&nsock->tx_lock); 1015 ret = sock_xmit(nbd, i, 1, &from, 0, NULL); 1016 if (ret <= 0) 1017 dev_err(disk_to_dev(nbd->disk), 1018 "Send disconnect failed %d\n", ret); 1019 mutex_unlock(&nsock->tx_lock); 1020 } 1021 } 1022 1023 static int nbd_disconnect(struct nbd_device *nbd) 1024 { 1025 struct nbd_config *config = nbd->config; 1026 1027 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); 1028 set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 1029 send_disconnects(nbd); 1030 return 0; 1031 } 1032 1033 static void nbd_clear_sock(struct nbd_device *nbd) 1034 { 1035 sock_shutdown(nbd); 1036 nbd_clear_que(nbd); 1037 nbd->task_setup = NULL; 1038 } 1039 1040 static void nbd_config_put(struct nbd_device *nbd) 1041 { 1042 if (refcount_dec_and_mutex_lock(&nbd->config_refs, 1043 &nbd->config_lock)) { 1044 struct nbd_config *config = nbd->config; 1045 nbd_dev_dbg_close(nbd); 1046 nbd_size_clear(nbd); 1047 if (test_and_clear_bit(NBD_HAS_PID_FILE, 1048 &config->runtime_flags)) 1049 device_remove_file(disk_to_dev(nbd->disk), &pid_attr); 1050 nbd->task_recv = NULL; 1051 nbd_clear_sock(nbd); 1052 if (config->num_connections) { 1053 int i; 1054 for (i = 0; i < config->num_connections; i++) { 1055 sockfd_put(config->socks[i]->sock); 1056 kfree(config->socks[i]); 1057 } 1058 kfree(config->socks); 1059 } 1060 kfree(nbd->config); 1061 nbd->config = NULL; 1062 1063 nbd->tag_set.timeout = 0; 1064 nbd->disk->queue->limits.discard_granularity = 0; 1065 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 1066 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1067 1068 mutex_unlock(&nbd->config_lock); 1069 nbd_put(nbd); 1070 module_put(THIS_MODULE); 1071 } 1072 } 1073 1074 static int nbd_start_device(struct nbd_device *nbd) 1075 { 1076 struct nbd_config *config = nbd->config; 1077 int num_connections = config->num_connections; 1078 int error = 0, i; 1079 1080 if (nbd->task_recv) 1081 return -EBUSY; 1082 if (!config->socks) 1083 return -EINVAL; 1084 if (num_connections > 1 && 1085 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) { 1086 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); 1087 return -EINVAL; 1088 } 1089 1090 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); 1091 nbd->task_recv = current; 1092 1093 nbd_parse_flags(nbd); 1094 1095 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); 1096 if (error) { 1097 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); 1098 return error; 1099 } 1100 set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); 1101 1102 nbd_dev_dbg_init(nbd); 1103 for (i = 0; i < num_connections; i++) { 1104 struct recv_thread_args *args; 1105 1106 args = kzalloc(sizeof(*args), GFP_KERNEL); 1107 if (!args) { 1108 sock_shutdown(nbd); 1109 return -ENOMEM; 1110 } 1111 sk_set_memalloc(config->socks[i]->sock->sk); 1112 if (nbd->tag_set.timeout) 1113 config->socks[i]->sock->sk->sk_sndtimeo = 1114 nbd->tag_set.timeout; 1115 atomic_inc(&config->recv_threads); 1116 refcount_inc(&nbd->config_refs); 1117 INIT_WORK(&args->work, recv_work); 1118 args->nbd = nbd; 1119 args->index = i; 1120 queue_work(recv_workqueue, &args->work); 1121 } 1122 nbd_size_update(nbd); 1123 return error; 1124 } 1125 1126 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) 1127 { 1128 struct nbd_config *config = nbd->config; 1129 int ret; 1130 1131 ret = nbd_start_device(nbd); 1132 if (ret) 1133 return ret; 1134 1135 if (max_part) 1136 bdev->bd_invalidated = 1; 1137 mutex_unlock(&nbd->config_lock); 1138 ret = wait_event_interruptible(config->recv_wq, 1139 atomic_read(&config->recv_threads) == 0); 1140 if (ret) 1141 sock_shutdown(nbd); 1142 mutex_lock(&nbd->config_lock); 1143 nbd_bdev_reset(bdev); 1144 /* user requested, ignore socket errors */ 1145 if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) 1146 ret = 0; 1147 if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) 1148 ret = -ETIMEDOUT; 1149 return ret; 1150 } 1151 1152 static void nbd_clear_sock_ioctl(struct nbd_device *nbd, 1153 struct block_device *bdev) 1154 { 1155 sock_shutdown(nbd); 1156 kill_bdev(bdev); 1157 nbd_bdev_reset(bdev); 1158 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1159 &nbd->config->runtime_flags)) 1160 nbd_config_put(nbd); 1161 } 1162 1163 /* Must be called with config_lock held */ 1164 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, 1165 unsigned int cmd, unsigned long arg) 1166 { 1167 struct nbd_config *config = nbd->config; 1168 1169 switch (cmd) { 1170 case NBD_DISCONNECT: 1171 return nbd_disconnect(nbd); 1172 case NBD_CLEAR_SOCK: 1173 nbd_clear_sock_ioctl(nbd, bdev); 1174 return 0; 1175 case NBD_SET_SOCK: 1176 return nbd_add_socket(nbd, arg, false); 1177 case NBD_SET_BLKSIZE: 1178 nbd_size_set(nbd, arg, 1179 div_s64(config->bytesize, arg)); 1180 return 0; 1181 case NBD_SET_SIZE: 1182 nbd_size_set(nbd, config->blksize, 1183 div_s64(arg, config->blksize)); 1184 return 0; 1185 case NBD_SET_SIZE_BLOCKS: 1186 nbd_size_set(nbd, config->blksize, arg); 1187 return 0; 1188 case NBD_SET_TIMEOUT: 1189 if (arg) { 1190 nbd->tag_set.timeout = arg * HZ; 1191 blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); 1192 } 1193 return 0; 1194 1195 case NBD_SET_FLAGS: 1196 config->flags = arg; 1197 return 0; 1198 case NBD_DO_IT: 1199 return nbd_start_device_ioctl(nbd, bdev); 1200 case NBD_CLEAR_QUE: 1201 /* 1202 * This is for compatibility only. The queue is always cleared 1203 * by NBD_DO_IT or NBD_CLEAR_SOCK. 1204 */ 1205 return 0; 1206 case NBD_PRINT_DEBUG: 1207 /* 1208 * For compatibility only, we no longer keep a list of 1209 * outstanding requests. 1210 */ 1211 return 0; 1212 } 1213 return -ENOTTY; 1214 } 1215 1216 static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 1217 unsigned int cmd, unsigned long arg) 1218 { 1219 struct nbd_device *nbd = bdev->bd_disk->private_data; 1220 struct nbd_config *config = nbd->config; 1221 int error = -EINVAL; 1222 1223 if (!capable(CAP_SYS_ADMIN)) 1224 return -EPERM; 1225 1226 /* The block layer will pass back some non-nbd ioctls in case we have 1227 * special handling for them, but we don't so just return an error. 1228 */ 1229 if (_IOC_TYPE(cmd) != 0xab) 1230 return -EINVAL; 1231 1232 mutex_lock(&nbd->config_lock); 1233 1234 /* Don't allow ioctl operations on a nbd device that was created with 1235 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. 1236 */ 1237 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1238 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) 1239 error = __nbd_ioctl(bdev, nbd, cmd, arg); 1240 else 1241 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); 1242 mutex_unlock(&nbd->config_lock); 1243 return error; 1244 } 1245 1246 static struct nbd_config *nbd_alloc_config(void) 1247 { 1248 struct nbd_config *config; 1249 1250 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); 1251 if (!config) 1252 return NULL; 1253 atomic_set(&config->recv_threads, 0); 1254 init_waitqueue_head(&config->recv_wq); 1255 init_waitqueue_head(&config->conn_wait); 1256 config->blksize = 1024; 1257 atomic_set(&config->live_connections, 0); 1258 try_module_get(THIS_MODULE); 1259 return config; 1260 } 1261 1262 static int nbd_open(struct block_device *bdev, fmode_t mode) 1263 { 1264 struct nbd_device *nbd; 1265 int ret = 0; 1266 1267 mutex_lock(&nbd_index_mutex); 1268 nbd = bdev->bd_disk->private_data; 1269 if (!nbd) { 1270 ret = -ENXIO; 1271 goto out; 1272 } 1273 if (!refcount_inc_not_zero(&nbd->refs)) { 1274 ret = -ENXIO; 1275 goto out; 1276 } 1277 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1278 struct nbd_config *config; 1279 1280 mutex_lock(&nbd->config_lock); 1281 if (refcount_inc_not_zero(&nbd->config_refs)) { 1282 mutex_unlock(&nbd->config_lock); 1283 goto out; 1284 } 1285 config = nbd->config = nbd_alloc_config(); 1286 if (!config) { 1287 ret = -ENOMEM; 1288 mutex_unlock(&nbd->config_lock); 1289 goto out; 1290 } 1291 refcount_set(&nbd->config_refs, 1); 1292 refcount_inc(&nbd->refs); 1293 mutex_unlock(&nbd->config_lock); 1294 bdev->bd_invalidated = 1; 1295 } else if (nbd_disconnected(nbd->config)) { 1296 bdev->bd_invalidated = 1; 1297 } 1298 out: 1299 mutex_unlock(&nbd_index_mutex); 1300 return ret; 1301 } 1302 1303 static void nbd_release(struct gendisk *disk, fmode_t mode) 1304 { 1305 struct nbd_device *nbd = disk->private_data; 1306 nbd_config_put(nbd); 1307 nbd_put(nbd); 1308 } 1309 1310 static const struct block_device_operations nbd_fops = 1311 { 1312 .owner = THIS_MODULE, 1313 .open = nbd_open, 1314 .release = nbd_release, 1315 .ioctl = nbd_ioctl, 1316 .compat_ioctl = nbd_ioctl, 1317 }; 1318 1319 #if IS_ENABLED(CONFIG_DEBUG_FS) 1320 1321 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) 1322 { 1323 struct nbd_device *nbd = s->private; 1324 1325 if (nbd->task_recv) 1326 seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); 1327 1328 return 0; 1329 } 1330 1331 static int nbd_dbg_tasks_open(struct inode *inode, struct file *file) 1332 { 1333 return single_open(file, nbd_dbg_tasks_show, inode->i_private); 1334 } 1335 1336 static const struct file_operations nbd_dbg_tasks_ops = { 1337 .open = nbd_dbg_tasks_open, 1338 .read = seq_read, 1339 .llseek = seq_lseek, 1340 .release = single_release, 1341 }; 1342 1343 static int nbd_dbg_flags_show(struct seq_file *s, void *unused) 1344 { 1345 struct nbd_device *nbd = s->private; 1346 u32 flags = nbd->config->flags; 1347 1348 seq_printf(s, "Hex: 0x%08x\n\n", flags); 1349 1350 seq_puts(s, "Known flags:\n"); 1351 1352 if (flags & NBD_FLAG_HAS_FLAGS) 1353 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n"); 1354 if (flags & NBD_FLAG_READ_ONLY) 1355 seq_puts(s, "NBD_FLAG_READ_ONLY\n"); 1356 if (flags & NBD_FLAG_SEND_FLUSH) 1357 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n"); 1358 if (flags & NBD_FLAG_SEND_FUA) 1359 seq_puts(s, "NBD_FLAG_SEND_FUA\n"); 1360 if (flags & NBD_FLAG_SEND_TRIM) 1361 seq_puts(s, "NBD_FLAG_SEND_TRIM\n"); 1362 1363 return 0; 1364 } 1365 1366 static int nbd_dbg_flags_open(struct inode *inode, struct file *file) 1367 { 1368 return single_open(file, nbd_dbg_flags_show, inode->i_private); 1369 } 1370 1371 static const struct file_operations nbd_dbg_flags_ops = { 1372 .open = nbd_dbg_flags_open, 1373 .read = seq_read, 1374 .llseek = seq_lseek, 1375 .release = single_release, 1376 }; 1377 1378 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1379 { 1380 struct dentry *dir; 1381 struct nbd_config *config = nbd->config; 1382 1383 if (!nbd_dbg_dir) 1384 return -EIO; 1385 1386 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir); 1387 if (!dir) { 1388 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n", 1389 nbd_name(nbd)); 1390 return -EIO; 1391 } 1392 config->dbg_dir = dir; 1393 1394 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); 1395 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize); 1396 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); 1397 debugfs_create_u64("blocksize", 0444, dir, &config->blksize); 1398 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); 1399 1400 return 0; 1401 } 1402 1403 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1404 { 1405 debugfs_remove_recursive(nbd->config->dbg_dir); 1406 } 1407 1408 static int nbd_dbg_init(void) 1409 { 1410 struct dentry *dbg_dir; 1411 1412 dbg_dir = debugfs_create_dir("nbd", NULL); 1413 if (!dbg_dir) 1414 return -EIO; 1415 1416 nbd_dbg_dir = dbg_dir; 1417 1418 return 0; 1419 } 1420 1421 static void nbd_dbg_close(void) 1422 { 1423 debugfs_remove_recursive(nbd_dbg_dir); 1424 } 1425 1426 #else /* IS_ENABLED(CONFIG_DEBUG_FS) */ 1427 1428 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1429 { 1430 return 0; 1431 } 1432 1433 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1434 { 1435 } 1436 1437 static int nbd_dbg_init(void) 1438 { 1439 return 0; 1440 } 1441 1442 static void nbd_dbg_close(void) 1443 { 1444 } 1445 1446 #endif 1447 1448 static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, 1449 unsigned int hctx_idx, unsigned int numa_node) 1450 { 1451 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); 1452 cmd->nbd = set->driver_data; 1453 return 0; 1454 } 1455 1456 static const struct blk_mq_ops nbd_mq_ops = { 1457 .queue_rq = nbd_queue_rq, 1458 .complete = nbd_complete_rq, 1459 .init_request = nbd_init_request, 1460 .timeout = nbd_xmit_timeout, 1461 }; 1462 1463 static int nbd_dev_add(int index) 1464 { 1465 struct nbd_device *nbd; 1466 struct gendisk *disk; 1467 struct request_queue *q; 1468 int err = -ENOMEM; 1469 1470 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); 1471 if (!nbd) 1472 goto out; 1473 1474 disk = alloc_disk(1 << part_shift); 1475 if (!disk) 1476 goto out_free_nbd; 1477 1478 if (index >= 0) { 1479 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, 1480 GFP_KERNEL); 1481 if (err == -ENOSPC) 1482 err = -EEXIST; 1483 } else { 1484 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); 1485 if (err >= 0) 1486 index = err; 1487 } 1488 if (err < 0) 1489 goto out_free_disk; 1490 1491 nbd->index = index; 1492 nbd->disk = disk; 1493 nbd->tag_set.ops = &nbd_mq_ops; 1494 nbd->tag_set.nr_hw_queues = 1; 1495 nbd->tag_set.queue_depth = 128; 1496 nbd->tag_set.numa_node = NUMA_NO_NODE; 1497 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); 1498 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | 1499 BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; 1500 nbd->tag_set.driver_data = nbd; 1501 1502 err = blk_mq_alloc_tag_set(&nbd->tag_set); 1503 if (err) 1504 goto out_free_idr; 1505 1506 q = blk_mq_init_queue(&nbd->tag_set); 1507 if (IS_ERR(q)) { 1508 err = PTR_ERR(q); 1509 goto out_free_tags; 1510 } 1511 disk->queue = q; 1512 1513 /* 1514 * Tell the block layer that we are not a rotational device 1515 */ 1516 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); 1517 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); 1518 disk->queue->limits.discard_granularity = 0; 1519 blk_queue_max_discard_sectors(disk->queue, 0); 1520 blk_queue_max_segment_size(disk->queue, UINT_MAX); 1521 blk_queue_max_segments(disk->queue, USHRT_MAX); 1522 blk_queue_max_hw_sectors(disk->queue, 65536); 1523 disk->queue->limits.max_sectors = 256; 1524 1525 mutex_init(&nbd->config_lock); 1526 refcount_set(&nbd->config_refs, 0); 1527 refcount_set(&nbd->refs, 1); 1528 INIT_LIST_HEAD(&nbd->list); 1529 disk->major = NBD_MAJOR; 1530 disk->first_minor = index << part_shift; 1531 disk->fops = &nbd_fops; 1532 disk->private_data = nbd; 1533 sprintf(disk->disk_name, "nbd%d", index); 1534 add_disk(disk); 1535 nbd_total_devices++; 1536 return index; 1537 1538 out_free_tags: 1539 blk_mq_free_tag_set(&nbd->tag_set); 1540 out_free_idr: 1541 idr_remove(&nbd_index_idr, index); 1542 out_free_disk: 1543 put_disk(disk); 1544 out_free_nbd: 1545 kfree(nbd); 1546 out: 1547 return err; 1548 } 1549 1550 static int find_free_cb(int id, void *ptr, void *data) 1551 { 1552 struct nbd_device *nbd = ptr; 1553 struct nbd_device **found = data; 1554 1555 if (!refcount_read(&nbd->config_refs)) { 1556 *found = nbd; 1557 return 1; 1558 } 1559 return 0; 1560 } 1561 1562 /* Netlink interface. */ 1563 static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { 1564 [NBD_ATTR_INDEX] = { .type = NLA_U32 }, 1565 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, 1566 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, 1567 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, 1568 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, 1569 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, 1570 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, 1571 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, 1572 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, 1573 }; 1574 1575 static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { 1576 [NBD_SOCK_FD] = { .type = NLA_U32 }, 1577 }; 1578 1579 /* We don't use this right now since we don't parse the incoming list, but we 1580 * still want it here so userspace knows what to expect. 1581 */ 1582 static struct nla_policy __attribute__((unused)) 1583 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { 1584 [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, 1585 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, 1586 }; 1587 1588 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) 1589 { 1590 struct nbd_device *nbd = NULL; 1591 struct nbd_config *config; 1592 int index = -1; 1593 int ret; 1594 bool put_dev = false; 1595 1596 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1597 return -EPERM; 1598 1599 if (info->attrs[NBD_ATTR_INDEX]) 1600 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1601 if (!info->attrs[NBD_ATTR_SOCKETS]) { 1602 printk(KERN_ERR "nbd: must specify at least one socket\n"); 1603 return -EINVAL; 1604 } 1605 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { 1606 printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); 1607 return -EINVAL; 1608 } 1609 again: 1610 mutex_lock(&nbd_index_mutex); 1611 if (index == -1) { 1612 ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); 1613 if (ret == 0) { 1614 int new_index; 1615 new_index = nbd_dev_add(-1); 1616 if (new_index < 0) { 1617 mutex_unlock(&nbd_index_mutex); 1618 printk(KERN_ERR "nbd: failed to add new device\n"); 1619 return new_index; 1620 } 1621 nbd = idr_find(&nbd_index_idr, new_index); 1622 } 1623 } else { 1624 nbd = idr_find(&nbd_index_idr, index); 1625 if (!nbd) { 1626 ret = nbd_dev_add(index); 1627 if (ret < 0) { 1628 mutex_unlock(&nbd_index_mutex); 1629 printk(KERN_ERR "nbd: failed to add new device\n"); 1630 return ret; 1631 } 1632 nbd = idr_find(&nbd_index_idr, index); 1633 } 1634 } 1635 if (!nbd) { 1636 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1637 index); 1638 mutex_unlock(&nbd_index_mutex); 1639 return -EINVAL; 1640 } 1641 if (!refcount_inc_not_zero(&nbd->refs)) { 1642 mutex_unlock(&nbd_index_mutex); 1643 if (index == -1) 1644 goto again; 1645 printk(KERN_ERR "nbd: device at index %d is going down\n", 1646 index); 1647 return -EINVAL; 1648 } 1649 mutex_unlock(&nbd_index_mutex); 1650 1651 mutex_lock(&nbd->config_lock); 1652 if (refcount_read(&nbd->config_refs)) { 1653 mutex_unlock(&nbd->config_lock); 1654 nbd_put(nbd); 1655 if (index == -1) 1656 goto again; 1657 printk(KERN_ERR "nbd: nbd%d already in use\n", index); 1658 return -EBUSY; 1659 } 1660 if (WARN_ON(nbd->config)) { 1661 mutex_unlock(&nbd->config_lock); 1662 nbd_put(nbd); 1663 return -EINVAL; 1664 } 1665 config = nbd->config = nbd_alloc_config(); 1666 if (!nbd->config) { 1667 mutex_unlock(&nbd->config_lock); 1668 nbd_put(nbd); 1669 printk(KERN_ERR "nbd: couldn't allocate config\n"); 1670 return -ENOMEM; 1671 } 1672 refcount_set(&nbd->config_refs, 1); 1673 set_bit(NBD_BOUND, &config->runtime_flags); 1674 1675 if (info->attrs[NBD_ATTR_SIZE_BYTES]) { 1676 u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); 1677 nbd_size_set(nbd, config->blksize, 1678 div64_u64(bytes, config->blksize)); 1679 } 1680 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { 1681 u64 bsize = 1682 nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); 1683 nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); 1684 } 1685 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1686 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1687 nbd->tag_set.timeout = timeout * HZ; 1688 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1689 } 1690 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1691 config->dead_conn_timeout = 1692 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1693 config->dead_conn_timeout *= HZ; 1694 } 1695 if (info->attrs[NBD_ATTR_SERVER_FLAGS]) 1696 config->flags = 1697 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); 1698 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1699 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1700 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1701 set_bit(NBD_DESTROY_ON_DISCONNECT, 1702 &config->runtime_flags); 1703 put_dev = true; 1704 } 1705 } 1706 1707 if (info->attrs[NBD_ATTR_SOCKETS]) { 1708 struct nlattr *attr; 1709 int rem, fd; 1710 1711 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1712 rem) { 1713 struct nlattr *socks[NBD_SOCK_MAX+1]; 1714 1715 if (nla_type(attr) != NBD_SOCK_ITEM) { 1716 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1717 ret = -EINVAL; 1718 goto out; 1719 } 1720 ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, 1721 nbd_sock_policy, info->extack); 1722 if (ret != 0) { 1723 printk(KERN_ERR "nbd: error processing sock list\n"); 1724 ret = -EINVAL; 1725 goto out; 1726 } 1727 if (!socks[NBD_SOCK_FD]) 1728 continue; 1729 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1730 ret = nbd_add_socket(nbd, fd, true); 1731 if (ret) 1732 goto out; 1733 } 1734 } 1735 ret = nbd_start_device(nbd); 1736 out: 1737 mutex_unlock(&nbd->config_lock); 1738 if (!ret) { 1739 set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); 1740 refcount_inc(&nbd->config_refs); 1741 nbd_connect_reply(info, nbd->index); 1742 } 1743 nbd_config_put(nbd); 1744 if (put_dev) 1745 nbd_put(nbd); 1746 return ret; 1747 } 1748 1749 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) 1750 { 1751 struct nbd_device *nbd; 1752 int index; 1753 1754 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1755 return -EPERM; 1756 1757 if (!info->attrs[NBD_ATTR_INDEX]) { 1758 printk(KERN_ERR "nbd: must specify an index to disconnect\n"); 1759 return -EINVAL; 1760 } 1761 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1762 mutex_lock(&nbd_index_mutex); 1763 nbd = idr_find(&nbd_index_idr, index); 1764 if (!nbd) { 1765 mutex_unlock(&nbd_index_mutex); 1766 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1767 index); 1768 return -EINVAL; 1769 } 1770 if (!refcount_inc_not_zero(&nbd->refs)) { 1771 mutex_unlock(&nbd_index_mutex); 1772 printk(KERN_ERR "nbd: device at index %d is going down\n", 1773 index); 1774 return -EINVAL; 1775 } 1776 mutex_unlock(&nbd_index_mutex); 1777 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1778 nbd_put(nbd); 1779 return 0; 1780 } 1781 mutex_lock(&nbd->config_lock); 1782 nbd_disconnect(nbd); 1783 nbd_clear_sock(nbd); 1784 mutex_unlock(&nbd->config_lock); 1785 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1786 &nbd->config->runtime_flags)) 1787 nbd_config_put(nbd); 1788 nbd_config_put(nbd); 1789 nbd_put(nbd); 1790 return 0; 1791 } 1792 1793 static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) 1794 { 1795 struct nbd_device *nbd = NULL; 1796 struct nbd_config *config; 1797 int index; 1798 int ret = -EINVAL; 1799 bool put_dev = false; 1800 1801 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1802 return -EPERM; 1803 1804 if (!info->attrs[NBD_ATTR_INDEX]) { 1805 printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); 1806 return -EINVAL; 1807 } 1808 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1809 mutex_lock(&nbd_index_mutex); 1810 nbd = idr_find(&nbd_index_idr, index); 1811 if (!nbd) { 1812 mutex_unlock(&nbd_index_mutex); 1813 printk(KERN_ERR "nbd: couldn't find a device at index %d\n", 1814 index); 1815 return -EINVAL; 1816 } 1817 if (!refcount_inc_not_zero(&nbd->refs)) { 1818 mutex_unlock(&nbd_index_mutex); 1819 printk(KERN_ERR "nbd: device at index %d is going down\n", 1820 index); 1821 return -EINVAL; 1822 } 1823 mutex_unlock(&nbd_index_mutex); 1824 1825 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1826 dev_err(nbd_to_dev(nbd), 1827 "not configured, cannot reconfigure\n"); 1828 nbd_put(nbd); 1829 return -EINVAL; 1830 } 1831 1832 mutex_lock(&nbd->config_lock); 1833 config = nbd->config; 1834 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1835 !nbd->task_recv) { 1836 dev_err(nbd_to_dev(nbd), 1837 "not configured, cannot reconfigure\n"); 1838 goto out; 1839 } 1840 1841 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1842 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1843 nbd->tag_set.timeout = timeout * HZ; 1844 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1845 } 1846 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1847 config->dead_conn_timeout = 1848 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1849 config->dead_conn_timeout *= HZ; 1850 } 1851 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1852 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1853 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1854 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, 1855 &config->runtime_flags)) 1856 put_dev = true; 1857 } else { 1858 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, 1859 &config->runtime_flags)) 1860 refcount_inc(&nbd->refs); 1861 } 1862 } 1863 1864 if (info->attrs[NBD_ATTR_SOCKETS]) { 1865 struct nlattr *attr; 1866 int rem, fd; 1867 1868 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1869 rem) { 1870 struct nlattr *socks[NBD_SOCK_MAX+1]; 1871 1872 if (nla_type(attr) != NBD_SOCK_ITEM) { 1873 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1874 ret = -EINVAL; 1875 goto out; 1876 } 1877 ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, 1878 nbd_sock_policy, info->extack); 1879 if (ret != 0) { 1880 printk(KERN_ERR "nbd: error processing sock list\n"); 1881 ret = -EINVAL; 1882 goto out; 1883 } 1884 if (!socks[NBD_SOCK_FD]) 1885 continue; 1886 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1887 ret = nbd_reconnect_socket(nbd, fd); 1888 if (ret) { 1889 if (ret == -ENOSPC) 1890 ret = 0; 1891 goto out; 1892 } 1893 dev_info(nbd_to_dev(nbd), "reconnected socket\n"); 1894 } 1895 } 1896 out: 1897 mutex_unlock(&nbd->config_lock); 1898 nbd_config_put(nbd); 1899 nbd_put(nbd); 1900 if (put_dev) 1901 nbd_put(nbd); 1902 return ret; 1903 } 1904 1905 static const struct genl_ops nbd_connect_genl_ops[] = { 1906 { 1907 .cmd = NBD_CMD_CONNECT, 1908 .policy = nbd_attr_policy, 1909 .doit = nbd_genl_connect, 1910 }, 1911 { 1912 .cmd = NBD_CMD_DISCONNECT, 1913 .policy = nbd_attr_policy, 1914 .doit = nbd_genl_disconnect, 1915 }, 1916 { 1917 .cmd = NBD_CMD_RECONFIGURE, 1918 .policy = nbd_attr_policy, 1919 .doit = nbd_genl_reconfigure, 1920 }, 1921 { 1922 .cmd = NBD_CMD_STATUS, 1923 .policy = nbd_attr_policy, 1924 .doit = nbd_genl_status, 1925 }, 1926 }; 1927 1928 static const struct genl_multicast_group nbd_mcast_grps[] = { 1929 { .name = NBD_GENL_MCAST_GROUP_NAME, }, 1930 }; 1931 1932 static struct genl_family nbd_genl_family __ro_after_init = { 1933 .hdrsize = 0, 1934 .name = NBD_GENL_FAMILY_NAME, 1935 .version = NBD_GENL_VERSION, 1936 .module = THIS_MODULE, 1937 .ops = nbd_connect_genl_ops, 1938 .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), 1939 .maxattr = NBD_ATTR_MAX, 1940 .mcgrps = nbd_mcast_grps, 1941 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), 1942 }; 1943 1944 static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) 1945 { 1946 struct nlattr *dev_opt; 1947 u8 connected = 0; 1948 int ret; 1949 1950 /* This is a little racey, but for status it's ok. The 1951 * reason we don't take a ref here is because we can't 1952 * take a ref in the index == -1 case as we would need 1953 * to put under the nbd_index_mutex, which could 1954 * deadlock if we are configured to remove ourselves 1955 * once we're disconnected. 1956 */ 1957 if (refcount_read(&nbd->config_refs)) 1958 connected = 1; 1959 dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); 1960 if (!dev_opt) 1961 return -EMSGSIZE; 1962 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); 1963 if (ret) 1964 return -EMSGSIZE; 1965 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, 1966 connected); 1967 if (ret) 1968 return -EMSGSIZE; 1969 nla_nest_end(reply, dev_opt); 1970 return 0; 1971 } 1972 1973 static int status_cb(int id, void *ptr, void *data) 1974 { 1975 struct nbd_device *nbd = ptr; 1976 return populate_nbd_status(nbd, (struct sk_buff *)data); 1977 } 1978 1979 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) 1980 { 1981 struct nlattr *dev_list; 1982 struct sk_buff *reply; 1983 void *reply_head; 1984 size_t msg_size; 1985 int index = -1; 1986 int ret = -ENOMEM; 1987 1988 if (info->attrs[NBD_ATTR_INDEX]) 1989 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1990 1991 mutex_lock(&nbd_index_mutex); 1992 1993 msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + 1994 nla_attr_size(sizeof(u8))); 1995 msg_size *= (index == -1) ? nbd_total_devices : 1; 1996 1997 reply = genlmsg_new(msg_size, GFP_KERNEL); 1998 if (!reply) 1999 goto out; 2000 reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, 2001 NBD_CMD_STATUS); 2002 if (!reply_head) { 2003 nlmsg_free(reply); 2004 goto out; 2005 } 2006 2007 dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); 2008 if (index == -1) { 2009 ret = idr_for_each(&nbd_index_idr, &status_cb, reply); 2010 if (ret) { 2011 nlmsg_free(reply); 2012 goto out; 2013 } 2014 } else { 2015 struct nbd_device *nbd; 2016 nbd = idr_find(&nbd_index_idr, index); 2017 if (nbd) { 2018 ret = populate_nbd_status(nbd, reply); 2019 if (ret) { 2020 nlmsg_free(reply); 2021 goto out; 2022 } 2023 } 2024 } 2025 nla_nest_end(reply, dev_list); 2026 genlmsg_end(reply, reply_head); 2027 genlmsg_reply(reply, info); 2028 ret = 0; 2029 out: 2030 mutex_unlock(&nbd_index_mutex); 2031 return ret; 2032 } 2033 2034 static void nbd_connect_reply(struct genl_info *info, int index) 2035 { 2036 struct sk_buff *skb; 2037 void *msg_head; 2038 int ret; 2039 2040 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); 2041 if (!skb) 2042 return; 2043 msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0, 2044 NBD_CMD_CONNECT); 2045 if (!msg_head) { 2046 nlmsg_free(skb); 2047 return; 2048 } 2049 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); 2050 if (ret) { 2051 nlmsg_free(skb); 2052 return; 2053 } 2054 genlmsg_end(skb, msg_head); 2055 genlmsg_reply(skb, info); 2056 } 2057 2058 static void nbd_mcast_index(int index) 2059 { 2060 struct sk_buff *skb; 2061 void *msg_head; 2062 int ret; 2063 2064 skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); 2065 if (!skb) 2066 return; 2067 msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, 2068 NBD_CMD_LINK_DEAD); 2069 if (!msg_head) { 2070 nlmsg_free(skb); 2071 return; 2072 } 2073 ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); 2074 if (ret) { 2075 nlmsg_free(skb); 2076 return; 2077 } 2078 genlmsg_end(skb, msg_head); 2079 genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); 2080 } 2081 2082 static void nbd_dead_link_work(struct work_struct *work) 2083 { 2084 struct link_dead_args *args = container_of(work, struct link_dead_args, 2085 work); 2086 nbd_mcast_index(args->index); 2087 kfree(args); 2088 } 2089 2090 static int __init nbd_init(void) 2091 { 2092 int i; 2093 2094 BUILD_BUG_ON(sizeof(struct nbd_request) != 28); 2095 2096 if (max_part < 0) { 2097 printk(KERN_ERR "nbd: max_part must be >= 0\n"); 2098 return -EINVAL; 2099 } 2100 2101 part_shift = 0; 2102 if (max_part > 0) { 2103 part_shift = fls(max_part); 2104 2105 /* 2106 * Adjust max_part according to part_shift as it is exported 2107 * to user space so that user can know the max number of 2108 * partition kernel should be able to manage. 2109 * 2110 * Note that -1 is required because partition 0 is reserved 2111 * for the whole disk. 2112 */ 2113 max_part = (1UL << part_shift) - 1; 2114 } 2115 2116 if ((1UL << part_shift) > DISK_MAX_PARTS) 2117 return -EINVAL; 2118 2119 if (nbds_max > 1UL << (MINORBITS - part_shift)) 2120 return -EINVAL; 2121 recv_workqueue = alloc_workqueue("knbd-recv", 2122 WQ_MEM_RECLAIM | WQ_HIGHPRI | 2123 WQ_UNBOUND, 0); 2124 if (!recv_workqueue) 2125 return -ENOMEM; 2126 2127 if (register_blkdev(NBD_MAJOR, "nbd")) { 2128 destroy_workqueue(recv_workqueue); 2129 return -EIO; 2130 } 2131 2132 if (genl_register_family(&nbd_genl_family)) { 2133 unregister_blkdev(NBD_MAJOR, "nbd"); 2134 destroy_workqueue(recv_workqueue); 2135 return -EINVAL; 2136 } 2137 nbd_dbg_init(); 2138 2139 mutex_lock(&nbd_index_mutex); 2140 for (i = 0; i < nbds_max; i++) 2141 nbd_dev_add(i); 2142 mutex_unlock(&nbd_index_mutex); 2143 return 0; 2144 } 2145 2146 static int nbd_exit_cb(int id, void *ptr, void *data) 2147 { 2148 struct list_head *list = (struct list_head *)data; 2149 struct nbd_device *nbd = ptr; 2150 2151 list_add_tail(&nbd->list, list); 2152 return 0; 2153 } 2154 2155 static void __exit nbd_cleanup(void) 2156 { 2157 struct nbd_device *nbd; 2158 LIST_HEAD(del_list); 2159 2160 nbd_dbg_close(); 2161 2162 mutex_lock(&nbd_index_mutex); 2163 idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list); 2164 mutex_unlock(&nbd_index_mutex); 2165 2166 while (!list_empty(&del_list)) { 2167 nbd = list_first_entry(&del_list, struct nbd_device, list); 2168 list_del_init(&nbd->list); 2169 if (refcount_read(&nbd->refs) != 1) 2170 printk(KERN_ERR "nbd: possibly leaking a device\n"); 2171 nbd_put(nbd); 2172 } 2173 2174 idr_destroy(&nbd_index_idr); 2175 genl_unregister_family(&nbd_genl_family); 2176 destroy_workqueue(recv_workqueue); 2177 unregister_blkdev(NBD_MAJOR, "nbd"); 2178 } 2179 2180 module_init(nbd_init); 2181 module_exit(nbd_cleanup); 2182 2183 MODULE_DESCRIPTION("Network Block Device"); 2184 MODULE_LICENSE("GPL"); 2185 2186 module_param(nbds_max, int, 0444); 2187 MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)"); 2188 module_param(max_part, int, 0444); 2189 MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)"); 2190