1 /* 2 * Network block device - make block devices work over TCP 3 * 4 * Note that you can not swap over this thing, yet. Seems to work but 5 * deadlocks sometimes - you can not swap over TCP in general. 6 * 7 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz> 8 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com> 9 * 10 * This file is released under GPLv2 or later. 11 * 12 * (part of code stolen from loop.c) 13 */ 14 15 #include <linux/major.h> 16 17 #include <linux/blkdev.h> 18 #include <linux/module.h> 19 #include <linux/init.h> 20 #include <linux/sched.h> 21 #include <linux/sched/mm.h> 22 #include <linux/fs.h> 23 #include <linux/bio.h> 24 #include <linux/stat.h> 25 #include <linux/errno.h> 26 #include <linux/file.h> 27 #include <linux/ioctl.h> 28 #include <linux/mutex.h> 29 #include <linux/compiler.h> 30 #include <linux/err.h> 31 #include <linux/kernel.h> 32 #include <linux/slab.h> 33 #include <net/sock.h> 34 #include <linux/net.h> 35 #include <linux/kthread.h> 36 #include <linux/types.h> 37 #include <linux/debugfs.h> 38 #include <linux/blk-mq.h> 39 40 #include <linux/uaccess.h> 41 #include <asm/types.h> 42 43 #include <linux/nbd.h> 44 #include <linux/nbd-netlink.h> 45 #include <net/genetlink.h> 46 47 static DEFINE_IDR(nbd_index_idr); 48 static DEFINE_MUTEX(nbd_index_mutex); 49 static int nbd_total_devices = 0; 50 51 struct nbd_sock { 52 struct socket *sock; 53 struct mutex tx_lock; 54 struct request *pending; 55 int sent; 56 bool dead; 57 int fallback_index; 58 int cookie; 59 }; 60 61 struct recv_thread_args { 62 struct work_struct work; 63 struct nbd_device *nbd; 64 int index; 65 }; 66 67 struct link_dead_args { 68 struct work_struct work; 69 int index; 70 }; 71 72 #define NBD_TIMEDOUT 0 73 #define NBD_DISCONNECT_REQUESTED 1 74 #define NBD_DISCONNECTED 2 75 #define NBD_HAS_PID_FILE 3 76 #define NBD_HAS_CONFIG_REF 4 77 #define NBD_BOUND 5 78 #define NBD_DESTROY_ON_DISCONNECT 6 79 #define NBD_DISCONNECT_ON_CLOSE 7 80 81 struct nbd_config { 82 u32 flags; 83 unsigned long runtime_flags; 84 u64 dead_conn_timeout; 85 86 struct nbd_sock **socks; 87 int num_connections; 88 atomic_t live_connections; 89 wait_queue_head_t conn_wait; 90 91 atomic_t recv_threads; 92 wait_queue_head_t recv_wq; 93 loff_t blksize; 94 loff_t bytesize; 95 #if IS_ENABLED(CONFIG_DEBUG_FS) 96 struct dentry *dbg_dir; 97 #endif 98 }; 99 100 struct nbd_device { 101 struct blk_mq_tag_set tag_set; 102 103 int index; 104 refcount_t config_refs; 105 refcount_t refs; 106 struct nbd_config *config; 107 struct mutex config_lock; 108 struct gendisk *disk; 109 110 struct list_head list; 111 struct task_struct *task_recv; 112 struct task_struct *task_setup; 113 }; 114 115 #define NBD_CMD_REQUEUED 1 116 117 struct nbd_cmd { 118 struct nbd_device *nbd; 119 struct mutex lock; 120 int index; 121 int cookie; 122 blk_status_t status; 123 unsigned long flags; 124 u32 cmd_cookie; 125 }; 126 127 #if IS_ENABLED(CONFIG_DEBUG_FS) 128 static struct dentry *nbd_dbg_dir; 129 #endif 130 131 #define nbd_name(nbd) ((nbd)->disk->disk_name) 132 133 #define NBD_MAGIC 0x68797548 134 135 static unsigned int nbds_max = 16; 136 static int max_part = 16; 137 static struct workqueue_struct *recv_workqueue; 138 static int part_shift; 139 140 static int nbd_dev_dbg_init(struct nbd_device *nbd); 141 static void nbd_dev_dbg_close(struct nbd_device *nbd); 142 static void nbd_config_put(struct nbd_device *nbd); 143 static void nbd_connect_reply(struct genl_info *info, int index); 144 static int nbd_genl_status(struct sk_buff *skb, struct genl_info 
*info); 145 static void nbd_dead_link_work(struct work_struct *work); 146 static void nbd_disconnect_and_put(struct nbd_device *nbd); 147 148 static inline struct device *nbd_to_dev(struct nbd_device *nbd) 149 { 150 return disk_to_dev(nbd->disk); 151 } 152 153 static void nbd_requeue_cmd(struct nbd_cmd *cmd) 154 { 155 struct request *req = blk_mq_rq_from_pdu(cmd); 156 157 if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags)) 158 blk_mq_requeue_request(req, true); 159 } 160 161 #define NBD_COOKIE_BITS 32 162 163 static u64 nbd_cmd_handle(struct nbd_cmd *cmd) 164 { 165 struct request *req = blk_mq_rq_from_pdu(cmd); 166 u32 tag = blk_mq_unique_tag(req); 167 u64 cookie = cmd->cmd_cookie; 168 169 return (cookie << NBD_COOKIE_BITS) | tag; 170 } 171 172 static u32 nbd_handle_to_tag(u64 handle) 173 { 174 return (u32)handle; 175 } 176 177 static u32 nbd_handle_to_cookie(u64 handle) 178 { 179 return (u32)(handle >> NBD_COOKIE_BITS); 180 } 181 182 static const char *nbdcmd_to_ascii(int cmd) 183 { 184 switch (cmd) { 185 case NBD_CMD_READ: return "read"; 186 case NBD_CMD_WRITE: return "write"; 187 case NBD_CMD_DISC: return "disconnect"; 188 case NBD_CMD_FLUSH: return "flush"; 189 case NBD_CMD_TRIM: return "trim/discard"; 190 } 191 return "invalid"; 192 } 193 194 static ssize_t pid_show(struct device *dev, 195 struct device_attribute *attr, char *buf) 196 { 197 struct gendisk *disk = dev_to_disk(dev); 198 struct nbd_device *nbd = (struct nbd_device *)disk->private_data; 199 200 return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); 201 } 202 203 static const struct device_attribute pid_attr = { 204 .attr = { .name = "pid", .mode = 0444}, 205 .show = pid_show, 206 }; 207 208 static void nbd_dev_remove(struct nbd_device *nbd) 209 { 210 struct gendisk *disk = nbd->disk; 211 struct request_queue *q; 212 213 if (disk) { 214 q = disk->queue; 215 del_gendisk(disk); 216 blk_cleanup_queue(q); 217 blk_mq_free_tag_set(&nbd->tag_set); 218 disk->private_data = NULL; 219 put_disk(disk); 220 } 221 kfree(nbd); 222 } 223 224 static void nbd_put(struct nbd_device *nbd) 225 { 226 if (refcount_dec_and_mutex_lock(&nbd->refs, 227 &nbd_index_mutex)) { 228 idr_remove(&nbd_index_idr, nbd->index); 229 mutex_unlock(&nbd_index_mutex); 230 nbd_dev_remove(nbd); 231 } 232 } 233 234 static int nbd_disconnected(struct nbd_config *config) 235 { 236 return test_bit(NBD_DISCONNECTED, &config->runtime_flags) || 237 test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 238 } 239 240 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, 241 int notify) 242 { 243 if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) { 244 struct link_dead_args *args; 245 args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO); 246 if (args) { 247 INIT_WORK(&args->work, nbd_dead_link_work); 248 args->index = nbd->index; 249 queue_work(system_wq, &args->work); 250 } 251 } 252 if (!nsock->dead) { 253 kernel_sock_shutdown(nsock->sock, SHUT_RDWR); 254 if (atomic_dec_return(&nbd->config->live_connections) == 0) { 255 if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED, 256 &nbd->config->runtime_flags)) { 257 set_bit(NBD_DISCONNECTED, 258 &nbd->config->runtime_flags); 259 dev_info(nbd_to_dev(nbd), 260 "Disconnected due to user request.\n"); 261 } 262 } 263 } 264 nsock->dead = true; 265 nsock->pending = NULL; 266 nsock->sent = 0; 267 } 268 269 static void nbd_size_clear(struct nbd_device *nbd) 270 { 271 if (nbd->config->bytesize) { 272 set_capacity(nbd->disk, 0); 273 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 274 } 275 
} 276 277 static void nbd_size_update(struct nbd_device *nbd) 278 { 279 struct nbd_config *config = nbd->config; 280 struct block_device *bdev = bdget_disk(nbd->disk, 0); 281 282 if (config->flags & NBD_FLAG_SEND_TRIM) { 283 nbd->disk->queue->limits.discard_granularity = config->blksize; 284 nbd->disk->queue->limits.discard_alignment = config->blksize; 285 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 286 } 287 blk_queue_logical_block_size(nbd->disk->queue, config->blksize); 288 blk_queue_physical_block_size(nbd->disk->queue, config->blksize); 289 set_capacity(nbd->disk, config->bytesize >> 9); 290 if (bdev) { 291 if (bdev->bd_disk) 292 bd_set_size(bdev, config->bytesize); 293 else 294 bdev->bd_invalidated = 1; 295 bdput(bdev); 296 } 297 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 298 } 299 300 static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, 301 loff_t nr_blocks) 302 { 303 struct nbd_config *config = nbd->config; 304 config->blksize = blocksize; 305 config->bytesize = blocksize * nr_blocks; 306 if (nbd->task_recv != NULL) 307 nbd_size_update(nbd); 308 } 309 310 static void nbd_complete_rq(struct request *req) 311 { 312 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 313 314 dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req, 315 cmd->status ? "failed" : "done"); 316 317 blk_mq_end_request(req, cmd->status); 318 } 319 320 /* 321 * Forcibly shutdown the socket causing all listeners to error 322 */ 323 static void sock_shutdown(struct nbd_device *nbd) 324 { 325 struct nbd_config *config = nbd->config; 326 int i; 327 328 if (config->num_connections == 0) 329 return; 330 if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags)) 331 return; 332 333 for (i = 0; i < config->num_connections; i++) { 334 struct nbd_sock *nsock = config->socks[i]; 335 mutex_lock(&nsock->tx_lock); 336 nbd_mark_nsock_dead(nbd, nsock, 0); 337 mutex_unlock(&nsock->tx_lock); 338 } 339 dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); 340 } 341 342 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, 343 bool reserved) 344 { 345 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 346 struct nbd_device *nbd = cmd->nbd; 347 struct nbd_config *config; 348 349 if (!refcount_inc_not_zero(&nbd->config_refs)) { 350 cmd->status = BLK_STS_TIMEOUT; 351 goto done; 352 } 353 config = nbd->config; 354 355 if (!mutex_trylock(&cmd->lock)) 356 return BLK_EH_RESET_TIMER; 357 358 if (config->num_connections > 1) { 359 dev_err_ratelimited(nbd_to_dev(nbd), 360 "Connection timed out, retrying (%d/%d alive)\n", 361 atomic_read(&config->live_connections), 362 config->num_connections); 363 /* 364 * Hooray we have more connections, requeue this IO, the submit 365 * path will put it on a real connection. 366 */ 367 if (config->socks && config->num_connections > 1) { 368 if (cmd->index < config->num_connections) { 369 struct nbd_sock *nsock = 370 config->socks[cmd->index]; 371 mutex_lock(&nsock->tx_lock); 372 /* We can have multiple outstanding requests, so 373 * we don't want to mark the nsock dead if we've 374 * already reconnected with a new socket, so 375 * only mark it dead if its the same socket we 376 * were sent out on. 
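				 * The pairing works like this: nbd_send_cmd()
				 * records cmd->cookie = nsock->cookie when the
				 * request goes out, and nbd_reconnect_socket()
				 * bumps nsock->cookie each time a dead socket is
				 * replaced, so a timeout that fires after a
				 * reconnect sees mismatched cookies and leaves
				 * the healthy replacement socket alone.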
377 */ 378 if (cmd->cookie == nsock->cookie) 379 nbd_mark_nsock_dead(nbd, nsock, 1); 380 mutex_unlock(&nsock->tx_lock); 381 } 382 mutex_unlock(&cmd->lock); 383 nbd_requeue_cmd(cmd); 384 nbd_config_put(nbd); 385 return BLK_EH_DONE; 386 } 387 } else { 388 dev_err_ratelimited(nbd_to_dev(nbd), 389 "Connection timed out\n"); 390 } 391 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 392 cmd->status = BLK_STS_IOERR; 393 mutex_unlock(&cmd->lock); 394 sock_shutdown(nbd); 395 nbd_config_put(nbd); 396 done: 397 blk_mq_complete_request(req); 398 return BLK_EH_DONE; 399 } 400 401 /* 402 * Send or receive packet. 403 */ 404 static int sock_xmit(struct nbd_device *nbd, int index, int send, 405 struct iov_iter *iter, int msg_flags, int *sent) 406 { 407 struct nbd_config *config = nbd->config; 408 struct socket *sock = config->socks[index]->sock; 409 int result; 410 struct msghdr msg; 411 unsigned int noreclaim_flag; 412 413 if (unlikely(!sock)) { 414 dev_err_ratelimited(disk_to_dev(nbd->disk), 415 "Attempted %s on closed socket in sock_xmit\n", 416 (send ? "send" : "recv")); 417 return -EINVAL; 418 } 419 420 msg.msg_iter = *iter; 421 422 noreclaim_flag = memalloc_noreclaim_save(); 423 do { 424 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; 425 msg.msg_name = NULL; 426 msg.msg_namelen = 0; 427 msg.msg_control = NULL; 428 msg.msg_controllen = 0; 429 msg.msg_flags = msg_flags | MSG_NOSIGNAL; 430 431 if (send) 432 result = sock_sendmsg(sock, &msg); 433 else 434 result = sock_recvmsg(sock, &msg, msg.msg_flags); 435 436 if (result <= 0) { 437 if (result == 0) 438 result = -EPIPE; /* short read */ 439 break; 440 } 441 if (sent) 442 *sent += result; 443 } while (msg_data_left(&msg)); 444 445 memalloc_noreclaim_restore(noreclaim_flag); 446 447 return result; 448 } 449 450 /* 451 * Different settings for sk->sk_sndtimeo can result in different return values 452 * if there is a signal pending when we enter sendmsg, because reasons? 453 */ 454 static inline int was_interrupted(int result) 455 { 456 return result == -ERESTARTSYS || result == -EINTR; 457 } 458 459 /* always call with the tx_lock held */ 460 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) 461 { 462 struct request *req = blk_mq_rq_from_pdu(cmd); 463 struct nbd_config *config = nbd->config; 464 struct nbd_sock *nsock = config->socks[index]; 465 int result; 466 struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; 467 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 468 struct iov_iter from; 469 unsigned long size = blk_rq_bytes(req); 470 struct bio *bio; 471 u64 handle; 472 u32 type; 473 u32 nbd_cmd_flags = 0; 474 int sent = nsock->sent, skip = 0; 475 476 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); 477 478 switch (req_op(req)) { 479 case REQ_OP_DISCARD: 480 type = NBD_CMD_TRIM; 481 break; 482 case REQ_OP_FLUSH: 483 type = NBD_CMD_FLUSH; 484 break; 485 case REQ_OP_WRITE: 486 type = NBD_CMD_WRITE; 487 break; 488 case REQ_OP_READ: 489 type = NBD_CMD_READ; 490 break; 491 default: 492 return -EIO; 493 } 494 495 if (rq_data_dir(req) == WRITE && 496 (config->flags & NBD_FLAG_READ_ONLY)) { 497 dev_err_ratelimited(disk_to_dev(nbd->disk), 498 "Write on read-only\n"); 499 return -EIO; 500 } 501 502 if (req->cmd_flags & REQ_FUA) 503 nbd_cmd_flags |= NBD_CMD_FLAG_FUA; 504 505 /* We did a partial send previously, and we at least sent the whole 506 * request struct, so just go and send the rest of the pages in the 507 * request. 
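	 * In other words, nsock->sent records how far the previous attempt
	 * got: if it covers the whole header we jump to send_pages and let
	 * "skip" swallow the payload bytes that already went out; if it
	 * stopped mid-header we advance the iov_iter and resend only the
	 * remaining header bytes.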
	 */
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);
			goto send_pages;
		}
		iov_iter_advance(&from, sent);
	} else {
		cmd->cmd_cookie++;
	}
	cmd->index = index;
	cmd->cookie = nsock->cookie;
	request.type = htonl(type | nbd_cmd_flags);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	if (result <= 0) {
		if (was_interrupted(result)) {
			/* If we haven't sent anything we can just return BUSY,
			 * however if we have sent something we need to make
			 * sure we only allow this req to be sent until we are
			 * completely done.
			 */
			if (sent) {
				nsock->pending = req;
				nsock->sent = sent;
			}
			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
			return BLK_STS_RESOURCE;
		}
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Send control failed (result %d)\n", result);
		return -EAGAIN;
	}
send_pages:
	if (type != NBD_CMD_WRITE)
		goto out;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
			int flags = is_last ? 0 : MSG_MORE;

			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				req, bvec.bv_len);
			iov_iter_bvec(&from, ITER_BVEC | WRITE,
				      &bvec, 1, bvec.bv_len);
			if (skip) {
				if (skip >= iov_iter_count(&from)) {
					skip -= iov_iter_count(&from);
					continue;
				}
				iov_iter_advance(&from, skip);
				skip = 0;
			}
			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
			if (result <= 0) {
				if (was_interrupted(result)) {
					/* We've already sent the header, we
					 * have no choice but to set pending and
					 * return BUSY.
					 */
					nsock->pending = req;
					nsock->sent = sent;
					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
					return BLK_STS_RESOURCE;
				}
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EAGAIN;
			}
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
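			 * That is, once the final sock_xmit() returns the
			 * server may already have replied and the request may
			 * have been completed, at which point the bio chain
			 * can be freed; following bio->bi_next or advancing
			 * the iterator again would then touch freed memory,
			 * so we stop at the segment we know is the last one.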
599 */ 600 if (is_last) 601 break; 602 } 603 bio = next; 604 } 605 out: 606 nsock->pending = NULL; 607 nsock->sent = 0; 608 return 0; 609 } 610 611 /* NULL returned = something went wrong, inform userspace */ 612 static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) 613 { 614 struct nbd_config *config = nbd->config; 615 int result; 616 struct nbd_reply reply; 617 struct nbd_cmd *cmd; 618 struct request *req = NULL; 619 u64 handle; 620 u16 hwq; 621 u32 tag; 622 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; 623 struct iov_iter to; 624 int ret = 0; 625 626 reply.magic = 0; 627 iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); 628 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 629 if (result <= 0) { 630 if (!nbd_disconnected(config)) 631 dev_err(disk_to_dev(nbd->disk), 632 "Receive control failed (result %d)\n", result); 633 return ERR_PTR(result); 634 } 635 636 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 637 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", 638 (unsigned long)ntohl(reply.magic)); 639 return ERR_PTR(-EPROTO); 640 } 641 642 memcpy(&handle, reply.handle, sizeof(handle)); 643 tag = nbd_handle_to_tag(handle); 644 hwq = blk_mq_unique_tag_to_hwq(tag); 645 if (hwq < nbd->tag_set.nr_hw_queues) 646 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], 647 blk_mq_unique_tag_to_tag(tag)); 648 if (!req || !blk_mq_request_started(req)) { 649 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n", 650 tag, req); 651 return ERR_PTR(-ENOENT); 652 } 653 cmd = blk_mq_rq_to_pdu(req); 654 655 mutex_lock(&cmd->lock); 656 if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { 657 dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", 658 req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); 659 ret = -ENOENT; 660 goto out; 661 } 662 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { 663 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", 664 req); 665 ret = -ENOENT; 666 goto out; 667 } 668 if (ntohl(reply.error)) { 669 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 670 ntohl(reply.error)); 671 cmd->status = BLK_STS_IOERR; 672 goto out; 673 } 674 675 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); 676 if (rq_data_dir(req) != WRITE) { 677 struct req_iterator iter; 678 struct bio_vec bvec; 679 680 rq_for_each_segment(bvec, req, iter) { 681 iov_iter_bvec(&to, ITER_BVEC | READ, 682 &bvec, 1, bvec.bv_len); 683 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 684 if (result <= 0) { 685 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 686 result); 687 /* 688 * If we've disconnected or we only have 1 689 * connection then we need to make sure we 690 * complete this request, otherwise error out 691 * and let the timeout stuff handle resubmitting 692 * this request onto another connection. 693 */ 694 if (nbd_disconnected(config) || 695 config->num_connections <= 1) { 696 cmd->status = BLK_STS_IOERR; 697 goto out; 698 } 699 ret = -EIO; 700 goto out; 701 } 702 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", 703 req, bvec.bv_len); 704 } 705 } 706 out: 707 mutex_unlock(&cmd->lock); 708 return ret ? 
ERR_PTR(ret) : cmd; 709 } 710 711 static void recv_work(struct work_struct *work) 712 { 713 struct recv_thread_args *args = container_of(work, 714 struct recv_thread_args, 715 work); 716 struct nbd_device *nbd = args->nbd; 717 struct nbd_config *config = nbd->config; 718 struct nbd_cmd *cmd; 719 720 while (1) { 721 cmd = nbd_read_stat(nbd, args->index); 722 if (IS_ERR(cmd)) { 723 struct nbd_sock *nsock = config->socks[args->index]; 724 725 mutex_lock(&nsock->tx_lock); 726 nbd_mark_nsock_dead(nbd, nsock, 1); 727 mutex_unlock(&nsock->tx_lock); 728 break; 729 } 730 731 blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); 732 } 733 atomic_dec(&config->recv_threads); 734 wake_up(&config->recv_wq); 735 nbd_config_put(nbd); 736 kfree(args); 737 } 738 739 static void nbd_clear_req(struct request *req, void *data, bool reserved) 740 { 741 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 742 743 cmd->status = BLK_STS_IOERR; 744 blk_mq_complete_request(req); 745 } 746 747 static void nbd_clear_que(struct nbd_device *nbd) 748 { 749 blk_mq_quiesce_queue(nbd->disk->queue); 750 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); 751 blk_mq_unquiesce_queue(nbd->disk->queue); 752 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); 753 } 754 755 static int find_fallback(struct nbd_device *nbd, int index) 756 { 757 struct nbd_config *config = nbd->config; 758 int new_index = -1; 759 struct nbd_sock *nsock = config->socks[index]; 760 int fallback = nsock->fallback_index; 761 762 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 763 return new_index; 764 765 if (config->num_connections <= 1) { 766 dev_err_ratelimited(disk_to_dev(nbd->disk), 767 "Attempted send on invalid socket\n"); 768 return new_index; 769 } 770 771 if (fallback >= 0 && fallback < config->num_connections && 772 !config->socks[fallback]->dead) 773 return fallback; 774 775 if (nsock->fallback_index < 0 || 776 nsock->fallback_index >= config->num_connections || 777 config->socks[nsock->fallback_index]->dead) { 778 int i; 779 for (i = 0; i < config->num_connections; i++) { 780 if (i == index) 781 continue; 782 if (!config->socks[i]->dead) { 783 new_index = i; 784 break; 785 } 786 } 787 nsock->fallback_index = new_index; 788 if (new_index < 0) { 789 dev_err_ratelimited(disk_to_dev(nbd->disk), 790 "Dead connection, failed to find a fallback\n"); 791 return new_index; 792 } 793 } 794 new_index = nsock->fallback_index; 795 return new_index; 796 } 797 798 static int wait_for_reconnect(struct nbd_device *nbd) 799 { 800 struct nbd_config *config = nbd->config; 801 if (!config->dead_conn_timeout) 802 return 0; 803 if (test_bit(NBD_DISCONNECTED, &config->runtime_flags)) 804 return 0; 805 return wait_event_timeout(config->conn_wait, 806 atomic_read(&config->live_connections) > 0, 807 config->dead_conn_timeout) > 0; 808 } 809 810 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) 811 { 812 struct request *req = blk_mq_rq_from_pdu(cmd); 813 struct nbd_device *nbd = cmd->nbd; 814 struct nbd_config *config; 815 struct nbd_sock *nsock; 816 int ret; 817 818 if (!refcount_inc_not_zero(&nbd->config_refs)) { 819 dev_err_ratelimited(disk_to_dev(nbd->disk), 820 "Socks array is empty\n"); 821 blk_mq_start_request(req); 822 return -EINVAL; 823 } 824 config = nbd->config; 825 826 if (index >= config->num_connections) { 827 dev_err_ratelimited(disk_to_dev(nbd->disk), 828 "Attempted send on invalid socket\n"); 829 nbd_config_put(nbd); 830 blk_mq_start_request(req); 831 return -EINVAL; 832 } 833 cmd->status = BLK_STS_OK; 834 again: 835 nsock = 
config->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (nsock->dead) {
		int old_index = index;
		index = find_fallback(nbd, index);
		mutex_unlock(&nsock->tx_lock);
		if (index < 0) {
			if (wait_for_reconnect(nbd)) {
				index = old_index;
				goto again;
			}
			/* All the sockets should already be down at this point,
			 * we just want to make sure that DISCONNECTED is set so
			 * any requests that come in that were queued waiting
			 * for the reconnect timer don't trigger the timer again
			 * and instead just error out.
			 */
			sock_shutdown(nbd);
			nbd_config_put(nbd);
			blk_mq_start_request(req);
			return -EIO;
		}
		goto again;
	}

	/* Handle the case that we have a pending request that was partially
	 * transmitted that _has_ to be serviced first. We need to call requeue
	 * here so that it gets put _after_ the request that is already on the
	 * dispatch list.
	 */
	blk_mq_start_request(req);
	if (unlikely(nsock->pending && nsock->pending != req)) {
		nbd_requeue_cmd(cmd);
		ret = 0;
		goto out;
	}
	/*
	 * Some failures are related to the link going down, so anything that
	 * returns EAGAIN can be retried on a different socket.
	 */
	ret = nbd_send_cmd(nbd, cmd, index);
	if (ret == -EAGAIN) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed, requeueing\n");
		nbd_mark_nsock_dead(nbd, nsock, 1);
		nbd_requeue_cmd(cmd);
		ret = 0;
	}
out:
	mutex_unlock(&nsock->tx_lock);
	nbd_config_put(nbd);
	return ret;
}

static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	int ret;

	/*
	 * Since we look at the bios to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send. This keeps us from dereferencing
	 * freed data if we have particularly fast completions (i.e. we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	mutex_lock(&cmd->lock);
	clear_bit(NBD_CMD_REQUEUED, &cmd->flags);

	/* We can be called directly from the user space process, which means we
	 * could possibly have signals pending so our sendmsg will fail. In
	 * this case we need to return that we are busy, otherwise error out as
	 * appropriate.
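	 * nbd_handle_cmd() folds this together for us: a negative errno
	 * (bad index, no config, all connections dead) becomes BLK_STS_IOERR
	 * below, 0 becomes BLK_STS_OK, and the positive BLK_STS_RESOURCE that
	 * nbd_send_cmd() returns for an interrupted send is passed straight
	 * back so the block layer re-queues the request and tries again.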
911 */ 912 ret = nbd_handle_cmd(cmd, hctx->queue_num); 913 if (ret < 0) 914 ret = BLK_STS_IOERR; 915 else if (!ret) 916 ret = BLK_STS_OK; 917 mutex_unlock(&cmd->lock); 918 919 return ret; 920 } 921 922 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 923 bool netlink) 924 { 925 struct nbd_config *config = nbd->config; 926 struct socket *sock; 927 struct nbd_sock **socks; 928 struct nbd_sock *nsock; 929 int err; 930 931 sock = sockfd_lookup(arg, &err); 932 if (!sock) 933 return err; 934 935 if (!netlink && !nbd->task_setup && 936 !test_bit(NBD_BOUND, &config->runtime_flags)) 937 nbd->task_setup = current; 938 939 if (!netlink && 940 (nbd->task_setup != current || 941 test_bit(NBD_BOUND, &config->runtime_flags))) { 942 dev_err(disk_to_dev(nbd->disk), 943 "Device being setup by another task"); 944 sockfd_put(sock); 945 return -EBUSY; 946 } 947 948 socks = krealloc(config->socks, (config->num_connections + 1) * 949 sizeof(struct nbd_sock *), GFP_KERNEL); 950 if (!socks) { 951 sockfd_put(sock); 952 return -ENOMEM; 953 } 954 nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); 955 if (!nsock) { 956 sockfd_put(sock); 957 return -ENOMEM; 958 } 959 960 config->socks = socks; 961 962 nsock->fallback_index = -1; 963 nsock->dead = false; 964 mutex_init(&nsock->tx_lock); 965 nsock->sock = sock; 966 nsock->pending = NULL; 967 nsock->sent = 0; 968 nsock->cookie = 0; 969 socks[config->num_connections++] = nsock; 970 atomic_inc(&config->live_connections); 971 972 return 0; 973 } 974 975 static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) 976 { 977 struct nbd_config *config = nbd->config; 978 struct socket *sock, *old; 979 struct recv_thread_args *args; 980 int i; 981 int err; 982 983 sock = sockfd_lookup(arg, &err); 984 if (!sock) 985 return err; 986 987 args = kzalloc(sizeof(*args), GFP_KERNEL); 988 if (!args) { 989 sockfd_put(sock); 990 return -ENOMEM; 991 } 992 993 for (i = 0; i < config->num_connections; i++) { 994 struct nbd_sock *nsock = config->socks[i]; 995 996 if (!nsock->dead) 997 continue; 998 999 mutex_lock(&nsock->tx_lock); 1000 if (!nsock->dead) { 1001 mutex_unlock(&nsock->tx_lock); 1002 continue; 1003 } 1004 sk_set_memalloc(sock->sk); 1005 if (nbd->tag_set.timeout) 1006 sock->sk->sk_sndtimeo = nbd->tag_set.timeout; 1007 atomic_inc(&config->recv_threads); 1008 refcount_inc(&nbd->config_refs); 1009 old = nsock->sock; 1010 nsock->fallback_index = -1; 1011 nsock->sock = sock; 1012 nsock->dead = false; 1013 INIT_WORK(&args->work, recv_work); 1014 args->index = i; 1015 args->nbd = nbd; 1016 nsock->cookie++; 1017 mutex_unlock(&nsock->tx_lock); 1018 sockfd_put(old); 1019 1020 clear_bit(NBD_DISCONNECTED, &config->runtime_flags); 1021 1022 /* We take the tx_mutex in an error path in the recv_work, so we 1023 * need to queue_work outside of the tx_mutex. 
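		 * ("tx_mutex" here means nsock->tx_lock: the error path in
		 * recv_work() takes it around nbd_mark_nsock_dead() when
		 * nbd_read_stat() fails.)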
1024 */ 1025 queue_work(recv_workqueue, &args->work); 1026 1027 atomic_inc(&config->live_connections); 1028 wake_up(&config->conn_wait); 1029 return 0; 1030 } 1031 sockfd_put(sock); 1032 kfree(args); 1033 return -ENOSPC; 1034 } 1035 1036 static void nbd_bdev_reset(struct block_device *bdev) 1037 { 1038 if (bdev->bd_openers > 1) 1039 return; 1040 bd_set_size(bdev, 0); 1041 } 1042 1043 static void nbd_parse_flags(struct nbd_device *nbd) 1044 { 1045 struct nbd_config *config = nbd->config; 1046 if (config->flags & NBD_FLAG_READ_ONLY) 1047 set_disk_ro(nbd->disk, true); 1048 else 1049 set_disk_ro(nbd->disk, false); 1050 if (config->flags & NBD_FLAG_SEND_TRIM) 1051 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1052 if (config->flags & NBD_FLAG_SEND_FLUSH) { 1053 if (config->flags & NBD_FLAG_SEND_FUA) 1054 blk_queue_write_cache(nbd->disk->queue, true, true); 1055 else 1056 blk_queue_write_cache(nbd->disk->queue, true, false); 1057 } 1058 else 1059 blk_queue_write_cache(nbd->disk->queue, false, false); 1060 } 1061 1062 static void send_disconnects(struct nbd_device *nbd) 1063 { 1064 struct nbd_config *config = nbd->config; 1065 struct nbd_request request = { 1066 .magic = htonl(NBD_REQUEST_MAGIC), 1067 .type = htonl(NBD_CMD_DISC), 1068 }; 1069 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; 1070 struct iov_iter from; 1071 int i, ret; 1072 1073 for (i = 0; i < config->num_connections; i++) { 1074 struct nbd_sock *nsock = config->socks[i]; 1075 1076 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); 1077 mutex_lock(&nsock->tx_lock); 1078 ret = sock_xmit(nbd, i, 1, &from, 0, NULL); 1079 if (ret <= 0) 1080 dev_err(disk_to_dev(nbd->disk), 1081 "Send disconnect failed %d\n", ret); 1082 mutex_unlock(&nsock->tx_lock); 1083 } 1084 } 1085 1086 static int nbd_disconnect(struct nbd_device *nbd) 1087 { 1088 struct nbd_config *config = nbd->config; 1089 1090 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); 1091 set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags); 1092 send_disconnects(nbd); 1093 return 0; 1094 } 1095 1096 static void nbd_clear_sock(struct nbd_device *nbd) 1097 { 1098 sock_shutdown(nbd); 1099 nbd_clear_que(nbd); 1100 nbd->task_setup = NULL; 1101 } 1102 1103 static void nbd_config_put(struct nbd_device *nbd) 1104 { 1105 if (refcount_dec_and_mutex_lock(&nbd->config_refs, 1106 &nbd->config_lock)) { 1107 struct nbd_config *config = nbd->config; 1108 nbd_dev_dbg_close(nbd); 1109 nbd_size_clear(nbd); 1110 if (test_and_clear_bit(NBD_HAS_PID_FILE, 1111 &config->runtime_flags)) 1112 device_remove_file(disk_to_dev(nbd->disk), &pid_attr); 1113 nbd->task_recv = NULL; 1114 nbd_clear_sock(nbd); 1115 if (config->num_connections) { 1116 int i; 1117 for (i = 0; i < config->num_connections; i++) { 1118 sockfd_put(config->socks[i]->sock); 1119 kfree(config->socks[i]); 1120 } 1121 kfree(config->socks); 1122 } 1123 kfree(nbd->config); 1124 nbd->config = NULL; 1125 1126 nbd->tag_set.timeout = 0; 1127 nbd->disk->queue->limits.discard_granularity = 0; 1128 nbd->disk->queue->limits.discard_alignment = 0; 1129 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 1130 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); 1131 1132 mutex_unlock(&nbd->config_lock); 1133 nbd_put(nbd); 1134 module_put(THIS_MODULE); 1135 } 1136 } 1137 1138 static int nbd_start_device(struct nbd_device *nbd) 1139 { 1140 struct nbd_config *config = nbd->config; 1141 int num_connections = config->num_connections; 1142 int error = 0, i; 1143 1144 if (nbd->task_recv) 1145 
return -EBUSY; 1146 if (!config->socks) 1147 return -EINVAL; 1148 if (num_connections > 1 && 1149 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) { 1150 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); 1151 return -EINVAL; 1152 } 1153 1154 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); 1155 nbd->task_recv = current; 1156 1157 nbd_parse_flags(nbd); 1158 1159 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); 1160 if (error) { 1161 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); 1162 return error; 1163 } 1164 set_bit(NBD_HAS_PID_FILE, &config->runtime_flags); 1165 1166 nbd_dev_dbg_init(nbd); 1167 for (i = 0; i < num_connections; i++) { 1168 struct recv_thread_args *args; 1169 1170 args = kzalloc(sizeof(*args), GFP_KERNEL); 1171 if (!args) { 1172 sock_shutdown(nbd); 1173 return -ENOMEM; 1174 } 1175 sk_set_memalloc(config->socks[i]->sock->sk); 1176 if (nbd->tag_set.timeout) 1177 config->socks[i]->sock->sk->sk_sndtimeo = 1178 nbd->tag_set.timeout; 1179 atomic_inc(&config->recv_threads); 1180 refcount_inc(&nbd->config_refs); 1181 INIT_WORK(&args->work, recv_work); 1182 args->nbd = nbd; 1183 args->index = i; 1184 queue_work(recv_workqueue, &args->work); 1185 } 1186 nbd_size_update(nbd); 1187 return error; 1188 } 1189 1190 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) 1191 { 1192 struct nbd_config *config = nbd->config; 1193 int ret; 1194 1195 ret = nbd_start_device(nbd); 1196 if (ret) 1197 return ret; 1198 1199 if (max_part) 1200 bdev->bd_invalidated = 1; 1201 mutex_unlock(&nbd->config_lock); 1202 ret = wait_event_interruptible(config->recv_wq, 1203 atomic_read(&config->recv_threads) == 0); 1204 if (ret) 1205 sock_shutdown(nbd); 1206 mutex_lock(&nbd->config_lock); 1207 nbd_bdev_reset(bdev); 1208 /* user requested, ignore socket errors */ 1209 if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags)) 1210 ret = 0; 1211 if (test_bit(NBD_TIMEDOUT, &config->runtime_flags)) 1212 ret = -ETIMEDOUT; 1213 return ret; 1214 } 1215 1216 static void nbd_clear_sock_ioctl(struct nbd_device *nbd, 1217 struct block_device *bdev) 1218 { 1219 sock_shutdown(nbd); 1220 kill_bdev(bdev); 1221 nbd_bdev_reset(bdev); 1222 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1223 &nbd->config->runtime_flags)) 1224 nbd_config_put(nbd); 1225 } 1226 1227 /* Must be called with config_lock held */ 1228 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, 1229 unsigned int cmd, unsigned long arg) 1230 { 1231 struct nbd_config *config = nbd->config; 1232 1233 switch (cmd) { 1234 case NBD_DISCONNECT: 1235 return nbd_disconnect(nbd); 1236 case NBD_CLEAR_SOCK: 1237 nbd_clear_sock_ioctl(nbd, bdev); 1238 return 0; 1239 case NBD_SET_SOCK: 1240 return nbd_add_socket(nbd, arg, false); 1241 case NBD_SET_BLKSIZE: 1242 nbd_size_set(nbd, arg, 1243 div_s64(config->bytesize, arg)); 1244 return 0; 1245 case NBD_SET_SIZE: 1246 nbd_size_set(nbd, config->blksize, 1247 div_s64(arg, config->blksize)); 1248 return 0; 1249 case NBD_SET_SIZE_BLOCKS: 1250 nbd_size_set(nbd, config->blksize, arg); 1251 return 0; 1252 case NBD_SET_TIMEOUT: 1253 if (arg) { 1254 nbd->tag_set.timeout = arg * HZ; 1255 blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); 1256 } 1257 return 0; 1258 1259 case NBD_SET_FLAGS: 1260 config->flags = arg; 1261 return 0; 1262 case NBD_DO_IT: 1263 return nbd_start_device_ioctl(nbd, bdev); 1264 case NBD_CLEAR_QUE: 1265 /* 1266 * This is for compatibility only. 
The queue is always cleared 1267 * by NBD_DO_IT or NBD_CLEAR_SOCK. 1268 */ 1269 return 0; 1270 case NBD_PRINT_DEBUG: 1271 /* 1272 * For compatibility only, we no longer keep a list of 1273 * outstanding requests. 1274 */ 1275 return 0; 1276 } 1277 return -ENOTTY; 1278 } 1279 1280 static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 1281 unsigned int cmd, unsigned long arg) 1282 { 1283 struct nbd_device *nbd = bdev->bd_disk->private_data; 1284 struct nbd_config *config = nbd->config; 1285 int error = -EINVAL; 1286 1287 if (!capable(CAP_SYS_ADMIN)) 1288 return -EPERM; 1289 1290 /* The block layer will pass back some non-nbd ioctls in case we have 1291 * special handling for them, but we don't so just return an error. 1292 */ 1293 if (_IOC_TYPE(cmd) != 0xab) 1294 return -EINVAL; 1295 1296 mutex_lock(&nbd->config_lock); 1297 1298 /* Don't allow ioctl operations on a nbd device that was created with 1299 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine. 1300 */ 1301 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1302 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK)) 1303 error = __nbd_ioctl(bdev, nbd, cmd, arg); 1304 else 1305 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n"); 1306 mutex_unlock(&nbd->config_lock); 1307 return error; 1308 } 1309 1310 static struct nbd_config *nbd_alloc_config(void) 1311 { 1312 struct nbd_config *config; 1313 1314 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); 1315 if (!config) 1316 return NULL; 1317 atomic_set(&config->recv_threads, 0); 1318 init_waitqueue_head(&config->recv_wq); 1319 init_waitqueue_head(&config->conn_wait); 1320 config->blksize = 1024; 1321 atomic_set(&config->live_connections, 0); 1322 try_module_get(THIS_MODULE); 1323 return config; 1324 } 1325 1326 static int nbd_open(struct block_device *bdev, fmode_t mode) 1327 { 1328 struct nbd_device *nbd; 1329 int ret = 0; 1330 1331 mutex_lock(&nbd_index_mutex); 1332 nbd = bdev->bd_disk->private_data; 1333 if (!nbd) { 1334 ret = -ENXIO; 1335 goto out; 1336 } 1337 if (!refcount_inc_not_zero(&nbd->refs)) { 1338 ret = -ENXIO; 1339 goto out; 1340 } 1341 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1342 struct nbd_config *config; 1343 1344 mutex_lock(&nbd->config_lock); 1345 if (refcount_inc_not_zero(&nbd->config_refs)) { 1346 mutex_unlock(&nbd->config_lock); 1347 goto out; 1348 } 1349 config = nbd->config = nbd_alloc_config(); 1350 if (!config) { 1351 ret = -ENOMEM; 1352 mutex_unlock(&nbd->config_lock); 1353 goto out; 1354 } 1355 refcount_set(&nbd->config_refs, 1); 1356 refcount_inc(&nbd->refs); 1357 mutex_unlock(&nbd->config_lock); 1358 bdev->bd_invalidated = 1; 1359 } else if (nbd_disconnected(nbd->config)) { 1360 bdev->bd_invalidated = 1; 1361 } 1362 out: 1363 mutex_unlock(&nbd_index_mutex); 1364 return ret; 1365 } 1366 1367 static void nbd_release(struct gendisk *disk, fmode_t mode) 1368 { 1369 struct nbd_device *nbd = disk->private_data; 1370 struct block_device *bdev = bdget_disk(disk, 0); 1371 1372 if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && 1373 bdev->bd_openers == 0) 1374 nbd_disconnect_and_put(nbd); 1375 1376 nbd_config_put(nbd); 1377 nbd_put(nbd); 1378 } 1379 1380 static const struct block_device_operations nbd_fops = 1381 { 1382 .owner = THIS_MODULE, 1383 .open = nbd_open, 1384 .release = nbd_release, 1385 .ioctl = nbd_ioctl, 1386 .compat_ioctl = nbd_ioctl, 1387 }; 1388 1389 #if IS_ENABLED(CONFIG_DEBUG_FS) 1390 1391 static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) 1392 { 1393 
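	/*
	 * Backs the per-device debugfs "tasks" file created in
	 * nbd_dev_dbg_init() (typically visible as
	 * /sys/kernel/debug/nbd/nbdX/tasks when debugfs is mounted); it
	 * reports the pid of nbd->task_recv, i.e. the task that started the
	 * device via NBD_DO_IT or the netlink connect path.
	 */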
struct nbd_device *nbd = s->private; 1394 1395 if (nbd->task_recv) 1396 seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); 1397 1398 return 0; 1399 } 1400 1401 static int nbd_dbg_tasks_open(struct inode *inode, struct file *file) 1402 { 1403 return single_open(file, nbd_dbg_tasks_show, inode->i_private); 1404 } 1405 1406 static const struct file_operations nbd_dbg_tasks_ops = { 1407 .open = nbd_dbg_tasks_open, 1408 .read = seq_read, 1409 .llseek = seq_lseek, 1410 .release = single_release, 1411 }; 1412 1413 static int nbd_dbg_flags_show(struct seq_file *s, void *unused) 1414 { 1415 struct nbd_device *nbd = s->private; 1416 u32 flags = nbd->config->flags; 1417 1418 seq_printf(s, "Hex: 0x%08x\n\n", flags); 1419 1420 seq_puts(s, "Known flags:\n"); 1421 1422 if (flags & NBD_FLAG_HAS_FLAGS) 1423 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n"); 1424 if (flags & NBD_FLAG_READ_ONLY) 1425 seq_puts(s, "NBD_FLAG_READ_ONLY\n"); 1426 if (flags & NBD_FLAG_SEND_FLUSH) 1427 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n"); 1428 if (flags & NBD_FLAG_SEND_FUA) 1429 seq_puts(s, "NBD_FLAG_SEND_FUA\n"); 1430 if (flags & NBD_FLAG_SEND_TRIM) 1431 seq_puts(s, "NBD_FLAG_SEND_TRIM\n"); 1432 1433 return 0; 1434 } 1435 1436 static int nbd_dbg_flags_open(struct inode *inode, struct file *file) 1437 { 1438 return single_open(file, nbd_dbg_flags_show, inode->i_private); 1439 } 1440 1441 static const struct file_operations nbd_dbg_flags_ops = { 1442 .open = nbd_dbg_flags_open, 1443 .read = seq_read, 1444 .llseek = seq_lseek, 1445 .release = single_release, 1446 }; 1447 1448 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1449 { 1450 struct dentry *dir; 1451 struct nbd_config *config = nbd->config; 1452 1453 if (!nbd_dbg_dir) 1454 return -EIO; 1455 1456 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir); 1457 if (!dir) { 1458 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n", 1459 nbd_name(nbd)); 1460 return -EIO; 1461 } 1462 config->dbg_dir = dir; 1463 1464 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); 1465 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize); 1466 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); 1467 debugfs_create_u64("blocksize", 0444, dir, &config->blksize); 1468 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); 1469 1470 return 0; 1471 } 1472 1473 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1474 { 1475 debugfs_remove_recursive(nbd->config->dbg_dir); 1476 } 1477 1478 static int nbd_dbg_init(void) 1479 { 1480 struct dentry *dbg_dir; 1481 1482 dbg_dir = debugfs_create_dir("nbd", NULL); 1483 if (!dbg_dir) 1484 return -EIO; 1485 1486 nbd_dbg_dir = dbg_dir; 1487 1488 return 0; 1489 } 1490 1491 static void nbd_dbg_close(void) 1492 { 1493 debugfs_remove_recursive(nbd_dbg_dir); 1494 } 1495 1496 #else /* IS_ENABLED(CONFIG_DEBUG_FS) */ 1497 1498 static int nbd_dev_dbg_init(struct nbd_device *nbd) 1499 { 1500 return 0; 1501 } 1502 1503 static void nbd_dev_dbg_close(struct nbd_device *nbd) 1504 { 1505 } 1506 1507 static int nbd_dbg_init(void) 1508 { 1509 return 0; 1510 } 1511 1512 static void nbd_dbg_close(void) 1513 { 1514 } 1515 1516 #endif 1517 1518 static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, 1519 unsigned int hctx_idx, unsigned int numa_node) 1520 { 1521 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); 1522 cmd->nbd = set->driver_data; 1523 cmd->flags = 0; 1524 mutex_init(&cmd->lock); 1525 return 0; 1526 } 1527 1528 static const struct blk_mq_ops nbd_mq_ops = { 1529 .queue_rq = nbd_queue_rq, 1530 
.complete = nbd_complete_rq, 1531 .init_request = nbd_init_request, 1532 .timeout = nbd_xmit_timeout, 1533 }; 1534 1535 static int nbd_dev_add(int index) 1536 { 1537 struct nbd_device *nbd; 1538 struct gendisk *disk; 1539 struct request_queue *q; 1540 int err = -ENOMEM; 1541 1542 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); 1543 if (!nbd) 1544 goto out; 1545 1546 disk = alloc_disk(1 << part_shift); 1547 if (!disk) 1548 goto out_free_nbd; 1549 1550 if (index >= 0) { 1551 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, 1552 GFP_KERNEL); 1553 if (err == -ENOSPC) 1554 err = -EEXIST; 1555 } else { 1556 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); 1557 if (err >= 0) 1558 index = err; 1559 } 1560 if (err < 0) 1561 goto out_free_disk; 1562 1563 nbd->index = index; 1564 nbd->disk = disk; 1565 nbd->tag_set.ops = &nbd_mq_ops; 1566 nbd->tag_set.nr_hw_queues = 1; 1567 nbd->tag_set.queue_depth = 128; 1568 nbd->tag_set.numa_node = NUMA_NO_NODE; 1569 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); 1570 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | 1571 BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; 1572 nbd->tag_set.driver_data = nbd; 1573 1574 err = blk_mq_alloc_tag_set(&nbd->tag_set); 1575 if (err) 1576 goto out_free_idr; 1577 1578 q = blk_mq_init_queue(&nbd->tag_set); 1579 if (IS_ERR(q)) { 1580 err = PTR_ERR(q); 1581 goto out_free_tags; 1582 } 1583 disk->queue = q; 1584 1585 /* 1586 * Tell the block layer that we are not a rotational device 1587 */ 1588 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); 1589 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); 1590 disk->queue->limits.discard_granularity = 0; 1591 disk->queue->limits.discard_alignment = 0; 1592 blk_queue_max_discard_sectors(disk->queue, 0); 1593 blk_queue_max_segment_size(disk->queue, UINT_MAX); 1594 blk_queue_max_segments(disk->queue, USHRT_MAX); 1595 blk_queue_max_hw_sectors(disk->queue, 65536); 1596 disk->queue->limits.max_sectors = 256; 1597 1598 mutex_init(&nbd->config_lock); 1599 refcount_set(&nbd->config_refs, 0); 1600 refcount_set(&nbd->refs, 1); 1601 INIT_LIST_HEAD(&nbd->list); 1602 disk->major = NBD_MAJOR; 1603 disk->first_minor = index << part_shift; 1604 disk->fops = &nbd_fops; 1605 disk->private_data = nbd; 1606 sprintf(disk->disk_name, "nbd%d", index); 1607 add_disk(disk); 1608 nbd_total_devices++; 1609 return index; 1610 1611 out_free_tags: 1612 blk_mq_free_tag_set(&nbd->tag_set); 1613 out_free_idr: 1614 idr_remove(&nbd_index_idr, index); 1615 out_free_disk: 1616 put_disk(disk); 1617 out_free_nbd: 1618 kfree(nbd); 1619 out: 1620 return err; 1621 } 1622 1623 static int find_free_cb(int id, void *ptr, void *data) 1624 { 1625 struct nbd_device *nbd = ptr; 1626 struct nbd_device **found = data; 1627 1628 if (!refcount_read(&nbd->config_refs)) { 1629 *found = nbd; 1630 return 1; 1631 } 1632 return 0; 1633 } 1634 1635 /* Netlink interface. 
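 *
 * Everything below implements the NBD_GENL_FAMILY_NAME generic netlink
 * family: NBD_CMD_CONNECT, NBD_CMD_DISCONNECT, NBD_CMD_RECONFIGURE and
 * NBD_CMD_STATUS are handled by the nbd_genl_* functions registered in
 * nbd_connect_genl_ops, and NBD_CMD_LINK_DEAD is multicast on the
 * NBD_GENL_MCAST_GROUP_NAME group (see nbd_mcast_index()) when a connection
 * dies, so userspace can supply a replacement socket via NBD_CMD_RECONFIGURE.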
*/ 1636 static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { 1637 [NBD_ATTR_INDEX] = { .type = NLA_U32 }, 1638 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, 1639 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, 1640 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, 1641 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, 1642 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, 1643 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, 1644 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, 1645 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, 1646 }; 1647 1648 static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { 1649 [NBD_SOCK_FD] = { .type = NLA_U32 }, 1650 }; 1651 1652 /* We don't use this right now since we don't parse the incoming list, but we 1653 * still want it here so userspace knows what to expect. 1654 */ 1655 static struct nla_policy __attribute__((unused)) 1656 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { 1657 [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, 1658 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, 1659 }; 1660 1661 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) 1662 { 1663 struct nbd_device *nbd = NULL; 1664 struct nbd_config *config; 1665 int index = -1; 1666 int ret; 1667 bool put_dev = false; 1668 1669 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1670 return -EPERM; 1671 1672 if (info->attrs[NBD_ATTR_INDEX]) 1673 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1674 if (!info->attrs[NBD_ATTR_SOCKETS]) { 1675 printk(KERN_ERR "nbd: must specify at least one socket\n"); 1676 return -EINVAL; 1677 } 1678 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { 1679 printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); 1680 return -EINVAL; 1681 } 1682 again: 1683 mutex_lock(&nbd_index_mutex); 1684 if (index == -1) { 1685 ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); 1686 if (ret == 0) { 1687 int new_index; 1688 new_index = nbd_dev_add(-1); 1689 if (new_index < 0) { 1690 mutex_unlock(&nbd_index_mutex); 1691 printk(KERN_ERR "nbd: failed to add new device\n"); 1692 return new_index; 1693 } 1694 nbd = idr_find(&nbd_index_idr, new_index); 1695 } 1696 } else { 1697 nbd = idr_find(&nbd_index_idr, index); 1698 if (!nbd) { 1699 ret = nbd_dev_add(index); 1700 if (ret < 0) { 1701 mutex_unlock(&nbd_index_mutex); 1702 printk(KERN_ERR "nbd: failed to add new device\n"); 1703 return ret; 1704 } 1705 nbd = idr_find(&nbd_index_idr, index); 1706 } 1707 } 1708 if (!nbd) { 1709 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1710 index); 1711 mutex_unlock(&nbd_index_mutex); 1712 return -EINVAL; 1713 } 1714 if (!refcount_inc_not_zero(&nbd->refs)) { 1715 mutex_unlock(&nbd_index_mutex); 1716 if (index == -1) 1717 goto again; 1718 printk(KERN_ERR "nbd: device at index %d is going down\n", 1719 index); 1720 return -EINVAL; 1721 } 1722 mutex_unlock(&nbd_index_mutex); 1723 1724 mutex_lock(&nbd->config_lock); 1725 if (refcount_read(&nbd->config_refs)) { 1726 mutex_unlock(&nbd->config_lock); 1727 nbd_put(nbd); 1728 if (index == -1) 1729 goto again; 1730 printk(KERN_ERR "nbd: nbd%d already in use\n", index); 1731 return -EBUSY; 1732 } 1733 if (WARN_ON(nbd->config)) { 1734 mutex_unlock(&nbd->config_lock); 1735 nbd_put(nbd); 1736 return -EINVAL; 1737 } 1738 config = nbd->config = nbd_alloc_config(); 1739 if (!nbd->config) { 1740 mutex_unlock(&nbd->config_lock); 1741 nbd_put(nbd); 1742 printk(KERN_ERR "nbd: couldn't allocate config\n"); 1743 return -ENOMEM; 1744 } 1745 refcount_set(&nbd->config_refs, 1); 1746 set_bit(NBD_BOUND, &config->runtime_flags); 1747 1748 if 
(info->attrs[NBD_ATTR_SIZE_BYTES]) { 1749 u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); 1750 nbd_size_set(nbd, config->blksize, 1751 div64_u64(bytes, config->blksize)); 1752 } 1753 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { 1754 u64 bsize = 1755 nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); 1756 nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); 1757 } 1758 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1759 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1760 nbd->tag_set.timeout = timeout * HZ; 1761 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1762 } 1763 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1764 config->dead_conn_timeout = 1765 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1766 config->dead_conn_timeout *= HZ; 1767 } 1768 if (info->attrs[NBD_ATTR_SERVER_FLAGS]) 1769 config->flags = 1770 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); 1771 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1772 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1773 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1774 set_bit(NBD_DESTROY_ON_DISCONNECT, 1775 &config->runtime_flags); 1776 put_dev = true; 1777 } 1778 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1779 set_bit(NBD_DISCONNECT_ON_CLOSE, 1780 &config->runtime_flags); 1781 } 1782 } 1783 1784 if (info->attrs[NBD_ATTR_SOCKETS]) { 1785 struct nlattr *attr; 1786 int rem, fd; 1787 1788 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1789 rem) { 1790 struct nlattr *socks[NBD_SOCK_MAX+1]; 1791 1792 if (nla_type(attr) != NBD_SOCK_ITEM) { 1793 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1794 ret = -EINVAL; 1795 goto out; 1796 } 1797 ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, 1798 nbd_sock_policy, info->extack); 1799 if (ret != 0) { 1800 printk(KERN_ERR "nbd: error processing sock list\n"); 1801 ret = -EINVAL; 1802 goto out; 1803 } 1804 if (!socks[NBD_SOCK_FD]) 1805 continue; 1806 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1807 ret = nbd_add_socket(nbd, fd, true); 1808 if (ret) 1809 goto out; 1810 } 1811 } 1812 ret = nbd_start_device(nbd); 1813 out: 1814 mutex_unlock(&nbd->config_lock); 1815 if (!ret) { 1816 set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); 1817 refcount_inc(&nbd->config_refs); 1818 nbd_connect_reply(info, nbd->index); 1819 } 1820 nbd_config_put(nbd); 1821 if (put_dev) 1822 nbd_put(nbd); 1823 return ret; 1824 } 1825 1826 static void nbd_disconnect_and_put(struct nbd_device *nbd) 1827 { 1828 mutex_lock(&nbd->config_lock); 1829 nbd_disconnect(nbd); 1830 nbd_clear_sock(nbd); 1831 mutex_unlock(&nbd->config_lock); 1832 if (test_and_clear_bit(NBD_HAS_CONFIG_REF, 1833 &nbd->config->runtime_flags)) 1834 nbd_config_put(nbd); 1835 } 1836 1837 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) 1838 { 1839 struct nbd_device *nbd; 1840 int index; 1841 1842 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1843 return -EPERM; 1844 1845 if (!info->attrs[NBD_ATTR_INDEX]) { 1846 printk(KERN_ERR "nbd: must specify an index to disconnect\n"); 1847 return -EINVAL; 1848 } 1849 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1850 mutex_lock(&nbd_index_mutex); 1851 nbd = idr_find(&nbd_index_idr, index); 1852 if (!nbd) { 1853 mutex_unlock(&nbd_index_mutex); 1854 printk(KERN_ERR "nbd: couldn't find device at index %d\n", 1855 index); 1856 return -EINVAL; 1857 } 1858 if (!refcount_inc_not_zero(&nbd->refs)) { 1859 mutex_unlock(&nbd_index_mutex); 1860 printk(KERN_ERR "nbd: device at index %d is going down\n", 1861 index); 1862 return -EINVAL; 
1863 } 1864 mutex_unlock(&nbd_index_mutex); 1865 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1866 nbd_put(nbd); 1867 return 0; 1868 } 1869 nbd_disconnect_and_put(nbd); 1870 nbd_config_put(nbd); 1871 nbd_put(nbd); 1872 return 0; 1873 } 1874 1875 static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) 1876 { 1877 struct nbd_device *nbd = NULL; 1878 struct nbd_config *config; 1879 int index; 1880 int ret = 0; 1881 bool put_dev = false; 1882 1883 if (!netlink_capable(skb, CAP_SYS_ADMIN)) 1884 return -EPERM; 1885 1886 if (!info->attrs[NBD_ATTR_INDEX]) { 1887 printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); 1888 return -EINVAL; 1889 } 1890 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 1891 mutex_lock(&nbd_index_mutex); 1892 nbd = idr_find(&nbd_index_idr, index); 1893 if (!nbd) { 1894 mutex_unlock(&nbd_index_mutex); 1895 printk(KERN_ERR "nbd: couldn't find a device at index %d\n", 1896 index); 1897 return -EINVAL; 1898 } 1899 if (!refcount_inc_not_zero(&nbd->refs)) { 1900 mutex_unlock(&nbd_index_mutex); 1901 printk(KERN_ERR "nbd: device at index %d is going down\n", 1902 index); 1903 return -EINVAL; 1904 } 1905 mutex_unlock(&nbd_index_mutex); 1906 1907 if (!refcount_inc_not_zero(&nbd->config_refs)) { 1908 dev_err(nbd_to_dev(nbd), 1909 "not configured, cannot reconfigure\n"); 1910 nbd_put(nbd); 1911 return -EINVAL; 1912 } 1913 1914 mutex_lock(&nbd->config_lock); 1915 config = nbd->config; 1916 if (!test_bit(NBD_BOUND, &config->runtime_flags) || 1917 !nbd->task_recv) { 1918 dev_err(nbd_to_dev(nbd), 1919 "not configured, cannot reconfigure\n"); 1920 ret = -EINVAL; 1921 goto out; 1922 } 1923 1924 if (info->attrs[NBD_ATTR_TIMEOUT]) { 1925 u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); 1926 nbd->tag_set.timeout = timeout * HZ; 1927 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); 1928 } 1929 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { 1930 config->dead_conn_timeout = 1931 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); 1932 config->dead_conn_timeout *= HZ; 1933 } 1934 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { 1935 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); 1936 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { 1937 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, 1938 &config->runtime_flags)) 1939 put_dev = true; 1940 } else { 1941 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, 1942 &config->runtime_flags)) 1943 refcount_inc(&nbd->refs); 1944 } 1945 1946 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { 1947 set_bit(NBD_DISCONNECT_ON_CLOSE, 1948 &config->runtime_flags); 1949 } else { 1950 clear_bit(NBD_DISCONNECT_ON_CLOSE, 1951 &config->runtime_flags); 1952 } 1953 } 1954 1955 if (info->attrs[NBD_ATTR_SOCKETS]) { 1956 struct nlattr *attr; 1957 int rem, fd; 1958 1959 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], 1960 rem) { 1961 struct nlattr *socks[NBD_SOCK_MAX+1]; 1962 1963 if (nla_type(attr) != NBD_SOCK_ITEM) { 1964 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); 1965 ret = -EINVAL; 1966 goto out; 1967 } 1968 ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, 1969 nbd_sock_policy, info->extack); 1970 if (ret != 0) { 1971 printk(KERN_ERR "nbd: error processing sock list\n"); 1972 ret = -EINVAL; 1973 goto out; 1974 } 1975 if (!socks[NBD_SOCK_FD]) 1976 continue; 1977 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); 1978 ret = nbd_reconnect_socket(nbd, fd); 1979 if (ret) { 1980 if (ret == -ENOSPC) 1981 ret = 0; 1982 goto out; 1983 } 1984 dev_info(nbd_to_dev(nbd), "reconnected socket\n"); 1985 } 
1986 } 1987 out: 1988 mutex_unlock(&nbd->config_lock); 1989 nbd_config_put(nbd); 1990 nbd_put(nbd); 1991 if (put_dev) 1992 nbd_put(nbd); 1993 return ret; 1994 } 1995 1996 static const struct genl_ops nbd_connect_genl_ops[] = { 1997 { 1998 .cmd = NBD_CMD_CONNECT, 1999 .policy = nbd_attr_policy, 2000 .doit = nbd_genl_connect, 2001 }, 2002 { 2003 .cmd = NBD_CMD_DISCONNECT, 2004 .policy = nbd_attr_policy, 2005 .doit = nbd_genl_disconnect, 2006 }, 2007 { 2008 .cmd = NBD_CMD_RECONFIGURE, 2009 .policy = nbd_attr_policy, 2010 .doit = nbd_genl_reconfigure, 2011 }, 2012 { 2013 .cmd = NBD_CMD_STATUS, 2014 .policy = nbd_attr_policy, 2015 .doit = nbd_genl_status, 2016 }, 2017 }; 2018 2019 static const struct genl_multicast_group nbd_mcast_grps[] = { 2020 { .name = NBD_GENL_MCAST_GROUP_NAME, }, 2021 }; 2022 2023 static struct genl_family nbd_genl_family __ro_after_init = { 2024 .hdrsize = 0, 2025 .name = NBD_GENL_FAMILY_NAME, 2026 .version = NBD_GENL_VERSION, 2027 .module = THIS_MODULE, 2028 .ops = nbd_connect_genl_ops, 2029 .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), 2030 .maxattr = NBD_ATTR_MAX, 2031 .mcgrps = nbd_mcast_grps, 2032 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), 2033 }; 2034 2035 static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) 2036 { 2037 struct nlattr *dev_opt; 2038 u8 connected = 0; 2039 int ret; 2040 2041 /* This is a little racey, but for status it's ok. The 2042 * reason we don't take a ref here is because we can't 2043 * take a ref in the index == -1 case as we would need 2044 * to put under the nbd_index_mutex, which could 2045 * deadlock if we are configured to remove ourselves 2046 * once we're disconnected. 2047 */ 2048 if (refcount_read(&nbd->config_refs)) 2049 connected = 1; 2050 dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); 2051 if (!dev_opt) 2052 return -EMSGSIZE; 2053 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); 2054 if (ret) 2055 return -EMSGSIZE; 2056 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, 2057 connected); 2058 if (ret) 2059 return -EMSGSIZE; 2060 nla_nest_end(reply, dev_opt); 2061 return 0; 2062 } 2063 2064 static int status_cb(int id, void *ptr, void *data) 2065 { 2066 struct nbd_device *nbd = ptr; 2067 return populate_nbd_status(nbd, (struct sk_buff *)data); 2068 } 2069 2070 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) 2071 { 2072 struct nlattr *dev_list; 2073 struct sk_buff *reply; 2074 void *reply_head; 2075 size_t msg_size; 2076 int index = -1; 2077 int ret = -ENOMEM; 2078 2079 if (info->attrs[NBD_ATTR_INDEX]) 2080 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); 2081 2082 mutex_lock(&nbd_index_mutex); 2083 2084 msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + 2085 nla_attr_size(sizeof(u8))); 2086 msg_size *= (index == -1) ? 

static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *dev_list;
	struct sk_buff *reply;
	void *reply_head;
	size_t msg_size;
	int index = -1;
	int ret = -ENOMEM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

	mutex_lock(&nbd_index_mutex);

	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
				  nla_attr_size(sizeof(u8)));
	msg_size *= (index == -1) ? nbd_total_devices : 1;

	reply = genlmsg_new(msg_size, GFP_KERNEL);
	if (!reply)
		goto out;
	reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
				       NBD_CMD_STATUS);
	if (!reply_head) {
		nlmsg_free(reply);
		goto out;
	}

	dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
		if (ret) {
			nlmsg_free(reply);
			goto out;
		}
	} else {
		struct nbd_device *nbd;

		nbd = idr_find(&nbd_index_idr, index);
		if (nbd) {
			ret = populate_nbd_status(nbd, reply);
			if (ret) {
				nlmsg_free(reply);
				goto out;
			}
		}
	}
	nla_nest_end(reply, dev_list);
	genlmsg_end(reply, reply_head);
	genlmsg_reply(reply, info);
	ret = 0;
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

static void nbd_connect_reply(struct genl_info *info, int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
				     NBD_CMD_CONNECT);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_reply(skb, info);
}

static void nbd_mcast_index(int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
			       NBD_CMD_LINK_DEAD);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
}

static void nbd_dead_link_work(struct work_struct *work)
{
	struct link_dead_args *args = container_of(work, struct link_dead_args,
						   work);
	nbd_mcast_index(args->index);
	kfree(args);
}
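
/*
 * Worked example of the partition/minor arithmetic in nbd_init() below,
 * using the default module parameters (nbds_max = 16, max_part = 16):
 * part_shift = fls(16) = 5, so max_part is rounded to (1 << 5) - 1 = 31
 * usable partitions per device (minor 0 is the whole disk), i.e. 32
 * minors per device.  Both sanity checks then pass: 1 << 5 = 32 is
 * within DISK_MAX_PARTS (256), and 16 devices fit in the minor space of
 * 1 << (MINORBITS - 5) = 32768.
 */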

static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift, since it is
		 * exported to user space so that users can know the maximum
		 * number of partitions the kernel can manage per device.
		 *
		 * Note that the -1 is required because partition 0 is
		 * reserved for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;
	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
					 WQ_UNBOUND, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		destroy_workqueue(recv_workqueue);
		return -EIO;
	}

	if (genl_register_family(&nbd_genl_family)) {
		unregister_blkdev(NBD_MAJOR, "nbd");
		destroy_workqueue(recv_workqueue);
		return -EINVAL;
	}
	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct list_head *list = (struct list_head *)data;
	struct nbd_device *nbd = ptr;

	list_add_tail(&nbd->list, list);
	return 0;
}

static void __exit nbd_cleanup(void)
{
	struct nbd_device *nbd;
	LIST_HEAD(del_list);

	nbd_dbg_close();

	mutex_lock(&nbd_index_mutex);
	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
	mutex_unlock(&nbd_index_mutex);

	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}

	idr_destroy(&nbd_index_idr);
	genl_unregister_family(&nbd_genl_family);
	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
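
/*
 * Example (illustrative, not part of the driver): loading the module
 * with non-default parameters from a shell, e.g.
 *
 *	modprobe nbd nbds_max=4 max_part=8
 *
 * creates /dev/nbd0 through /dev/nbd3; fls(8) = 4, so max_part is
 * rounded to (1 << 4) - 1 = 15 partitions per device.
 */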