/*
 * NVM Express device driver
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <scsi/sg.h>
#include <asm/unaligned.h>

#include "nvme.h"

#define NVME_MINORS		(1U << MINORBITS)

static int nvme_major;
module_param(nvme_major, int, 0);

static int nvme_char_major;
module_param(nvme_char_major, int, 0);

static LIST_HEAD(nvme_ctrl_list);
DEFINE_SPINLOCK(dev_list_lock);

static struct class *nvme_class;

static void nvme_free_ns(struct kref *kref)
{
	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);

	if (ns->type == NVME_NS_LIGHTNVM)
		nvme_nvm_unregister(ns->queue, ns->disk->disk_name);

	spin_lock(&dev_list_lock);
	ns->disk->private_data = NULL;
	spin_unlock(&dev_list_lock);

	put_disk(ns->disk);
	ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
	nvme_put_ctrl(ns->ctrl);
	kfree(ns);
}

static void nvme_put_ns(struct nvme_ns *ns)
{
	kref_put(&ns->kref, nvme_free_ns);
}

static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
{
	struct nvme_ns *ns;

	spin_lock(&dev_list_lock);
	ns = disk->private_data;
	if (ns && !kref_get_unless_zero(&ns->kref))
		ns = NULL;
	spin_unlock(&dev_list_lock);

	return ns;
}

void nvme_requeue_req(struct request *req)
{
	unsigned long flags;

	blk_mq_requeue_request(req);
	spin_lock_irqsave(req->q->queue_lock, flags);
	if (!blk_queue_stopped(req->q))
		blk_mq_kick_requeue_list(req->q);
	spin_unlock_irqrestore(req->q->queue_lock, flags);
}

struct request *nvme_alloc_request(struct request_queue *q,
		struct nvme_command *cmd, unsigned int flags)
{
	bool write = cmd->common.opcode & 1;
	struct request *req;

	req = blk_mq_alloc_request(q, write, flags);
	if (IS_ERR(req))
		return req;

	req->cmd_type = REQ_TYPE_DRV_PRIV;
	req->cmd_flags |= REQ_FAILFAST_DRIVER;
	req->__data_len = 0;
	req->__sector = (sector_t) -1;
	req->bio = req->biotail = NULL;

	req->cmd = (unsigned char *)cmd;
	req->cmd_len = sizeof(struct nvme_command);
	req->special = (void *)0;

	return req;
}

/*
 * Returns 0 on success.  If the result is negative, it's a Linux error code;
 * if the result is positive, it's an NVM Express status code
 */
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
{
	struct request *req;
	int ret;

	req = nvme_alloc_request(q, cmd, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

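	/* Fall back to the admin command timeout when the caller doesn't pass one. */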
	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	if (buffer && bufflen) {
		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
		if (ret)
			goto out;
	}

	blk_execute_rq(req->q, NULL, req, 0);
	if (result)
		*result = (u32)(uintptr_t)req->special;
	ret = req->errors;
out:
	blk_mq_free_request(req);
	return ret;
}

int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buffer, unsigned bufflen)
{
	return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
}

int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
		void __user *ubuffer, unsigned bufflen,
		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
		u32 *result, unsigned timeout)
{
	bool write = cmd->common.opcode & 1;
	struct nvme_ns *ns = q->queuedata;
	struct gendisk *disk = ns ? ns->disk : NULL;
	struct request *req;
	struct bio *bio = NULL;
	void *meta = NULL;
	int ret;

	req = nvme_alloc_request(q, cmd, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	if (ubuffer && bufflen) {
		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
				GFP_KERNEL);
		if (ret)
			goto out;
		bio = req->bio;

		if (!disk)
			goto submit;
		bio->bi_bdev = bdget_disk(disk, 0);
		if (!bio->bi_bdev) {
			ret = -ENODEV;
			goto out_unmap;
		}

		if (meta_buffer && meta_len) {
			struct bio_integrity_payload *bip;

			meta = kmalloc(meta_len, GFP_KERNEL);
			if (!meta) {
				ret = -ENOMEM;
				goto out_unmap;
			}

			if (write) {
				if (copy_from_user(meta, meta_buffer,
						meta_len)) {
					ret = -EFAULT;
					goto out_free_meta;
				}
			}

			bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
			if (IS_ERR(bip)) {
				ret = PTR_ERR(bip);
				goto out_free_meta;
			}

			bip->bip_iter.bi_size = meta_len;
			bip->bip_iter.bi_sector = meta_seed;

			ret = bio_integrity_add_page(bio, virt_to_page(meta),
					meta_len, offset_in_page(meta));
			if (ret != meta_len) {
				ret = -ENOMEM;
				goto out_free_meta;
			}
		}
	}
submit:
	blk_execute_rq(req->q, disk, req, 0);
	ret = req->errors;
	if (result)
		*result = (u32)(uintptr_t)req->special;
	if (meta && !ret && !write) {
		if (copy_to_user(meta_buffer, meta, meta_len))
			ret = -EFAULT;
	}
out_free_meta:
	kfree(meta);
out_unmap:
	if (bio) {
		if (disk && bio->bi_bdev)
			bdput(bio->bi_bdev);
		blk_rq_unmap_user(bio);
	}
out:
	blk_mq_free_request(req);
	return ret;
}

int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
		void __user *ubuffer, unsigned bufflen, u32 *result,
		unsigned timeout)
{
	return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
			result, timeout);
}

int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
	struct nvme_command c = { };
	int error;

	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = cpu_to_le32(1);

	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
	if (!*id)
		return -ENOMEM;

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
			sizeof(struct nvme_id_ctrl));
	if (error)
		kfree(*id);
	return error;
}

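/*
 * Identify with CNS 02h returns a page of up to 1024 active namespace IDs,
 * in increasing order, starting after @nsid (NVMe 1.1 and later).
 */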
static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
	struct nvme_command c = { };

	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = cpu_to_le32(2);
	c.identify.nsid = cpu_to_le32(nsid);
	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}

int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
		struct nvme_id_ns **id)
{
	struct nvme_command c = { };
	int error;

	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);

	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
	if (!*id)
		return -ENOMEM;

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
			sizeof(struct nvme_id_ns));
	if (error)
		kfree(*id);
	return error;
}

int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
				dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}

int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
				dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}

int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
{
	struct nvme_command c = { };
	int error;

	c.common.opcode = nvme_admin_get_log_page;
	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
	c.common.cdw10[0] = cpu_to_le32(
			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
			 NVME_LOG_SMART);

	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
	if (!*log)
		return -ENOMEM;

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
			sizeof(struct nvme_smart_log));
	if (error)
		kfree(*log);
	return error;
}

int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
	u32 q_count = (*count - 1) | ((*count - 1) << 16);
	u32 result;
	int status, nr_io_queues;

	status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
			&result);
	if (status)
		return status;

	nr_io_queues = min(result & 0xffff, result >> 16) + 1;
	*count = min(*count, nr_io_queues);
	return 0;
}

static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
	struct nvme_user_io io;
	struct nvme_command c;
	unsigned length, meta_len;
	void __user *metadata;

	if (copy_from_user(&io, uio, sizeof(io)))
		return -EFAULT;
	if (io.flags)
		return -EINVAL;

	switch (io.opcode) {
	case nvme_cmd_write:
	case nvme_cmd_read:
	case nvme_cmd_compare:
		break;
	default:
		return -EINVAL;
	}

	length = (io.nblocks + 1) << ns->lba_shift;
	meta_len = (io.nblocks + 1) * ns->ms;
	metadata = (void __user *)(uintptr_t)io.metadata;

	if (ns->ext) {
		length += meta_len;
		meta_len = 0;
	} else if (meta_len) {
		if ((io.metadata & 3) || !io.metadata)
			return -EINVAL;
	}

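	/*
	 * Build the Read/Write/Compare command.  'length' is in bytes here,
	 * while the on-the-wire block count (c.rw.length) stays 0's based,
	 * matching io.nblocks.
	 */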
	memset(&c, 0, sizeof(c));
	c.rw.opcode = io.opcode;
	c.rw.flags = io.flags;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.slba = cpu_to_le64(io.slba);
	c.rw.length = cpu_to_le16(io.nblocks);
	c.rw.control = cpu_to_le16(io.control);
	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
	c.rw.reftag = cpu_to_le32(io.reftag);
	c.rw.apptag = cpu_to_le16(io.apptag);
	c.rw.appmask = cpu_to_le16(io.appmask);

	return __nvme_submit_user_cmd(ns->queue, &c,
			(void __user *)(uintptr_t)io.addr, length,
			metadata, meta_len, io.slba, NULL, 0);
}

static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
			struct nvme_passthru_cmd __user *ucmd)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	int status;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
			&cmd.result, timeout);
	if (status >= 0) {
		if (put_user(cmd.result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long arg)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;

	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->ns_id;
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, (void __user *)arg);
#ifdef CONFIG_BLK_DEV_NVME_SCSI
	case SG_GET_VERSION_NUM:
		return nvme_sg_get_version_num((void __user *)arg);
	case SG_IO:
		return nvme_sg_io(ns, (void __user *)arg);
#endif
	default:
		return -ENOTTY;
	}
}

#ifdef CONFIG_COMPAT
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SG_IO:
		return -ENOIOCTLCMD;
	}
	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif

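/*
 * Opening the block device takes a namespace reference, dropped again in
 * nvme_release().  A cleared private_data or a zero refcount means the
 * namespace is going away, so the open fails with -ENXIO.
 */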
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
	return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
}

static void nvme_release(struct gendisk *disk, fmode_t mode)
{
	nvme_put_ns(disk->private_data);
}

static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	/* some standard values */
	geo->heads = 1 << 6;
	geo->sectors = 1 << 5;
	geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
	return 0;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct nvme_ns *ns)
{
	struct blk_integrity integrity;

	/* only profile and tuple_size are filled in below, so zero the rest */
	memset(&integrity, 0, sizeof(integrity));

	switch (ns->pi_type) {
	case NVME_NS_DPS_PI_TYPE3:
		integrity.profile = &t10_pi_type3_crc;
		break;
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		integrity.profile = &t10_pi_type1_crc;
		break;
	default:
		integrity.profile = NULL;
		break;
	}
	integrity.tuple_size = ns->ms;
	blk_integrity_register(ns->disk, &integrity);
	blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvme_config_discard(struct nvme_ns *ns)
{
	u32 logical_block_size = queue_logical_block_size(ns->queue);

	ns->queue->limits.discard_zeroes_data = 0;
	ns->queue->limits.discard_alignment = logical_block_size;
	ns->queue->limits.discard_granularity = logical_block_size;
	blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}

static int nvme_revalidate_disk(struct gendisk *disk)
{
	struct nvme_ns *ns = disk->private_data;
	struct nvme_id_ns *id;
	u8 lbaf, pi_type;
	u16 old_ms;
	unsigned short bs;

	if (test_bit(NVME_NS_DEAD, &ns->flags)) {
		set_capacity(disk, 0);
		return -ENODEV;
	}
	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
				__func__, ns->ctrl->instance, ns->ns_id);
		return -ENODEV;
	}
	if (id->ncap == 0) {
		kfree(id);
		return -ENODEV;
	}

	if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
		if (nvme_nvm_register(ns->queue, disk->disk_name)) {
			dev_warn(ns->ctrl->dev,
				"%s: LightNVM init failure\n", __func__);
			kfree(id);
			return -ENODEV;
		}
		ns->type = NVME_NS_LIGHTNVM;
	}

	if (ns->ctrl->vs >= NVME_VS(1, 1))
		memcpy(ns->eui, id->eui64, sizeof(ns->eui));
	if (ns->ctrl->vs >= NVME_VS(1, 2))
		memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));

	old_ms = ns->ms;
	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->lba_shift = id->lbaf[lbaf].ds;
	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

	/*
	 * If identify namespace failed, use default 512 byte block size so
	 * block layer can use before failing read/write for 0 capacity.
	 */
	if (ns->lba_shift == 0)
		ns->lba_shift = 9;
	bs = 1 << ns->lba_shift;
	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
					id->dps & NVME_NS_DPS_PI_MASK : 0;

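	/*
	 * Freeze the queue while the block size, integrity profile and
	 * capacity are updated, so no I/O is processed against a
	 * half-updated namespace.
	 */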
	blk_mq_freeze_queue(disk->queue);
	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
				ns->ms != old_ms ||
				bs != queue_logical_block_size(disk->queue) ||
				(ns->ms && ns->ext)))
		blk_integrity_unregister(disk);

	ns->pi_type = pi_type;
	blk_queue_logical_block_size(ns->queue, bs);

	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
		nvme_init_integrity(ns);
	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
		set_capacity(disk, 0);
	else
		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));

	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
		nvme_config_discard(ns);
	blk_mq_unfreeze_queue(disk->queue);

	kfree(id);
	return 0;
}

static char nvme_pr_type(enum pr_type type)
{
	switch (type) {
	case PR_WRITE_EXCLUSIVE:
		return 1;
	case PR_EXCLUSIVE_ACCESS:
		return 2;
	case PR_WRITE_EXCLUSIVE_REG_ONLY:
		return 3;
	case PR_EXCLUSIVE_ACCESS_REG_ONLY:
		return 4;
	case PR_WRITE_EXCLUSIVE_ALL_REGS:
		return 5;
	case PR_EXCLUSIVE_ACCESS_ALL_REGS:
		return 6;
	default:
		return 0;
	}
}

static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
				u64 key, u64 sa_key, u8 op)
{
	struct nvme_ns *ns = bdev->bd_disk->private_data;
	struct nvme_command c;
	u8 data[16] = { 0, };

	put_unaligned_le64(key, &data[0]);
	put_unaligned_le64(sa_key, &data[8]);

	memset(&c, 0, sizeof(c));
	c.common.opcode = op;
	c.common.nsid = cpu_to_le32(ns->ns_id);
	c.common.cdw10[0] = cpu_to_le32(cdw10);

	return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
}

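/*
 * CDW10 layout for the reservation commands (NVMe 1.1):
 *   bits 2:0   action (RREGA/RACQA/RRELA depending on the opcode)
 *   bit  3     IEKEY - ignore the supplied registration key
 *   bits 15:8  reservation type (Reservation Acquire/Release only)
 *   bits 31:30 change persist-through-power-loss state (Register only)
 */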
static int nvme_pr_register(struct block_device *bdev, u64 old,
		u64 new, unsigned flags)
{
	u32 cdw10;

	if (flags & ~PR_FL_IGNORE_KEY)
		return -EOPNOTSUPP;

	cdw10 = old ? 2 : 0;
	cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
	cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
}

static int nvme_pr_reserve(struct block_device *bdev, u64 key,
		enum pr_type type, unsigned flags)
{
	u32 cdw10;

	if (flags & ~PR_FL_IGNORE_KEY)
		return -EOPNOTSUPP;

	cdw10 = nvme_pr_type(type) << 8;
	cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
}

static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
		enum pr_type type, bool abort)
{
	u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);

	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}

static int nvme_pr_clear(struct block_device *bdev, u64 key)
{
	u32 cdw10 = 1 | (key ? 1 << 3 : 0);

	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
}

static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
	u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);

	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}

static const struct pr_ops nvme_pr_ops = {
	.pr_register	= nvme_pr_register,
	.pr_reserve	= nvme_pr_reserve,
	.pr_release	= nvme_pr_release,
	.pr_preempt	= nvme_pr_preempt,
	.pr_clear	= nvme_pr_clear,
};

static const struct block_device_operations nvme_fops = {
	.owner		= THIS_MODULE,
	.ioctl		= nvme_ioctl,
	.compat_ioctl	= nvme_compat_ioctl,
	.open		= nvme_open,
	.release	= nvme_release,
	.getgeo		= nvme_getgeo,
	.revalidate_disk= nvme_revalidate_disk,
	.pr_ops		= &nvme_pr_ops,
};

static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
	unsigned long timeout =
		((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
	u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
	int ret;

	while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
		if ((csts & NVME_CSTS_RDY) == bit)
			break;

		msleep(100);
		if (fatal_signal_pending(current))
			return -EINTR;
		if (time_after(jiffies, timeout)) {
			dev_err(ctrl->dev,
				"Device not ready; aborting %s\n", enabled ?
						"initialisation" : "reset");
			return -ENODEV;
		}
	}

	return ret;
}

/*
 * If the device has been passed off to us in an enabled state, just clear
 * the enabled bit.  The spec says we should set the 'shutdown notification
 * bits', but doing so may cause the device to complete commands to the
 * admin queue ... and we don't know what memory that might be pointing at!
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	int ret;

	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
	ctrl->ctrl_config &= ~NVME_CC_ENABLE;

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;
	return nvme_wait_ready(ctrl, cap, false);
}

int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	/*
	 * Default to a 4K page size, with the intention to update this
	 * path in the future to accommodate architectures with differing
	 * kernel and IO page sizes.
	 */
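	/*
	 * CAP.MPSMIN encodes the smallest supported memory page size as
	 * 2^(12 + MPSMIN) bytes, hence the "+ 12" below.
	 */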
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
	int ret;

	if (page_shift < dev_page_min) {
		dev_err(ctrl->dev,
			"Minimum device page size %u too large for host (%u)\n",
			1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	ctrl->page_size = 1 << page_shift;

	ctrl->ctrl_config = NVME_CC_CSS_NVM;
	ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
	ctrl->ctrl_config |= NVME_CC_ENABLE;

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;
	return nvme_wait_ready(ctrl, cap, true);
}

int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
	unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
	u32 csts;
	int ret;

	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
	ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;

	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (ret)
		return ret;

	while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
			break;

		msleep(100);
		if (fatal_signal_pending(current))
			return -EINTR;
		if (time_after(jiffies, timeout)) {
			dev_err(ctrl->dev,
				"Device shutdown incomplete; abort shutdown\n");
			return -ENODEV;
		}
	}

	return ret;
}

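/*
 * Apply controller-wide limits to a request queue: cap the transfer size,
 * bound the segment count so a maximally sized transfer can always be
 * mapped, and only advertise FLUSH/FUA when a volatile write cache is
 * present.  The virt_boundary keeps bio segments aligned for PRP lists.
 */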
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
		struct request_queue *q)
{
	if (ctrl->max_hw_sectors) {
		u32 max_segments =
			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;

		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
	}
	if (ctrl->stripe_size)
		blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
	blk_queue_virt_boundary(q, ctrl->page_size - 1);
}

/*
 * Initialize the cached copies of the Identify data and various controller
 * registers in our nvme_ctrl structure.  This should be called as soon as
 * the admin queue is fully up and running.
 */
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
	struct nvme_id_ctrl *id;
	u64 cap;
	int ret, page_shift;

	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
	if (ret) {
		dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
		return ret;
	}

	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
	if (ret) {
		dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
		return ret;
	}
	page_shift = NVME_CAP_MPSMIN(cap) + 12;

	if (ctrl->vs >= NVME_VS(1, 1))
		ctrl->subsystem = NVME_CAP_NSSRC(cap);

	ret = nvme_identify_ctrl(ctrl, &id);
	if (ret) {
		dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
		return -EIO;
	}

	ctrl->oncs = le16_to_cpup(&id->oncs);
	atomic_set(&ctrl->abort_limit, id->acl + 1);
	ctrl->vwc = id->vwc;
	memcpy(ctrl->serial, id->sn, sizeof(id->sn));
	memcpy(ctrl->model, id->mn, sizeof(id->mn));
	memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
	if (id->mdts)
		ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
	else
		ctrl->max_hw_sectors = UINT_MAX;

	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
		unsigned int max_hw_sectors;

		ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
		max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
		if (ctrl->max_hw_sectors) {
			ctrl->max_hw_sectors = min(max_hw_sectors,
							ctrl->max_hw_sectors);
		} else {
			ctrl->max_hw_sectors = max_hw_sectors;
		}
	}

	nvme_set_queue_limits(ctrl, ctrl->admin_q);

	kfree(id);
	return 0;
}

static int nvme_dev_open(struct inode *inode, struct file *file)
{
	struct nvme_ctrl *ctrl;
	int instance = iminor(inode);
	int ret = -ENODEV;

	spin_lock(&dev_list_lock);
	list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
		if (ctrl->instance != instance)
			continue;

		if (!ctrl->admin_q) {
			ret = -EWOULDBLOCK;
			break;
		}
		if (!kref_get_unless_zero(&ctrl->kref))
			break;
		file->private_data = ctrl;
		ret = 0;
		break;
	}
	spin_unlock(&dev_list_lock);

	return ret;
}

static int nvme_dev_release(struct inode *inode, struct file *file)
{
	nvme_put_ctrl(file->private_data);
	return 0;
}

static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
	struct nvme_ns *ns;
	int ret;

	mutex_lock(&ctrl->namespaces_mutex);
	if (list_empty(&ctrl->namespaces)) {
		ret = -ENOTTY;
		goto out_unlock;
	}

	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
		dev_warn(ctrl->dev,
			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
		ret = -EINVAL;
		goto out_unlock;
	}

	dev_warn(ctrl->dev,
		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
	kref_get(&ns->kref);
	mutex_unlock(&ctrl->namespaces_mutex);

	ret = nvme_user_cmd(ctrl, ns, argp);
	nvme_put_ns(ns);
	return ret;

out_unlock:
	mutex_unlock(&ctrl->namespaces_mutex);
	return ret;
}

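/*
 * ioctls on the controller character device (/dev/nvmeX); the per-namespace
 * block device ioctls are handled by nvme_ioctl() above.
 */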
static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
		unsigned long arg)
{
	struct nvme_ctrl *ctrl = file->private_data;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp);
	case NVME_IOCTL_RESET:
		dev_warn(ctrl->dev, "resetting controller\n");
		return ctrl->ops->reset_ctrl(ctrl);
	case NVME_IOCTL_SUBSYS_RESET:
		return nvme_reset_subsystem(ctrl);
	default:
		return -ENOTTY;
	}
}

static const struct file_operations nvme_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= nvme_dev_open,
	.release	= nvme_dev_release,
	.unlocked_ioctl	= nvme_dev_ioctl,
	.compat_ioctl	= nvme_dev_ioctl,
};

static ssize_t nvme_sysfs_reset(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
	int ret;

	ret = ctrl->ops->reset_ctrl(ctrl);
	if (ret < 0)
		return ret;
	return count;
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);

static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
								char *buf)
{
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
	return sprintf(buf, "%pU\n", ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);

static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
								char *buf)
{
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
	return sprintf(buf, "%8phd\n", ns->eui);
}
static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);

static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
								char *buf)
{
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
	return sprintf(buf, "%d\n", ns->ns_id);
}
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);

static struct attribute *nvme_ns_attrs[] = {
	&dev_attr_uuid.attr,
	&dev_attr_eui.attr,
	&dev_attr_nsid.attr,
	NULL,
};

static umode_t nvme_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct nvme_ns *ns = dev_to_disk(dev)->private_data;

	if (a == &dev_attr_uuid.attr) {
		if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
			return 0;
	}
	if (a == &dev_attr_eui.attr) {
		if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
			return 0;
	}
	return a->mode;
}

static const struct attribute_group nvme_ns_attr_group = {
	.attrs		= nvme_ns_attrs,
	.is_visible	= nvme_attrs_are_visible,
};

#define nvme_show_function(field)						\
static ssize_t field##_show(struct device *dev,					\
		struct device_attribute *attr, char *buf)			\
{										\
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);				\
	return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);	\
}										\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_function(model);
nvme_show_function(serial);
nvme_show_function(firmware_rev);

static struct attribute *nvme_dev_attrs[] = {
	&dev_attr_reset_controller.attr,
	&dev_attr_model.attr,
	&dev_attr_serial.attr,
	&dev_attr_firmware_rev.attr,
	NULL
};

static struct attribute_group nvme_dev_attrs_group = {
	.attrs = nvme_dev_attrs,
};

static const struct attribute_group *nvme_dev_attr_groups[] = {
	&nvme_dev_attrs_group,
	NULL,
};

static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);

	return nsa->ns_id - nsb->ns_id;
}

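/*
 * The namespace list is kept sorted by namespace ID (see ns_cmp() and the
 * list_sort() call in nvme_scan_namespaces()), so the walk can stop early.
 */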
static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;

	lockdep_assert_held(&ctrl->namespaces_mutex);

	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->ns_id == nsid)
			return ns;
		if (ns->ns_id > nsid)
			break;
	}
	return NULL;
}

static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;
	struct gendisk *disk;
	int node = dev_to_node(ctrl->dev);

	lockdep_assert_held(&ctrl->namespaces_mutex);

	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
	if (!ns)
		return;

	ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
	if (ns->instance < 0)
		goto out_free_ns;

	ns->queue = blk_mq_init_queue(ctrl->tagset);
	if (IS_ERR(ns->queue))
		goto out_release_instance;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
	ns->queue->queuedata = ns;
	ns->ctrl = ctrl;

	disk = alloc_disk_node(0, node);
	if (!disk)
		goto out_free_queue;

	kref_init(&ns->kref);
	ns->ns_id = nsid;
	ns->disk = disk;
	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */

	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
	nvme_set_queue_limits(ctrl, ns->queue);

	disk->major = nvme_major;
	disk->first_minor = 0;
	disk->fops = &nvme_fops;
	disk->private_data = ns;
	disk->queue = ns->queue;
	disk->driverfs_dev = ctrl->device;
	disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);

	if (nvme_revalidate_disk(ns->disk))
		goto out_free_disk;

	list_add_tail(&ns->list, &ctrl->namespaces);
	kref_get(&ctrl->kref);
	if (ns->type == NVME_NS_LIGHTNVM)
		return;

	add_disk(ns->disk);
	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
					&nvme_ns_attr_group))
		pr_warn("%s: failed to create sysfs group for identification\n",
			ns->disk->disk_name);
	return;
out_free_disk:
	kfree(disk);
out_free_queue:
	blk_cleanup_queue(ns->queue);
out_release_instance:
	ida_simple_remove(&ctrl->ns_ida, ns->instance);
out_free_ns:
	kfree(ns);
}

static void nvme_ns_remove(struct nvme_ns *ns)
{
	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
		return;

	if (ns->disk->flags & GENHD_FL_UP) {
		if (blk_get_integrity(ns->disk))
			blk_integrity_unregister(ns->disk);
		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
					&nvme_ns_attr_group);
		del_gendisk(ns->disk);
		blk_mq_abort_requeue_list(ns->queue);
		blk_cleanup_queue(ns->queue);
	}
	mutex_lock(&ns->ctrl->namespaces_mutex);
	list_del_init(&ns->list);
	mutex_unlock(&ns->ctrl->namespaces_mutex);
	nvme_put_ns(ns);
}

static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;

	ns = nvme_find_ns(ctrl, nsid);
	if (ns) {
		if (revalidate_disk(ns->disk))
			nvme_ns_remove(ns);
	} else
		nvme_alloc_ns(ctrl, nsid);
}

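/*
 * Scan using the Identify CNS 02h namespace list.  Each iteration fetches up
 * to 1024 active NSIDs above 'prev'; namespaces missing from the returned
 * list are removed while the gaps are walked.
 */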
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{
	struct nvme_ns *ns;
	__le32 *ns_list;
	unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
	int ret = 0;

	ns_list = kzalloc(0x1000, GFP_KERNEL);
	if (!ns_list)
		return -ENOMEM;

	for (i = 0; i < num_lists; i++) {
		ret = nvme_identify_ns_list(ctrl, prev, ns_list);
		if (ret)
			goto out;

		for (j = 0; j < min(nn, 1024U); j++) {
			nsid = le32_to_cpu(ns_list[j]);
			if (!nsid)
				goto out;

			nvme_validate_ns(ctrl, nsid);

			while (++prev < nsid) {
				ns = nvme_find_ns(ctrl, prev);
				if (ns)
					nvme_ns_remove(ns);
			}
		}
		nn -= j;
	}
out:
	kfree(ns_list);
	return ret;
}

static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
{
	struct nvme_ns *ns, *next;
	unsigned i;

	lockdep_assert_held(&ctrl->namespaces_mutex);

	for (i = 1; i <= nn; i++)
		nvme_validate_ns(ctrl, i);

	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
		if (ns->ns_id > nn)
			nvme_ns_remove(ns);
	}
}

void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
{
	struct nvme_id_ctrl *id;
	unsigned nn;

	if (nvme_identify_ctrl(ctrl, &id))
		return;

	mutex_lock(&ctrl->namespaces_mutex);
	nn = le32_to_cpu(id->nn);
	if (ctrl->vs >= NVME_VS(1, 1) &&
	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
		if (!nvme_scan_ns_list(ctrl, nn))
			goto done;
	}
	__nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
done:
	list_sort(NULL, &ctrl->namespaces, ns_cmp);
	mutex_unlock(&ctrl->namespaces_mutex);
	kfree(id);
}

void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns, *next;

	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
		nvme_ns_remove(ns);
}

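/*
 * Controller instance numbers back the nvme%d character device and the
 * nvme%dn%d block device names.
 */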
static DEFINE_IDA(nvme_instance_ida);

static int nvme_set_instance(struct nvme_ctrl *ctrl)
{
	int instance, error;

	do {
		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
			return -ENODEV;

		spin_lock(&dev_list_lock);
		error = ida_get_new(&nvme_instance_ida, &instance);
		spin_unlock(&dev_list_lock);
	} while (error == -EAGAIN);

	if (error)
		return -ENODEV;

	ctrl->instance = instance;
	return 0;
}

static void nvme_release_instance(struct nvme_ctrl *ctrl)
{
	spin_lock(&dev_list_lock);
	ida_remove(&nvme_instance_ida, ctrl->instance);
	spin_unlock(&dev_list_lock);
}

void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));

	spin_lock(&dev_list_lock);
	list_del(&ctrl->node);
	spin_unlock(&dev_list_lock);
}

static void nvme_free_ctrl(struct kref *kref)
{
	struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);

	put_device(ctrl->device);
	nvme_release_instance(ctrl);
	ida_destroy(&ctrl->ns_ida);

	ctrl->ops->free_ctrl(ctrl);
}

void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
	kref_put(&ctrl->kref, nvme_free_ctrl);
}

/*
 * Initialize an NVMe controller structure.  This needs to be called during
 * earliest initialization so that we have the initialized structure around
 * during probing.
 */
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
		const struct nvme_ctrl_ops *ops, unsigned long quirks)
{
	int ret;

	INIT_LIST_HEAD(&ctrl->namespaces);
	mutex_init(&ctrl->namespaces_mutex);
	kref_init(&ctrl->kref);
	ctrl->dev = dev;
	ctrl->ops = ops;
	ctrl->quirks = quirks;

	ret = nvme_set_instance(ctrl);
	if (ret)
		goto out;

	ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
				MKDEV(nvme_char_major, ctrl->instance),
				dev, nvme_dev_attr_groups,
				"nvme%d", ctrl->instance);
	if (IS_ERR(ctrl->device)) {
		ret = PTR_ERR(ctrl->device);
		goto out_release_instance;
	}
	get_device(ctrl->device);
	dev_set_drvdata(ctrl->device, ctrl);
	ida_init(&ctrl->ns_ida);

	spin_lock(&dev_list_lock);
	list_add_tail(&ctrl->node, &nvme_ctrl_list);
	spin_unlock(&dev_list_lock);

	return 0;
out_release_instance:
	nvme_release_instance(ctrl);
out:
	return ret;
}

/**
 * nvme_kill_queues(): Ends all namespace queues
 * @ctrl: the dead controller that needs to end
 *
 * Call this function when the driver determines it is unable to get the
 * controller in a state capable of servicing IO.
 */
void nvme_kill_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (!kref_get_unless_zero(&ns->kref))
			continue;

		/*
		 * Revalidating a dead namespace sets capacity to 0. This will
		 * end buffered writers dirtying pages that can't be synced.
		 */
		if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
			revalidate_disk(ns->disk);

		blk_set_queue_dying(ns->queue);
		blk_mq_abort_requeue_list(ns->queue);
		blk_mq_start_stopped_hw_queues(ns->queue, true);

		nvme_put_ns(ns);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}

void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		spin_lock_irq(ns->queue->queue_lock);
		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
		spin_unlock_irq(ns->queue->queue_lock);

		blk_mq_cancel_requeue_work(ns->queue);
		blk_mq_stop_hw_queues(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}

void nvme_start_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
		blk_mq_start_stopped_hw_queues(ns->queue, true);
		blk_mq_kick_requeue_list(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}

int __init nvme_core_init(void)
{
	int result;

	result = register_blkdev(nvme_major, "nvme");
	if (result < 0)
		return result;
	else if (result > 0)
		nvme_major = result;

	result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
							&nvme_dev_fops);
	if (result < 0)
		goto unregister_blkdev;
	else if (result > 0)
		nvme_char_major = result;

	nvme_class = class_create(THIS_MODULE, "nvme");
	if (IS_ERR(nvme_class)) {
		result = PTR_ERR(nvme_class);
		goto unregister_chrdev;
	}

	return 0;

unregister_chrdev:
	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
unregister_blkdev:
	unregister_blkdev(nvme_major, "nvme");
	return result;
}

void nvme_core_exit(void)
{
	unregister_blkdev(nvme_major, "nvme");
	class_destroy(nvme_class);
	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
}