/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static void
nvme_ctrlr_cb(void *arg, const struct nvme_completion *status)
{
	struct nvme_completion *cpl = arg;
	struct mtx *mtx;

	/*
	 * Copy status into the argument passed by the caller, so that
	 * the caller can check the status to determine if the request
	 * passed or failed.
	 */
	memcpy(cpl, status, sizeof(*cpl));
	mtx = mtx_pool_find(mtxpool_sleep, cpl);
	mtx_lock(mtx);
	wakeup(cpl);
	mtx_unlock(mtx);
}

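/*
 * Map the controller's register BAR so that the nvme_mmio_*() accessors
 * and the nvme_registers pointer below can reach the controller's
 * memory-mapped register file.
 */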
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{

	/* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		ctrlr->resource_id = PCIR_BAR(2);
	else
		ctrlr->resource_id = PCIR_BAR(0);

	ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
	    &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);

	if (ctrlr->resource == NULL) {
		device_printf(ctrlr->dev, "unable to allocate pci resource\n");
		return (ENOMEM);
	}

	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;

	return (0);
}

#ifdef CHATHAM2
static int
nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr)
{

	ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR);
	ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev,
	    SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1,
	    RF_ACTIVE);

	if (ctrlr->chatham_resource == NULL) {
		device_printf(ctrlr->dev, "unable to alloc pci resource\n");
		return (ENOMEM);
	}

	ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource);
	ctrlr->chatham_bus_handle =
	    rman_get_bushandle(ctrlr->chatham_resource);

	return (0);
}

static void
nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr)
{
	uint64_t reg1, reg2, reg3;
	uint64_t temp1, temp2;
	uint32_t temp3;
	uint32_t use_flash_timings = 0;

	DELAY(10000);

	temp3 = chatham_read_4(ctrlr, 0x8080);

	device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3);

	ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110;
	ctrlr->chatham_size = ctrlr->chatham_lbas * 512;

	device_printf(ctrlr->dev, "Chatham size: %lld\n",
	    (long long)ctrlr->chatham_size);

	reg1 = reg2 = reg3 = ctrlr->chatham_size - 1;

	TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings);
	if (use_flash_timings) {
		device_printf(ctrlr->dev, "Chatham: using flash timings\n");
		temp1 = 0x00001b58000007d0LL;
		temp2 = 0x000000cb00000131LL;
	} else {
		device_printf(ctrlr->dev, "Chatham: using DDR timings\n");
		temp1 = temp2 = 0x0LL;
	}

	chatham_write_8(ctrlr, 0x8000, reg1);
	chatham_write_8(ctrlr, 0x8008, reg2);
	chatham_write_8(ctrlr, 0x8010, reg3);

	chatham_write_8(ctrlr, 0x8020, temp1);
	temp3 = chatham_read_4(ctrlr, 0x8020);

	chatham_write_8(ctrlr, 0x8028, temp2);
	temp3 = chatham_read_4(ctrlr, 0x8028);

	chatham_write_8(ctrlr, 0x8030, temp1);
	chatham_write_8(ctrlr, 0x8038, temp2);
	chatham_write_8(ctrlr, 0x8040, temp1);
	chatham_write_8(ctrlr, 0x8048, temp2);
	chatham_write_8(ctrlr, 0x8050, temp1);
	chatham_write_8(ctrlr, 0x8058, temp2);

	DELAY(10000);
}

static void
nvme_chatham_populate_cdata(struct nvme_controller *ctrlr)
{
	struct nvme_controller_data *cdata;

	cdata = &ctrlr->cdata;

	cdata->vid = 0x8086;
	cdata->ssvid = 0x2011;

	/*
	 * Chatham2 puts garbage data in these fields when we
	 * invoke IDENTIFY_CONTROLLER, so we need to re-zero
	 * the fields before copying in the replacement strings.
	 */
	memset(cdata->sn, 0, sizeof(cdata->sn));
	memcpy(cdata->sn, "2012", strlen("2012"));
	memset(cdata->mn, 0, sizeof(cdata->mn));
	memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2"));
	memset(cdata->fr, 0, sizeof(cdata->fr));
	memcpy(cdata->fr, "0", strlen("0"));
	cdata->rab = 8;
	cdata->aerl = 3;
	cdata->lpa.ns_smart = 1;
	cdata->sqes.min = 6;	/* SQ entry size == 64 == 2^6 */
	cdata->sqes.max = 6;
	cdata->cqes.min = 4;	/* CQ entry size == 16 == 2^4 */
	cdata->cqes.max = 4;
	cdata->nn = 1;

	/* Chatham2 doesn't support DSM command */
	cdata->oncs.dsm = 0;

	cdata->vwc.present = 1;
}
#endif /* CHATHAM2 */

static void
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
{
	struct nvme_qpair *qpair;
	uint32_t num_entries;

	qpair = &ctrlr->adminq;

	num_entries = NVME_ADMIN_ENTRIES;
	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
	/*
	 * If admin_entries was overridden to an invalid value, revert it
	 * back to our default value.
	 */
	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
		printf("nvme: invalid hw.nvme.admin_entries=%d specified\n",
		    num_entries);
		num_entries = NVME_ADMIN_ENTRIES;
	}

	/*
	 * The admin queue's max xfer size is treated differently than the
	 * max I/O xfer size.  16KB is sufficient here - maybe even less?
	 */
	nvme_qpair_construct(qpair, 0, 0, num_entries, 16*1024, ctrlr);
}

static int
nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
	struct nvme_qpair *qpair;
	union cap_lo_register cap_lo;
	int i, num_entries;

	num_entries = NVME_IO_ENTRIES;
	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);

	num_entries = max(num_entries, NVME_MIN_IO_ENTRIES);

	/*
	 * NVMe spec sets a hard limit of 64K max entries, but
	 * devices may specify a smaller limit, so we need to check
	 * the MQES field in the capabilities register.
	 */
	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
	num_entries = min(num_entries, cap_lo.bits.mqes + 1);

	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
	TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
	/*
	 * Check that the tunable doesn't specify a size greater than what
	 * our driver supports, and that it is an even multiple of PAGE_SIZE.
	 */
	if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE ||
	    ctrlr->max_xfer_size % PAGE_SIZE)
		ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
	    M_NVME, M_ZERO | M_NOWAIT);

	if (ctrlr->ioq == NULL)
		return (ENOMEM);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		qpair = &ctrlr->ioq[i];

		/*
		 * Admin queue has ID=0. IO queues start at ID=1 -
		 * hence the 'i+1' here.
		 *
		 * For I/O queues, use the controller-wide max_xfer_size
		 * calculated in nvme_attach().
		 */
		nvme_qpair_construct(qpair,
		    i+1, /* qpair ID */
		    ctrlr->msix_enabled ? i+1 : 0, /* vector */
		    num_entries,
		    ctrlr->max_xfer_size,
		    ctrlr);

		if (ctrlr->per_cpu_io_queues)
			bus_bind_intr(ctrlr->dev, qpair->res, i);
	}

	return (0);
}

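/*
 * The helpers below implement the controller enable/disable handshake:
 * CC.EN is toggled and CSTS.RDY is polled until it matches, bounded by
 * the ready timeout derived from CAP.TO in nvme_ctrlr_construct().
 */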
static int
nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
{
	int ms_waited;
	union cc_register cc;
	union csts_register csts;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (!cc.bits.en) {
		device_printf(ctrlr->dev, "%s called with cc.en = 0\n",
		    __func__);
		return (ENXIO);
	}

	ms_waited = 0;

	while (!csts.bits.rdy) {
		DELAY(1000);
		if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
			device_printf(ctrlr->dev, "controller did not become "
			    "ready within %d ms\n", ctrlr->ready_timeout_in_ms);
			return (ENXIO);
		}
		csts.raw = nvme_mmio_read_4(ctrlr, csts);
	}

	return (0);
}

static void
nvme_ctrlr_disable(struct nvme_controller *ctrlr)
{
	union cc_register cc;
	union csts_register csts;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1 && csts.bits.rdy == 0)
		nvme_ctrlr_wait_for_ready(ctrlr);

	cc.bits.en = 0;
	nvme_mmio_write_4(ctrlr, cc, cc.raw);
	DELAY(5000);
}

static int
nvme_ctrlr_enable(struct nvme_controller *ctrlr)
{
	union cc_register cc;
	union csts_register csts;
	union aqa_register aqa;

	cc.raw = nvme_mmio_read_4(ctrlr, cc);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1) {
		if (csts.bits.rdy == 1)
			return (0);
		else
			return (nvme_ctrlr_wait_for_ready(ctrlr));
	}

	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
	DELAY(5000);
	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
	DELAY(5000);

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = ctrlr->adminq.num_entries - 1;
	aqa.bits.asqs = ctrlr->adminq.num_entries - 1;
	nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
	DELAY(5000);

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.ams = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/*
	 * The spec encodes the memory page size as 2^(12 + MPS), so for a
	 * 4KB PAGE_SIZE this evaluates to 0, which is what we want.
	 */
	cc.bits.mps = (PAGE_SIZE >> 13);

	nvme_mmio_write_4(ctrlr, cc, cc.raw);
	DELAY(5000);

	return (nvme_ctrlr_wait_for_ready(ctrlr));
}

int
nvme_ctrlr_reset(struct nvme_controller *ctrlr)
{

	nvme_ctrlr_disable(ctrlr);
	return (nvme_ctrlr_enable(ctrlr));
}

/*
 * Disable this code for now, since Chatham doesn't support
 * AERs so I have no good way to test them.
 */
#if 0
static void
nvme_async_event_cb(void *arg, const struct nvme_completion *status)
{
	struct nvme_controller *ctrlr = arg;

	printf("Asynchronous event occurred.\n");

	/* TODO: decode async event type based on status */
	/* TODO: check status for any error bits */

	/*
	 * Repost an asynchronous event request so that it can be
	 * used again by the controller.
	 */
	nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, nvme_async_event_cb,
	    ctrlr);
}
#endif

static int
nvme_ctrlr_identify(struct nvme_controller *ctrlr)
{
	struct mtx *mtx;
	struct nvme_completion cpl;
	int status;

	mtx = mtx_pool_find(mtxpool_sleep, &cpl);

	mtx_lock(mtx);
	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
	    nvme_ctrlr_cb, &cpl);
	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
	mtx_unlock(mtx);
	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
		printf("nvme_identify_controller failed!\n");
		return (ENXIO);
	}

#ifdef CHATHAM2
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		nvme_chatham_populate_cdata(ctrlr);
#endif

	return (0);
}

static int
nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
{
	struct mtx *mtx;
	struct nvme_completion cpl;
	int cq_allocated, sq_allocated, status;

	mtx = mtx_pool_find(mtxpool_sleep, &cpl);

	mtx_lock(mtx);
	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
	    nvme_ctrlr_cb, &cpl);
	status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5);
	mtx_unlock(mtx);
	if ((status != 0) || cpl.sf_sc || cpl.sf_sct) {
		printf("nvme_set_num_queues failed!\n");
		return (ENXIO);
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16-bits indicate number of submission queues allocated.
	 * Upper 16-bits indicate number of completion queues allocated.
	 */
	sq_allocated = (cpl.cdw0 & 0xFFFF) + 1;
	cq_allocated = (cpl.cdw0 >> 16) + 1;

	/*
	 * Check that the controller was able to allocate the number of
	 * queues we requested.  If not, revert to one IO queue.
	 */
	if (sq_allocated < ctrlr->num_io_queues ||
	    cq_allocated < ctrlr->num_io_queues) {
		ctrlr->num_io_queues = 1;
		ctrlr->per_cpu_io_queues = 0;

		/*
		 * TODO: destroy extra queues that were created
		 * previously but now found to be not needed.
		 */
	}

	return (0);
}

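/*
 * Create the I/O queue pairs on the controller itself: one completion
 * queue and one submission queue per nvme_qpair, using the same
 * synchronous msleep()/nvme_ctrlr_cb() pattern as the commands above.
 */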
473 */ 474 } 475 476 return (0); 477 } 478 479 static int 480 nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr) 481 { 482 struct mtx *mtx; 483 struct nvme_qpair *qpair; 484 struct nvme_completion cpl; 485 int i, status; 486 487 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 488 489 for (i = 0; i < ctrlr->num_io_queues; i++) { 490 qpair = &ctrlr->ioq[i]; 491 492 mtx_lock(mtx); 493 nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector, 494 nvme_ctrlr_cb, &cpl); 495 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 496 mtx_unlock(mtx); 497 if ((status != 0) || cpl.sf_sc || cpl.sf_sct) { 498 printf("nvme_create_io_cq failed!\n"); 499 return (ENXIO); 500 } 501 502 mtx_lock(mtx); 503 nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, 504 nvme_ctrlr_cb, &cpl); 505 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 506 mtx_unlock(mtx); 507 if ((status != 0) || cpl.sf_sc || cpl.sf_sct) { 508 printf("nvme_create_io_sq failed!\n"); 509 return (ENXIO); 510 } 511 } 512 513 return (0); 514 } 515 516 static int 517 nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr) 518 { 519 struct nvme_namespace *ns; 520 int i, status; 521 522 for (i = 0; i < ctrlr->cdata.nn; i++) { 523 ns = &ctrlr->ns[i]; 524 status = nvme_ns_construct(ns, i+1, ctrlr); 525 if (status != 0) 526 return (status); 527 } 528 529 return (0); 530 } 531 532 static void 533 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) 534 { 535 union nvme_critical_warning_state state; 536 uint8_t num_async_events; 537 538 state.raw = 0xFF; 539 state.bits.reserved = 0; 540 nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL); 541 542 /* aerl is a zero-based value, so we need to add 1 here. */ 543 num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1)); 544 545 /* 546 * Disable this code for now, since Chatham doesn't support 547 * AERs so I have no good way to test them. 548 */ 549 #if 0 550 for (int i = 0; i < num_async_events; i++) 551 nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, 552 nvme_async_event_cb, ctrlr); 553 #endif 554 } 555 556 static void 557 nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr) 558 { 559 560 ctrlr->int_coal_time = 0; 561 TUNABLE_INT_FETCH("hw.nvme.int_coal_time", 562 &ctrlr->int_coal_time); 563 564 ctrlr->int_coal_threshold = 0; 565 TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold", 566 &ctrlr->int_coal_threshold); 567 568 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, 569 ctrlr->int_coal_threshold, NULL, NULL); 570 } 571 572 void 573 nvme_ctrlr_start(void *ctrlr_arg) 574 { 575 struct nvme_controller *ctrlr = ctrlr_arg; 576 577 if (nvme_ctrlr_identify(ctrlr) != 0) 578 goto err; 579 580 if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) 581 goto err; 582 583 if (nvme_ctrlr_create_qpairs(ctrlr) != 0) 584 goto err; 585 586 if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) 587 goto err; 588 589 nvme_ctrlr_configure_aer(ctrlr); 590 nvme_ctrlr_configure_int_coalescing(ctrlr); 591 592 ctrlr->is_started = TRUE; 593 594 err: 595 596 /* 597 * Initialize sysctls, even if controller failed to start, to 598 * assist with debugging admin queue pair. 
599 */ 600 nvme_sysctl_initialize_ctrlr(ctrlr); 601 config_intrhook_disestablish(&ctrlr->config_hook); 602 } 603 604 static void 605 nvme_ctrlr_intx_task(void *arg, int pending) 606 { 607 struct nvme_controller *ctrlr = arg; 608 609 nvme_qpair_process_completions(&ctrlr->adminq); 610 611 if (ctrlr->ioq[0].cpl) 612 nvme_qpair_process_completions(&ctrlr->ioq[0]); 613 614 nvme_mmio_write_4(ctrlr, intmc, 1); 615 } 616 617 static void 618 nvme_ctrlr_intx_handler(void *arg) 619 { 620 struct nvme_controller *ctrlr = arg; 621 622 nvme_mmio_write_4(ctrlr, intms, 1); 623 taskqueue_enqueue_fast(ctrlr->taskqueue, &ctrlr->task); 624 } 625 626 static int 627 nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr) 628 { 629 630 ctrlr->num_io_queues = 1; 631 ctrlr->per_cpu_io_queues = 0; 632 ctrlr->rid = 0; 633 ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, 634 &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE); 635 636 if (ctrlr->res == NULL) { 637 device_printf(ctrlr->dev, "unable to allocate shared IRQ\n"); 638 return (ENOMEM); 639 } 640 641 bus_setup_intr(ctrlr->dev, ctrlr->res, 642 INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler, 643 ctrlr, &ctrlr->tag); 644 645 if (ctrlr->tag == NULL) { 646 device_printf(ctrlr->dev, 647 "unable to setup legacy interrupt handler\n"); 648 return (ENOMEM); 649 } 650 651 TASK_INIT(&ctrlr->task, 0, nvme_ctrlr_intx_task, ctrlr); 652 ctrlr->taskqueue = taskqueue_create_fast("nvme_taskq", M_NOWAIT, 653 taskqueue_thread_enqueue, &ctrlr->taskqueue); 654 taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_NET, 655 "%s intx taskq", device_get_nameunit(ctrlr->dev)); 656 657 return (0); 658 } 659 660 static int 661 nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, 662 struct thread *td) 663 { 664 struct nvme_controller *ctrlr; 665 struct nvme_completion cpl; 666 struct mtx *mtx; 667 668 ctrlr = cdev->si_drv1; 669 670 switch (cmd) { 671 case NVME_IDENTIFY_CONTROLLER: 672 #ifdef CHATHAM2 673 /* 674 * Don't refresh data on Chatham, since Chatham returns 675 * garbage on IDENTIFY anyways. 676 */ 677 if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) { 678 memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); 679 break; 680 } 681 #endif 682 /* Refresh data before returning to user. */ 683 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 684 mtx_lock(mtx); 685 nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata, 686 nvme_ctrlr_cb, &cpl); 687 msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0); 688 mtx_unlock(mtx); 689 if (cpl.sf_sc || cpl.sf_sct) 690 return (ENXIO); 691 memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); 692 break; 693 default: 694 return (ENOTTY); 695 } 696 697 return (0); 698 } 699 700 static struct cdevsw nvme_ctrlr_cdevsw = { 701 .d_version = D_VERSION, 702 .d_flags = 0, 703 .d_ioctl = nvme_ctrlr_ioctl 704 }; 705 706 int 707 nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) 708 { 709 union cap_lo_register cap_lo; 710 union cap_hi_register cap_hi; 711 int num_vectors, per_cpu_io_queues, status = 0; 712 713 ctrlr->dev = dev; 714 ctrlr->is_started = FALSE; 715 716 status = nvme_ctrlr_allocate_bar(ctrlr); 717 718 if (status != 0) 719 return (status); 720 721 #ifdef CHATHAM2 722 if (pci_get_devid(dev) == CHATHAM_PCI_ID) { 723 status = nvme_ctrlr_allocate_chatham_bar(ctrlr); 724 if (status != 0) 725 return (status); 726 nvme_ctrlr_setup_chatham(ctrlr); 727 } 728 #endif 729 730 /* 731 * Software emulators may set the doorbell stride to something 732 * other than zero, but this driver is not set up to handle that. 
733 */ 734 cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi); 735 if (cap_hi.bits.dstrd != 0) 736 return (ENXIO); 737 738 /* Get ready timeout value from controller, in units of 500ms. */ 739 cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); 740 ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; 741 742 per_cpu_io_queues = 1; 743 TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); 744 ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE; 745 746 if (ctrlr->per_cpu_io_queues) 747 ctrlr->num_io_queues = mp_ncpus; 748 else 749 ctrlr->num_io_queues = 1; 750 751 ctrlr->force_intx = 0; 752 TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); 753 754 ctrlr->msix_enabled = 1; 755 756 if (ctrlr->force_intx) { 757 ctrlr->msix_enabled = 0; 758 goto intx; 759 } 760 761 /* One vector per IO queue, plus one vector for admin queue. */ 762 num_vectors = ctrlr->num_io_queues + 1; 763 764 if (pci_msix_count(dev) < num_vectors) { 765 ctrlr->msix_enabled = 0; 766 goto intx; 767 } 768 769 if (pci_alloc_msix(dev, &num_vectors) != 0) 770 ctrlr->msix_enabled = 0; 771 772 intx: 773 774 if (!ctrlr->msix_enabled) 775 nvme_ctrlr_configure_intx(ctrlr); 776 777 nvme_ctrlr_construct_admin_qpair(ctrlr); 778 779 status = nvme_ctrlr_construct_io_qpairs(ctrlr); 780 781 if (status != 0) 782 return (status); 783 784 ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 785 "nvme%d", device_get_unit(dev)); 786 787 if (ctrlr->cdev == NULL) 788 return (ENXIO); 789 790 ctrlr->cdev->si_drv1 = (void *)ctrlr; 791 792 return (0); 793 } 794