1 /*- 2 * Copyright (C) 2012 Intel Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 #include <sys/conf.h> 33 #include <sys/ioccom.h> 34 #include <sys/smp.h> 35 36 #include <dev/pci/pcireg.h> 37 #include <dev/pci/pcivar.h> 38 39 #include "nvme_private.h" 40 41 static void 42 nvme_ctrlr_cb(void *arg, const struct nvme_completion *status) 43 { 44 struct nvme_completion *cpl = arg; 45 struct mtx *mtx; 46 47 /* 48 * Copy status into the argument passed by the caller, so that 49 * the caller can check the status to determine if the 50 * the request passed or failed. 51 */ 52 memcpy(cpl, status, sizeof(*cpl)); 53 mtx = mtx_pool_find(mtxpool_sleep, cpl); 54 mtx_lock(mtx); 55 wakeup(cpl); 56 mtx_unlock(mtx); 57 } 58 59 static int 60 nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr) 61 { 62 63 /* Chatham puts the NVMe MMRs behind BAR 2/3, not BAR 0/1. */ 64 if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) 65 ctrlr->resource_id = PCIR_BAR(2); 66 else 67 ctrlr->resource_id = PCIR_BAR(0); 68 69 ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY, 70 &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE); 71 72 if(ctrlr->resource == NULL) { 73 device_printf(ctrlr->dev, "unable to allocate pci resource\n"); 74 return (ENOMEM); 75 } 76 77 ctrlr->bus_tag = rman_get_bustag(ctrlr->resource); 78 ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource); 79 ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle; 80 81 return (0); 82 } 83 84 #ifdef CHATHAM2 85 static int 86 nvme_ctrlr_allocate_chatham_bar(struct nvme_controller *ctrlr) 87 { 88 89 ctrlr->chatham_resource_id = PCIR_BAR(CHATHAM_CONTROL_BAR); 90 ctrlr->chatham_resource = bus_alloc_resource(ctrlr->dev, 91 SYS_RES_MEMORY, &ctrlr->chatham_resource_id, 0, ~0, 1, 92 RF_ACTIVE); 93 94 if(ctrlr->chatham_resource == NULL) { 95 device_printf(ctrlr->dev, "unable to alloc pci resource\n"); 96 return (ENOMEM); 97 } 98 99 ctrlr->chatham_bus_tag = rman_get_bustag(ctrlr->chatham_resource); 100 ctrlr->chatham_bus_handle = 101 rman_get_bushandle(ctrlr->chatham_resource); 102 103 return (0); 104 } 105 106 static void 107 nvme_ctrlr_setup_chatham(struct nvme_controller *ctrlr) 108 { 109 uint64_t reg1, reg2, reg3; 110 uint64_t temp1, temp2; 111 uint32_t temp3; 112 uint32_t use_flash_timings = 0; 113 114 DELAY(10000); 115 116 temp3 = chatham_read_4(ctrlr, 0x8080); 117 118 device_printf(ctrlr->dev, "Chatham version: 0x%x\n", temp3); 119 120 ctrlr->chatham_lbas = chatham_read_4(ctrlr, 0x8068) - 0x110; 121 ctrlr->chatham_size = ctrlr->chatham_lbas * 512; 122 123 device_printf(ctrlr->dev, "Chatham size: %lld\n", 124 (long long)ctrlr->chatham_size); 125 126 reg1 = reg2 = reg3 = ctrlr->chatham_size - 1; 127 128 TUNABLE_INT_FETCH("hw.nvme.use_flash_timings", &use_flash_timings); 129 if (use_flash_timings) { 130 device_printf(ctrlr->dev, "Chatham: using flash timings\n"); 131 temp1 = 0x00001b58000007d0LL; 132 temp2 = 0x000000cb00000131LL; 133 } else { 134 device_printf(ctrlr->dev, "Chatham: using DDR timings\n"); 135 temp1 = temp2 = 0x0LL; 136 } 137 138 chatham_write_8(ctrlr, 0x8000, reg1); 139 chatham_write_8(ctrlr, 0x8008, reg2); 140 chatham_write_8(ctrlr, 0x8010, reg3); 141 142 chatham_write_8(ctrlr, 0x8020, temp1); 143 temp3 = chatham_read_4(ctrlr, 0x8020); 144 145 chatham_write_8(ctrlr, 0x8028, temp2); 146 temp3 = chatham_read_4(ctrlr, 0x8028); 147 148 chatham_write_8(ctrlr, 0x8030, temp1); 149 chatham_write_8(ctrlr, 0x8038, temp2); 150 chatham_write_8(ctrlr, 0x8040, temp1); 151 chatham_write_8(ctrlr, 0x8048, temp2); 152 chatham_write_8(ctrlr, 0x8050, temp1); 153 chatham_write_8(ctrlr, 0x8058, temp2); 154 155 DELAY(10000); 156 } 157 158 static void 159 nvme_chatham_populate_cdata(struct nvme_controller *ctrlr) 160 { 161 struct nvme_controller_data *cdata; 162 163 cdata = &ctrlr->cdata; 164 165 cdata->vid = 0x8086; 166 cdata->ssvid = 0x2011; 167 168 /* 169 * Chatham2 puts garbage data in these fields when we 170 * invoke IDENTIFY_CONTROLLER, so we need to re-zero 171 * the fields before calling bcopy(). 172 */ 173 memset(cdata->sn, 0, sizeof(cdata->sn)); 174 memcpy(cdata->sn, "2012", strlen("2012")); 175 memset(cdata->mn, 0, sizeof(cdata->mn)); 176 memcpy(cdata->mn, "CHATHAM2", strlen("CHATHAM2")); 177 memset(cdata->fr, 0, sizeof(cdata->fr)); 178 memcpy(cdata->fr, "0", strlen("0")); 179 cdata->rab = 8; 180 cdata->aerl = 3; 181 cdata->lpa.ns_smart = 1; 182 cdata->sqes.min = 6; 183 cdata->sqes.max = 6; 184 cdata->sqes.min = 4; 185 cdata->sqes.max = 4; 186 cdata->nn = 1; 187 188 /* Chatham2 doesn't support DSM command */ 189 cdata->oncs.dsm = 0; 190 191 cdata->vwc.present = 1; 192 } 193 #endif /* CHATHAM2 */ 194 195 static void 196 nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr) 197 { 198 struct nvme_qpair *qpair; 199 uint32_t num_entries; 200 201 qpair = &ctrlr->adminq; 202 203 num_entries = NVME_ADMIN_ENTRIES; 204 TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries); 205 /* 206 * If admin_entries was overridden to an invalid value, revert it 207 * back to our default value. 208 */ 209 if (num_entries < NVME_MIN_ADMIN_ENTRIES || 210 num_entries > NVME_MAX_ADMIN_ENTRIES) { 211 printf("nvme: invalid hw.nvme.admin_entries=%d specified\n", 212 num_entries); 213 num_entries = NVME_ADMIN_ENTRIES; 214 } 215 216 /* 217 * The admin queue's max xfer size is treated differently than the 218 * max I/O xfer size. 16KB is sufficient here - maybe even less? 219 */ 220 nvme_qpair_construct(qpair, 221 0, /* qpair ID */ 222 0, /* vector */ 223 num_entries, 224 NVME_ADMIN_TRACKERS, 225 16*1024, /* max xfer size */ 226 ctrlr); 227 } 228 229 static int 230 nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) 231 { 232 struct nvme_qpair *qpair; 233 union cap_lo_register cap_lo; 234 int i, num_entries, num_trackers; 235 236 num_entries = NVME_IO_ENTRIES; 237 TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries); 238 239 /* 240 * NVMe spec sets a hard limit of 64K max entries, but 241 * devices may specify a smaller limit, so we need to check 242 * the MQES field in the capabilities register. 243 */ 244 cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); 245 num_entries = min(num_entries, cap_lo.bits.mqes+1); 246 247 num_trackers = NVME_IO_TRACKERS; 248 TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers); 249 250 num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS); 251 num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS); 252 /* 253 * No need to have more trackers than entries in the submit queue. 254 * Note also that for a queue size of N, we can only have (N-1) 255 * commands outstanding, hence the "-1" here. 256 */ 257 num_trackers = min(num_trackers, (num_entries-1)); 258 259 ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE; 260 TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size); 261 /* 262 * Check that tunable doesn't specify a size greater than what our 263 * driver supports, and is an even PAGE_SIZE multiple. 264 */ 265 if (ctrlr->max_xfer_size > NVME_MAX_XFER_SIZE || 266 ctrlr->max_xfer_size % PAGE_SIZE) 267 ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE; 268 269 ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair), 270 M_NVME, M_ZERO | M_NOWAIT); 271 272 if (ctrlr->ioq == NULL) 273 return (ENOMEM); 274 275 for (i = 0; i < ctrlr->num_io_queues; i++) { 276 qpair = &ctrlr->ioq[i]; 277 278 /* 279 * Admin queue has ID=0. IO queues start at ID=1 - 280 * hence the 'i+1' here. 281 * 282 * For I/O queues, use the controller-wide max_xfer_size 283 * calculated in nvme_attach(). 284 */ 285 nvme_qpair_construct(qpair, 286 i+1, /* qpair ID */ 287 ctrlr->msix_enabled ? i+1 : 0, /* vector */ 288 num_entries, 289 num_trackers, 290 ctrlr->max_xfer_size, 291 ctrlr); 292 293 if (ctrlr->per_cpu_io_queues) 294 bus_bind_intr(ctrlr->dev, qpair->res, i); 295 } 296 297 return (0); 298 } 299 300 static int 301 nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr) 302 { 303 int ms_waited; 304 union cc_register cc; 305 union csts_register csts; 306 307 cc.raw = nvme_mmio_read_4(ctrlr, cc); 308 csts.raw = nvme_mmio_read_4(ctrlr, csts); 309 310 if (!cc.bits.en) { 311 device_printf(ctrlr->dev, "%s called with cc.en = 0\n", 312 __func__); 313 return (ENXIO); 314 } 315 316 ms_waited = 0; 317 318 while (!csts.bits.rdy) { 319 DELAY(1000); 320 if (ms_waited++ > ctrlr->ready_timeout_in_ms) { 321 device_printf(ctrlr->dev, "controller did not become " 322 "ready within %d ms\n", ctrlr->ready_timeout_in_ms); 323 return (ENXIO); 324 } 325 csts.raw = nvme_mmio_read_4(ctrlr, csts); 326 } 327 328 return (0); 329 } 330 331 static void 332 nvme_ctrlr_disable(struct nvme_controller *ctrlr) 333 { 334 union cc_register cc; 335 union csts_register csts; 336 337 cc.raw = nvme_mmio_read_4(ctrlr, cc); 338 csts.raw = nvme_mmio_read_4(ctrlr, csts); 339 340 if (cc.bits.en == 1 && csts.bits.rdy == 0) 341 nvme_ctrlr_wait_for_ready(ctrlr); 342 343 cc.bits.en = 0; 344 nvme_mmio_write_4(ctrlr, cc, cc.raw); 345 DELAY(5000); 346 } 347 348 static int 349 nvme_ctrlr_enable(struct nvme_controller *ctrlr) 350 { 351 union cc_register cc; 352 union csts_register csts; 353 union aqa_register aqa; 354 355 cc.raw = nvme_mmio_read_4(ctrlr, cc); 356 csts.raw = nvme_mmio_read_4(ctrlr, csts); 357 358 if (cc.bits.en == 1) { 359 if (csts.bits.rdy == 1) 360 return (0); 361 else 362 return (nvme_ctrlr_wait_for_ready(ctrlr)); 363 } 364 365 nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr); 366 DELAY(5000); 367 nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); 368 DELAY(5000); 369 370 aqa.raw = 0; 371 /* acqs and asqs are 0-based. */ 372 aqa.bits.acqs = ctrlr->adminq.num_entries-1; 373 aqa.bits.asqs = ctrlr->adminq.num_entries-1; 374 nvme_mmio_write_4(ctrlr, aqa, aqa.raw); 375 DELAY(5000); 376 377 cc.bits.en = 1; 378 cc.bits.css = 0; 379 cc.bits.ams = 0; 380 cc.bits.shn = 0; 381 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ 382 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ 383 384 /* This evaluates to 0, which is according to spec. */ 385 cc.bits.mps = (PAGE_SIZE >> 13); 386 387 nvme_mmio_write_4(ctrlr, cc, cc.raw); 388 DELAY(5000); 389 390 return (nvme_ctrlr_wait_for_ready(ctrlr)); 391 } 392 393 int 394 nvme_ctrlr_reset(struct nvme_controller *ctrlr) 395 { 396 397 nvme_ctrlr_disable(ctrlr); 398 return (nvme_ctrlr_enable(ctrlr)); 399 } 400 401 /* 402 * Disable this code for now, since Chatham doesn't support 403 * AERs so I have no good way to test them. 404 */ 405 #if 0 406 static void 407 nvme_async_event_cb(void *arg, const struct nvme_completion *status) 408 { 409 struct nvme_controller *ctrlr = arg; 410 411 printf("Asynchronous event occurred.\n"); 412 413 /* TODO: decode async event type based on status */ 414 /* TODO: check status for any error bits */ 415 416 /* 417 * Repost an asynchronous event request so that it can be 418 * used again by the controller. 419 */ 420 nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, nvme_async_event_cb, 421 ctrlr); 422 } 423 #endif 424 425 static int 426 nvme_ctrlr_identify(struct nvme_controller *ctrlr) 427 { 428 struct mtx *mtx; 429 struct nvme_completion cpl; 430 int status; 431 432 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 433 434 mtx_lock(mtx); 435 nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata, 436 nvme_ctrlr_cb, &cpl); 437 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 438 mtx_unlock(mtx); 439 if ((status != 0) || cpl.sf_sc || cpl.sf_sct) { 440 printf("nvme_identify_controller failed!\n"); 441 return (ENXIO); 442 } 443 444 #ifdef CHATHAM2 445 if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) 446 nvme_chatham_populate_cdata(ctrlr); 447 #endif 448 449 return (0); 450 } 451 452 static int 453 nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr) 454 { 455 struct mtx *mtx; 456 struct nvme_completion cpl; 457 int cq_allocated, sq_allocated, status; 458 459 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 460 461 mtx_lock(mtx); 462 nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues, 463 nvme_ctrlr_cb, &cpl); 464 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 465 mtx_unlock(mtx); 466 if ((status != 0) || cpl.sf_sc || cpl.sf_sct) { 467 printf("nvme_set_num_queues failed!\n"); 468 return (ENXIO); 469 } 470 471 /* 472 * Data in cdw0 is 0-based. 473 * Lower 16-bits indicate number of submission queues allocated. 474 * Upper 16-bits indicate number of completion queues allocated. 475 */ 476 sq_allocated = (cpl.cdw0 & 0xFFFF) + 1; 477 cq_allocated = (cpl.cdw0 >> 16) + 1; 478 479 /* 480 * Check that the controller was able to allocate the number of 481 * queues we requested. If not, revert to one IO queue. 482 */ 483 if (sq_allocated < ctrlr->num_io_queues || 484 cq_allocated < ctrlr->num_io_queues) { 485 ctrlr->num_io_queues = 1; 486 ctrlr->per_cpu_io_queues = 0; 487 488 /* TODO: destroy extra queues that were created 489 * previously but now found to be not needed. 490 */ 491 } 492 493 return (0); 494 } 495 496 static int 497 nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr) 498 { 499 struct mtx *mtx; 500 struct nvme_qpair *qpair; 501 struct nvme_completion cpl; 502 int i, status; 503 504 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 505 506 for (i = 0; i < ctrlr->num_io_queues; i++) { 507 qpair = &ctrlr->ioq[i]; 508 509 mtx_lock(mtx); 510 nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector, 511 nvme_ctrlr_cb, &cpl); 512 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 513 mtx_unlock(mtx); 514 if ((status != 0) || cpl.sf_sc || cpl.sf_sct) { 515 printf("nvme_create_io_cq failed!\n"); 516 return (ENXIO); 517 } 518 519 mtx_lock(mtx); 520 nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, 521 nvme_ctrlr_cb, &cpl); 522 status = msleep(&cpl, mtx, PRIBIO, "nvme_start", hz*5); 523 mtx_unlock(mtx); 524 if ((status != 0) || cpl.sf_sc || cpl.sf_sct) { 525 printf("nvme_create_io_sq failed!\n"); 526 return (ENXIO); 527 } 528 } 529 530 return (0); 531 } 532 533 static int 534 nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr) 535 { 536 struct nvme_namespace *ns; 537 int i, status; 538 539 for (i = 0; i < ctrlr->cdata.nn; i++) { 540 ns = &ctrlr->ns[i]; 541 status = nvme_ns_construct(ns, i+1, ctrlr); 542 if (status != 0) 543 return (status); 544 } 545 546 return (0); 547 } 548 549 static void 550 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) 551 { 552 union nvme_critical_warning_state state; 553 uint8_t num_async_events; 554 555 state.raw = 0xFF; 556 state.bits.reserved = 0; 557 nvme_ctrlr_cmd_set_asynchronous_event_config(ctrlr, state, NULL, NULL); 558 559 /* aerl is a zero-based value, so we need to add 1 here. */ 560 num_async_events = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1)); 561 562 /* 563 * Disable this code for now, since Chatham doesn't support 564 * AERs so I have no good way to test them. 565 */ 566 #if 0 567 for (int i = 0; i < num_async_events; i++) 568 nvme_ctrlr_cmd_asynchronous_event_request(ctrlr, 569 nvme_async_event_cb, ctrlr); 570 #endif 571 } 572 573 static void 574 nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr) 575 { 576 577 ctrlr->int_coal_time = 0; 578 TUNABLE_INT_FETCH("hw.nvme.int_coal_time", 579 &ctrlr->int_coal_time); 580 581 ctrlr->int_coal_threshold = 0; 582 TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold", 583 &ctrlr->int_coal_threshold); 584 585 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, 586 ctrlr->int_coal_threshold, NULL, NULL); 587 } 588 589 void 590 nvme_ctrlr_start(void *ctrlr_arg) 591 { 592 struct nvme_controller *ctrlr = ctrlr_arg; 593 594 if (nvme_ctrlr_identify(ctrlr) != 0) 595 goto err; 596 597 if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) 598 goto err; 599 600 if (nvme_ctrlr_create_qpairs(ctrlr) != 0) 601 goto err; 602 603 if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) 604 goto err; 605 606 nvme_ctrlr_configure_aer(ctrlr); 607 nvme_ctrlr_configure_int_coalescing(ctrlr); 608 609 ctrlr->is_started = TRUE; 610 611 err: 612 613 /* 614 * Initialize sysctls, even if controller failed to start, to 615 * assist with debugging admin queue pair. 616 */ 617 nvme_sysctl_initialize_ctrlr(ctrlr); 618 config_intrhook_disestablish(&ctrlr->config_hook); 619 } 620 621 static void 622 nvme_ctrlr_intx_task(void *arg, int pending) 623 { 624 struct nvme_controller *ctrlr = arg; 625 626 nvme_qpair_process_completions(&ctrlr->adminq); 627 628 if (ctrlr->ioq[0].cpl) 629 nvme_qpair_process_completions(&ctrlr->ioq[0]); 630 631 nvme_mmio_write_4(ctrlr, intmc, 1); 632 } 633 634 static void 635 nvme_ctrlr_intx_handler(void *arg) 636 { 637 struct nvme_controller *ctrlr = arg; 638 639 nvme_mmio_write_4(ctrlr, intms, 1); 640 taskqueue_enqueue_fast(ctrlr->taskqueue, &ctrlr->task); 641 } 642 643 static int 644 nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr) 645 { 646 647 ctrlr->num_io_queues = 1; 648 ctrlr->per_cpu_io_queues = 0; 649 ctrlr->rid = 0; 650 ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, 651 &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE); 652 653 if (ctrlr->res == NULL) { 654 device_printf(ctrlr->dev, "unable to allocate shared IRQ\n"); 655 return (ENOMEM); 656 } 657 658 bus_setup_intr(ctrlr->dev, ctrlr->res, 659 INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler, 660 ctrlr, &ctrlr->tag); 661 662 if (ctrlr->tag == NULL) { 663 device_printf(ctrlr->dev, 664 "unable to setup legacy interrupt handler\n"); 665 return (ENOMEM); 666 } 667 668 TASK_INIT(&ctrlr->task, 0, nvme_ctrlr_intx_task, ctrlr); 669 ctrlr->taskqueue = taskqueue_create_fast("nvme_taskq", M_NOWAIT, 670 taskqueue_thread_enqueue, &ctrlr->taskqueue); 671 taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_NET, 672 "%s intx taskq", device_get_nameunit(ctrlr->dev)); 673 674 return (0); 675 } 676 677 static int 678 nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, 679 struct thread *td) 680 { 681 struct nvme_controller *ctrlr; 682 struct nvme_completion cpl; 683 struct mtx *mtx; 684 685 ctrlr = cdev->si_drv1; 686 687 switch (cmd) { 688 case NVME_IDENTIFY_CONTROLLER: 689 #ifdef CHATHAM2 690 /* 691 * Don't refresh data on Chatham, since Chatham returns 692 * garbage on IDENTIFY anyways. 693 */ 694 if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID) { 695 memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); 696 break; 697 } 698 #endif 699 /* Refresh data before returning to user. */ 700 mtx = mtx_pool_find(mtxpool_sleep, &cpl); 701 mtx_lock(mtx); 702 nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata, 703 nvme_ctrlr_cb, &cpl); 704 msleep(&cpl, mtx, PRIBIO, "nvme_ioctl", 0); 705 mtx_unlock(mtx); 706 if (cpl.sf_sc || cpl.sf_sct) 707 return (ENXIO); 708 memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata)); 709 break; 710 default: 711 return (ENOTTY); 712 } 713 714 return (0); 715 } 716 717 static struct cdevsw nvme_ctrlr_cdevsw = { 718 .d_version = D_VERSION, 719 .d_flags = 0, 720 .d_ioctl = nvme_ctrlr_ioctl 721 }; 722 723 int 724 nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) 725 { 726 union cap_lo_register cap_lo; 727 union cap_hi_register cap_hi; 728 int num_vectors, per_cpu_io_queues, status = 0; 729 730 ctrlr->dev = dev; 731 ctrlr->is_started = FALSE; 732 733 status = nvme_ctrlr_allocate_bar(ctrlr); 734 735 if (status != 0) 736 return (status); 737 738 #ifdef CHATHAM2 739 if (pci_get_devid(dev) == CHATHAM_PCI_ID) { 740 status = nvme_ctrlr_allocate_chatham_bar(ctrlr); 741 if (status != 0) 742 return (status); 743 nvme_ctrlr_setup_chatham(ctrlr); 744 } 745 #endif 746 747 /* 748 * Software emulators may set the doorbell stride to something 749 * other than zero, but this driver is not set up to handle that. 750 */ 751 cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi); 752 if (cap_hi.bits.dstrd != 0) 753 return (ENXIO); 754 755 /* Get ready timeout value from controller, in units of 500ms. */ 756 cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); 757 ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; 758 759 per_cpu_io_queues = 1; 760 TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues); 761 ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE; 762 763 if (ctrlr->per_cpu_io_queues) 764 ctrlr->num_io_queues = mp_ncpus; 765 else 766 ctrlr->num_io_queues = 1; 767 768 ctrlr->force_intx = 0; 769 TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx); 770 771 ctrlr->msix_enabled = 1; 772 773 if (ctrlr->force_intx) { 774 ctrlr->msix_enabled = 0; 775 goto intx; 776 } 777 778 /* One vector per IO queue, plus one vector for admin queue. */ 779 num_vectors = ctrlr->num_io_queues + 1; 780 781 if (pci_msix_count(dev) < num_vectors) { 782 ctrlr->msix_enabled = 0; 783 goto intx; 784 } 785 786 if (pci_alloc_msix(dev, &num_vectors) != 0) 787 ctrlr->msix_enabled = 0; 788 789 intx: 790 791 if (!ctrlr->msix_enabled) 792 nvme_ctrlr_configure_intx(ctrlr); 793 794 nvme_ctrlr_construct_admin_qpair(ctrlr); 795 796 status = nvme_ctrlr_construct_io_qpairs(ctrlr); 797 798 if (status != 0) 799 return (status); 800 801 ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 802 "nvme%d", device_get_unit(dev)); 803 804 if (ctrlr->cdev == NULL) 805 return (ENXIO); 806 807 ctrlr->cdev->si_drv1 = (void *)ctrlr; 808 809 return (0); 810 } 811 812 void 813 nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr, 814 struct nvme_request *req) 815 { 816 817 nvme_qpair_submit_request(&ctrlr->adminq, req); 818 } 819 820 void 821 nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, 822 struct nvme_request *req) 823 { 824 struct nvme_qpair *qpair; 825 826 if (ctrlr->per_cpu_io_queues) 827 qpair = &ctrlr->ioq[curcpu]; 828 else 829 qpair = &ctrlr->ioq[0]; 830 831 nvme_qpair_submit_request(qpair, req); 832 } 833