1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2017 Shunsuke Mie 5 * Copyright (c) 2018 Leon Dang 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * bhyve PCIe-NVMe device emulation. 31 * 32 * options: 33 * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z 34 * 35 * accepted devpath: 36 * /dev/blockdev 37 * /path/to/image 38 * ram=size_in_MiB 39 * 40 * maxq = max number of queues 41 * qsz = max elements in each queue 42 * ioslots = max number of concurrent io requests 43 * sectsz = sector size (defaults to blockif sector size) 44 * ser = serial number (20-chars max) 45 * 46 */ 47 48 /* TODO: 49 - create async event for smart and log 50 - intr coalesce 51 */ 52 53 #include <sys/cdefs.h> 54 __FBSDID("$FreeBSD$"); 55 56 #include <sys/types.h> 57 58 #include <assert.h> 59 #include <pthread.h> 60 #include <semaphore.h> 61 #include <stdbool.h> 62 #include <stddef.h> 63 #include <stdint.h> 64 #include <stdio.h> 65 #include <stdlib.h> 66 #include <string.h> 67 68 #include <machine/atomic.h> 69 #include <machine/vmm.h> 70 #include <vmmapi.h> 71 72 #include <dev/nvme/nvme.h> 73 74 #include "bhyverun.h" 75 #include "block_if.h" 76 #include "pci_emul.h" 77 78 79 static int nvme_debug = 0; 80 #define DPRINTF(params) if (nvme_debug) printf params 81 #define WPRINTF(params) printf params 82 83 /* defaults; can be overridden */ 84 #define NVME_MSIX_BAR 4 85 86 #define NVME_IOSLOTS 8 87 88 #define NVME_QUEUES 16 89 #define NVME_MAX_QENTRIES 2048 90 91 #define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t)) 92 #define NVME_MAX_BLOCKIOVS 512 93 94 /* helpers */ 95 96 #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) 97 98 enum nvme_controller_register_offsets { 99 NVME_CR_CAP_LOW = 0x00, 100 NVME_CR_CAP_HI = 0x04, 101 NVME_CR_VS = 0x08, 102 NVME_CR_INTMS = 0x0c, 103 NVME_CR_INTMC = 0x10, 104 NVME_CR_CC = 0x14, 105 NVME_CR_CSTS = 0x1c, 106 NVME_CR_NSSR = 0x20, 107 NVME_CR_AQA = 0x24, 108 NVME_CR_ASQ_LOW = 0x28, 109 NVME_CR_ASQ_HI = 0x2c, 110 NVME_CR_ACQ_LOW = 0x30, 111 NVME_CR_ACQ_HI = 0x34, 112 }; 113 114 enum nvme_cmd_cdw11 { 115 NVME_CMD_CDW11_PC = 0x0001, 116 NVME_CMD_CDW11_IEN = 0x0002, 117 NVME_CMD_CDW11_IV = 0xFFFF0000, 118 }; 119 120 #define NVME_CMD_GET_OPC(opc) \ 121 ((opc) >> NVME_CMD_OPC_SHIFT & NVME_CMD_OPC_MASK) 122 123 #define NVME_CQ_INTEN 0x01 124 #define NVME_CQ_INTCOAL 0x02 125 126 struct nvme_completion_queue { 127 struct nvme_completion *qbase; 128 uint32_t size; 129 uint16_t tail; /* nvme progress */ 130 uint16_t head; /* guest progress */ 131 uint16_t intr_vec; 132 uint32_t intr_en; 133 pthread_mutex_t mtx; 134 }; 135 136 struct nvme_submission_queue { 137 struct nvme_command *qbase; 138 uint32_t size; 139 uint16_t head; /* nvme progress */ 140 uint16_t tail; /* guest progress */ 141 uint16_t cqid; /* completion queue id */ 142 int busy; /* queue is being processed */ 143 int qpriority; 144 }; 145 146 enum nvme_storage_type { 147 NVME_STOR_BLOCKIF = 0, 148 NVME_STOR_RAM = 1, 149 }; 150 151 struct pci_nvme_blockstore { 152 enum nvme_storage_type type; 153 void *ctx; 154 uint64_t size; 155 uint32_t sectsz; 156 uint32_t sectsz_bits; 157 }; 158 159 struct pci_nvme_ioreq { 160 struct pci_nvme_softc *sc; 161 struct pci_nvme_ioreq *next; 162 struct nvme_submission_queue *nvme_sq; 163 uint16_t sqid; 164 165 /* command information */ 166 uint16_t opc; 167 uint16_t cid; 168 uint32_t nsid; 169 170 uint64_t prev_gpaddr; 171 size_t prev_size; 172 173 /* 174 * lock if all iovs consumed (big IO); 175 * complete transaction before continuing 176 */ 177 pthread_mutex_t mtx; 178 pthread_cond_t cv; 179 180 struct blockif_req io_req; 181 182 /* pad to fit up to 512 page descriptors from guest IO request */ 183 struct iovec iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX]; 184 }; 185 186 struct pci_nvme_softc { 187 struct pci_devinst *nsc_pi; 188 189 pthread_mutex_t mtx; 190 191 struct nvme_registers regs; 192 193 struct nvme_namespace_data nsdata; 194 struct nvme_controller_data ctrldata; 195 196 struct pci_nvme_blockstore nvstore; 197 198 uint16_t max_qentries; /* max entries per queue */ 199 uint32_t max_queues; 200 uint32_t num_cqueues; 201 uint32_t num_squeues; 202 203 struct pci_nvme_ioreq *ioreqs; 204 struct pci_nvme_ioreq *ioreqs_free; /* free list of ioreqs */ 205 uint32_t pending_ios; 206 uint32_t ioslots; 207 sem_t iosemlock; 208 209 /* status and guest memory mapped queues */ 210 struct nvme_completion_queue *compl_queues; 211 struct nvme_submission_queue *submit_queues; 212 213 /* controller features */ 214 uint32_t intr_coales_aggr_time; /* 0x08: uS to delay intr */ 215 uint32_t intr_coales_aggr_thresh; /* 0x08: compl-Q entries */ 216 uint32_t async_ev_config; /* 0x0B: async event config */ 217 }; 218 219 220 static void pci_nvme_io_partial(struct blockif_req *br, int err); 221 222 /* Controller Configuration utils */ 223 #define NVME_CC_GET_EN(cc) \ 224 ((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK) 225 #define NVME_CC_GET_CSS(cc) \ 226 ((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK) 227 #define NVME_CC_GET_SHN(cc) \ 228 ((cc) >> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK) 229 #define NVME_CC_GET_IOSQES(cc) \ 230 ((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK) 231 #define NVME_CC_GET_IOCQES(cc) \ 232 ((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK) 233 234 #define NVME_CC_WRITE_MASK \ 235 ((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \ 236 (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \ 237 (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT)) 238 239 #define NVME_CC_NEN_WRITE_MASK \ 240 ((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \ 241 (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \ 242 (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT)) 243 244 /* Controller Status utils */ 245 #define NVME_CSTS_GET_RDY(sts) \ 246 ((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK) 247 248 #define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT) 249 250 /* Completion Queue status word utils */ 251 #define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT) 252 #define NVME_STATUS_MASK \ 253 ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\ 254 (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) 255 256 static __inline void 257 pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code) 258 { 259 260 *status &= ~NVME_STATUS_MASK; 261 *status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT | 262 (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; 263 } 264 265 static __inline void 266 pci_nvme_status_genc(uint16_t *status, uint16_t code) 267 { 268 269 pci_nvme_status_tc(status, NVME_SCT_GENERIC, code); 270 } 271 272 static __inline void 273 pci_nvme_toggle_phase(uint16_t *status, int prev) 274 { 275 276 if (prev) 277 *status &= ~NVME_STATUS_P; 278 else 279 *status |= NVME_STATUS_P; 280 } 281 282 static void 283 pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) 284 { 285 struct nvme_controller_data *cd = &sc->ctrldata; 286 287 cd->vid = 0xFB5D; 288 cd->ssvid = 0x0000; 289 290 cd->mn[0] = 'b'; 291 cd->mn[1] = 'h'; 292 cd->mn[2] = 'y'; 293 cd->mn[3] = 'v'; 294 cd->mn[4] = 'e'; 295 cd->mn[5] = '-'; 296 cd->mn[6] = 'N'; 297 cd->mn[7] = 'V'; 298 cd->mn[8] = 'M'; 299 cd->mn[9] = 'e'; 300 301 cd->fr[0] = '1'; 302 cd->fr[1] = '.'; 303 cd->fr[2] = '0'; 304 305 /* Num of submission commands that we can handle at a time (2^rab) */ 306 cd->rab = 4; 307 308 /* FreeBSD OUI */ 309 cd->ieee[0] = 0x58; 310 cd->ieee[1] = 0x9c; 311 cd->ieee[2] = 0xfc; 312 313 cd->mic = 0; 314 315 cd->mdts = 9; /* max data transfer size (2^mdts * CAP.MPSMIN) */ 316 317 cd->ver = 0x00010300; 318 319 cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT; 320 cd->acl = 2; 321 cd->aerl = 4; 322 323 cd->lpa = 0; /* TODO: support some simple things like SMART */ 324 cd->elpe = 0; /* max error log page entries */ 325 cd->npss = 1; /* number of power states support */ 326 327 /* Warning Composite Temperature Threshold */ 328 cd->wctemp = 0x0157; 329 330 cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) | 331 (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT); 332 cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) | 333 (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT); 334 cd->nn = 1; /* number of namespaces */ 335 336 cd->fna = 0x03; 337 338 cd->power_state[0].mp = 10; 339 } 340 341 static void 342 pci_nvme_init_nsdata(struct pci_nvme_softc *sc) 343 { 344 struct nvme_namespace_data *nd; 345 346 nd = &sc->nsdata; 347 348 nd->nsze = sc->nvstore.size / sc->nvstore.sectsz; 349 nd->ncap = nd->nsze; 350 nd->nuse = nd->nsze; 351 352 /* Get LBA and backstore information from backing store */ 353 nd->nlbaf = 1; 354 /* LBA data-sz = 2^lbads */ 355 nd->lbaf[0] = sc->nvstore.sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; 356 357 nd->flbas = 0; 358 } 359 360 static void 361 pci_nvme_reset(struct pci_nvme_softc *sc) 362 { 363 DPRINTF(("%s\r\n", __func__)); 364 365 sc->regs.cap_lo = (sc->max_qentries & NVME_CAP_LO_REG_MQES_MASK) | 366 (1 << NVME_CAP_LO_REG_CQR_SHIFT) | 367 (60 << NVME_CAP_LO_REG_TO_SHIFT); 368 369 sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT; 370 371 sc->regs.vs = 0x00010300; /* NVMe v1.3 */ 372 373 sc->regs.cc = 0; 374 sc->regs.csts = 0; 375 376 if (sc->submit_queues != NULL) { 377 pthread_mutex_lock(&sc->mtx); 378 sc->num_cqueues = sc->num_squeues = sc->max_queues; 379 380 for (int i = 0; i <= sc->max_queues; i++) { 381 /* 382 * The Admin Submission Queue is at index 0. 383 * It must not be changed at reset otherwise the 384 * emulation will be out of sync with the guest. 385 */ 386 if (i != 0) { 387 sc->submit_queues[i].qbase = NULL; 388 sc->submit_queues[i].size = 0; 389 sc->submit_queues[i].cqid = 0; 390 391 sc->compl_queues[i].qbase = NULL; 392 sc->compl_queues[i].size = 0; 393 } 394 sc->submit_queues[i].tail = 0; 395 sc->submit_queues[i].head = 0; 396 sc->submit_queues[i].busy = 0; 397 398 sc->compl_queues[i].tail = 0; 399 sc->compl_queues[i].head = 0; 400 } 401 402 pthread_mutex_unlock(&sc->mtx); 403 } else 404 sc->submit_queues = calloc(sc->max_queues + 1, 405 sizeof(struct nvme_submission_queue)); 406 407 if (sc->compl_queues == NULL) { 408 sc->compl_queues = calloc(sc->max_queues + 1, 409 sizeof(struct nvme_completion_queue)); 410 411 for (int i = 0; i <= sc->num_cqueues; i++) 412 pthread_mutex_init(&sc->compl_queues[i].mtx, NULL); 413 } 414 } 415 416 static void 417 pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) 418 { 419 uint16_t acqs, asqs; 420 421 DPRINTF(("%s\r\n", __func__)); 422 423 asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; 424 sc->submit_queues[0].size = asqs; 425 sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, 426 sizeof(struct nvme_command) * asqs); 427 428 DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p\r\n", 429 __func__, sc->regs.asq, sc->submit_queues[0].qbase)); 430 431 acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & 432 NVME_AQA_REG_ACQS_MASK) + 1; 433 sc->compl_queues[0].size = acqs; 434 sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, 435 sizeof(struct nvme_completion) * acqs); 436 DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p\r\n", 437 __func__, sc->regs.acq, sc->compl_queues[0].qbase)); 438 } 439 440 static int 441 nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, 442 struct nvme_completion* compl) 443 { 444 uint16_t qid = command->cdw10 & 0xffff; 445 446 DPRINTF(("%s DELETE_IO_SQ %u\r\n", __func__, qid)); 447 if (qid == 0 || qid > sc->num_cqueues) { 448 WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u\r\n", 449 __func__, qid, sc->num_squeues)); 450 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 451 NVME_SC_INVALID_QUEUE_IDENTIFIER); 452 return (1); 453 } 454 455 sc->submit_queues[qid].qbase = NULL; 456 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 457 return (1); 458 } 459 460 static int 461 nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, 462 struct nvme_completion* compl) 463 { 464 if (command->cdw11 & NVME_CMD_CDW11_PC) { 465 uint16_t qid = command->cdw10 & 0xffff; 466 struct nvme_submission_queue *nsq; 467 468 if (qid > sc->num_squeues) { 469 WPRINTF(("%s queue index %u > num_squeues %u\r\n", 470 __func__, qid, sc->num_squeues)); 471 pci_nvme_status_tc(&compl->status, 472 NVME_SCT_COMMAND_SPECIFIC, 473 NVME_SC_INVALID_QUEUE_IDENTIFIER); 474 return (1); 475 } 476 477 nsq = &sc->submit_queues[qid]; 478 nsq->size = ((command->cdw10 >> 16) & 0xffff) + 1; 479 480 nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 481 sizeof(struct nvme_command) * (size_t)nsq->size); 482 nsq->cqid = (command->cdw11 >> 16) & 0xffff; 483 nsq->qpriority = (command->cdw11 >> 1) & 0x03; 484 485 DPRINTF(("%s sq %u size %u gaddr %p cqid %u\r\n", __func__, 486 qid, nsq->size, nsq->qbase, nsq->cqid)); 487 488 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 489 490 DPRINTF(("%s completed creating IOSQ qid %u\r\n", 491 __func__, qid)); 492 } else { 493 /* 494 * Guest sent non-cont submission queue request. 495 * This setting is unsupported by this emulation. 496 */ 497 WPRINTF(("%s unsupported non-contig (list-based) " 498 "create i/o submission queue\r\n", __func__)); 499 500 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 501 } 502 return (1); 503 } 504 505 static int 506 nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, 507 struct nvme_completion* compl) 508 { 509 uint16_t qid = command->cdw10 & 0xffff; 510 511 DPRINTF(("%s DELETE_IO_CQ %u\r\n", __func__, qid)); 512 if (qid == 0 || qid > sc->num_cqueues) { 513 WPRINTF(("%s queue index %u / num_cqueues %u\r\n", 514 __func__, qid, sc->num_cqueues)); 515 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 516 NVME_SC_INVALID_QUEUE_IDENTIFIER); 517 return (1); 518 } 519 520 sc->compl_queues[qid].qbase = NULL; 521 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 522 return (1); 523 } 524 525 static int 526 nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, 527 struct nvme_completion* compl) 528 { 529 if (command->cdw11 & NVME_CMD_CDW11_PC) { 530 uint16_t qid = command->cdw10 & 0xffff; 531 struct nvme_completion_queue *ncq; 532 533 if (qid > sc->num_cqueues) { 534 WPRINTF(("%s queue index %u > num_cqueues %u\r\n", 535 __func__, qid, sc->num_cqueues)); 536 pci_nvme_status_tc(&compl->status, 537 NVME_SCT_COMMAND_SPECIFIC, 538 NVME_SC_INVALID_QUEUE_IDENTIFIER); 539 return (1); 540 } 541 542 ncq = &sc->compl_queues[qid]; 543 ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; 544 ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; 545 ncq->size = ((command->cdw10 >> 16) & 0xffff) + 1; 546 547 ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, 548 command->prp1, 549 sizeof(struct nvme_command) * (size_t)ncq->size); 550 551 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 552 } else { 553 /* 554 * Non-contig completion queue unsupported. 555 */ 556 WPRINTF(("%s unsupported non-contig (list-based) " 557 "create i/o completion queue\r\n", 558 __func__)); 559 560 /* 0x12 = Invalid Use of Controller Memory Buffer */ 561 pci_nvme_status_genc(&compl->status, 0x12); 562 } 563 564 return (1); 565 } 566 567 static int 568 nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, 569 struct nvme_completion* compl) 570 { 571 uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2; 572 uint8_t logpage = command->cdw10 & 0xFF; 573 void *data; 574 575 DPRINTF(("%s log page %u len %u\r\n", __func__, logpage, logsize)); 576 577 if (logpage >= 1 && logpage <= 3) 578 data = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 579 PAGE_SIZE); 580 581 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 582 583 switch (logpage) { 584 case 0x01: /* Error information */ 585 memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize); 586 break; 587 case 0x02: /* SMART/Health information */ 588 /* TODO: present some smart info */ 589 memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize); 590 break; 591 case 0x03: /* Firmware slot information */ 592 memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize); 593 break; 594 default: 595 WPRINTF(("%s get log page %x command not supported\r\n", 596 __func__, logpage)); 597 598 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 599 NVME_SC_INVALID_LOG_PAGE); 600 } 601 602 return (1); 603 } 604 605 static int 606 nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, 607 struct nvme_completion* compl) 608 { 609 void *dest; 610 611 DPRINTF(("%s identify 0x%x nsid 0x%x\r\n", __func__, 612 command->cdw10 & 0xFF, command->nsid)); 613 614 switch (command->cdw10 & 0xFF) { 615 case 0x00: /* return Identify Namespace data structure */ 616 dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 617 sizeof(sc->nsdata)); 618 memcpy(dest, &sc->nsdata, sizeof(sc->nsdata)); 619 break; 620 case 0x01: /* return Identify Controller data structure */ 621 dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 622 sizeof(sc->ctrldata)); 623 memcpy(dest, &sc->ctrldata, sizeof(sc->ctrldata)); 624 break; 625 case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ 626 dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, 627 sizeof(uint32_t) * 1024); 628 ((uint32_t *)dest)[0] = 1; 629 ((uint32_t *)dest)[1] = 0; 630 break; 631 case 0x11: 632 pci_nvme_status_genc(&compl->status, 633 NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 634 return (1); 635 case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ 636 case 0x10: 637 case 0x12: 638 case 0x13: 639 case 0x14: 640 case 0x15: 641 default: 642 DPRINTF(("%s unsupported identify command requested 0x%x\r\n", 643 __func__, command->cdw10 & 0xFF)); 644 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 645 return (1); 646 } 647 648 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 649 return (1); 650 } 651 652 static int 653 nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, 654 struct nvme_completion* compl) 655 { 656 int feature = command->cdw10 & 0xFF; 657 uint32_t iv; 658 659 DPRINTF(("%s feature 0x%x\r\n", __func__, feature)); 660 compl->cdw0 = 0; 661 662 switch (feature) { 663 case NVME_FEAT_ARBITRATION: 664 DPRINTF((" arbitration 0x%x\r\n", command->cdw11)); 665 break; 666 case NVME_FEAT_POWER_MANAGEMENT: 667 DPRINTF((" power management 0x%x\r\n", command->cdw11)); 668 break; 669 case NVME_FEAT_LBA_RANGE_TYPE: 670 DPRINTF((" lba range 0x%x\r\n", command->cdw11)); 671 break; 672 case NVME_FEAT_TEMPERATURE_THRESHOLD: 673 DPRINTF((" temperature threshold 0x%x\r\n", command->cdw11)); 674 break; 675 case NVME_FEAT_ERROR_RECOVERY: 676 DPRINTF((" error recovery 0x%x\r\n", command->cdw11)); 677 break; 678 case NVME_FEAT_VOLATILE_WRITE_CACHE: 679 DPRINTF((" volatile write cache 0x%x\r\n", command->cdw11)); 680 break; 681 case NVME_FEAT_NUMBER_OF_QUEUES: 682 sc->num_squeues = command->cdw11 & 0xFFFF; 683 sc->num_cqueues = (command->cdw11 >> 16) & 0xFFFF; 684 DPRINTF((" number of queues (submit %u, completion %u)\r\n", 685 sc->num_squeues, sc->num_cqueues)); 686 687 if (sc->num_squeues == 0 || sc->num_squeues > sc->max_queues) 688 sc->num_squeues = sc->max_queues; 689 if (sc->num_cqueues == 0 || sc->num_cqueues > sc->max_queues) 690 sc->num_cqueues = sc->max_queues; 691 692 compl->cdw0 = (sc->num_squeues & 0xFFFF) | 693 ((sc->num_cqueues & 0xFFFF) << 16); 694 695 break; 696 case NVME_FEAT_INTERRUPT_COALESCING: 697 DPRINTF((" interrupt coalescing 0x%x\r\n", command->cdw11)); 698 699 /* in uS */ 700 sc->intr_coales_aggr_time = ((command->cdw11 >> 8) & 0xFF)*100; 701 702 sc->intr_coales_aggr_thresh = command->cdw11 & 0xFF; 703 break; 704 case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: 705 iv = command->cdw11 & 0xFFFF; 706 707 DPRINTF((" interrupt vector configuration 0x%x\r\n", 708 command->cdw11)); 709 710 for (uint32_t i = 0; i <= sc->num_cqueues; i++) { 711 if (sc->compl_queues[i].intr_vec == iv) { 712 if (command->cdw11 & (1 << 16)) 713 sc->compl_queues[i].intr_en |= 714 NVME_CQ_INTCOAL; 715 else 716 sc->compl_queues[i].intr_en &= 717 ~NVME_CQ_INTCOAL; 718 } 719 } 720 break; 721 case NVME_FEAT_WRITE_ATOMICITY: 722 DPRINTF((" write atomicity 0x%x\r\n", command->cdw11)); 723 break; 724 case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: 725 DPRINTF((" async event configuration 0x%x\r\n", 726 command->cdw11)); 727 sc->async_ev_config = command->cdw11; 728 break; 729 case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: 730 DPRINTF((" software progress marker 0x%x\r\n", 731 command->cdw11)); 732 break; 733 case 0x0C: 734 DPRINTF((" autonomous power state transition 0x%x\r\n", 735 command->cdw11)); 736 break; 737 default: 738 WPRINTF(("%s invalid feature\r\n", __func__)); 739 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 740 return (1); 741 } 742 743 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 744 return (1); 745 } 746 747 static int 748 nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, 749 struct nvme_completion* compl) 750 { 751 int feature = command->cdw10 & 0xFF; 752 753 DPRINTF(("%s feature 0x%x\r\n", __func__, feature)); 754 755 compl->cdw0 = 0; 756 757 switch (feature) { 758 case NVME_FEAT_ARBITRATION: 759 DPRINTF((" arbitration\r\n")); 760 break; 761 case NVME_FEAT_POWER_MANAGEMENT: 762 DPRINTF((" power management\r\n")); 763 break; 764 case NVME_FEAT_LBA_RANGE_TYPE: 765 DPRINTF((" lba range\r\n")); 766 break; 767 case NVME_FEAT_TEMPERATURE_THRESHOLD: 768 DPRINTF((" temperature threshold\r\n")); 769 switch ((command->cdw11 >> 20) & 0x3) { 770 case 0: 771 /* Over temp threshold */ 772 compl->cdw0 = 0xFFFF; 773 break; 774 case 1: 775 /* Under temp threshold */ 776 compl->cdw0 = 0; 777 break; 778 default: 779 WPRINTF((" invalid threshold type select\r\n")); 780 pci_nvme_status_genc(&compl->status, 781 NVME_SC_INVALID_FIELD); 782 return (1); 783 } 784 break; 785 case NVME_FEAT_ERROR_RECOVERY: 786 DPRINTF((" error recovery\r\n")); 787 break; 788 case NVME_FEAT_VOLATILE_WRITE_CACHE: 789 DPRINTF((" volatile write cache\r\n")); 790 break; 791 case NVME_FEAT_NUMBER_OF_QUEUES: 792 compl->cdw0 = 0; 793 if (sc->num_squeues == 0) 794 compl->cdw0 |= sc->max_queues & 0xFFFF; 795 else 796 compl->cdw0 |= sc->num_squeues & 0xFFFF; 797 798 if (sc->num_cqueues == 0) 799 compl->cdw0 |= (sc->max_queues & 0xFFFF) << 16; 800 else 801 compl->cdw0 |= (sc->num_cqueues & 0xFFFF) << 16; 802 803 DPRINTF((" number of queues (submit %u, completion %u)\r\n", 804 compl->cdw0 & 0xFFFF, 805 (compl->cdw0 >> 16) & 0xFFFF)); 806 807 break; 808 case NVME_FEAT_INTERRUPT_COALESCING: 809 DPRINTF((" interrupt coalescing\r\n")); 810 break; 811 case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: 812 DPRINTF((" interrupt vector configuration\r\n")); 813 break; 814 case NVME_FEAT_WRITE_ATOMICITY: 815 DPRINTF((" write atomicity\r\n")); 816 break; 817 case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: 818 DPRINTF((" async event configuration\r\n")); 819 sc->async_ev_config = command->cdw11; 820 break; 821 case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: 822 DPRINTF((" software progress marker\r\n")); 823 break; 824 case 0x0C: 825 DPRINTF((" autonomous power state transition\r\n")); 826 break; 827 default: 828 WPRINTF(("%s invalid feature 0x%x\r\n", __func__, feature)); 829 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); 830 return (1); 831 } 832 833 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 834 return (1); 835 } 836 837 static int 838 nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, 839 struct nvme_completion* compl) 840 { 841 DPRINTF(("%s submission queue %u, command ID 0x%x\r\n", __func__, 842 command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF)); 843 844 /* TODO: search for the command ID and abort it */ 845 846 compl->cdw0 = 1; 847 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); 848 return (1); 849 } 850 851 static int 852 nvme_opc_async_event_req(struct pci_nvme_softc* sc, 853 struct nvme_command* command, struct nvme_completion* compl) 854 { 855 DPRINTF(("%s async event request 0x%x\r\n", __func__, command->cdw11)); 856 857 /* 858 * TODO: raise events when they happen based on the Set Features cmd. 859 * These events happen async, so only set completion successful if 860 * there is an event reflective of the request to get event. 861 */ 862 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, 863 NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); 864 return (0); 865 } 866 867 static void 868 pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) 869 { 870 struct nvme_completion compl; 871 struct nvme_command *cmd; 872 struct nvme_submission_queue *sq; 873 struct nvme_completion_queue *cq; 874 int do_intr = 0; 875 uint16_t sqhead; 876 877 DPRINTF(("%s index %u\r\n", __func__, (uint32_t)value)); 878 879 sq = &sc->submit_queues[0]; 880 881 sqhead = atomic_load_acq_short(&sq->head); 882 883 if (atomic_testandset_int(&sq->busy, 1)) { 884 DPRINTF(("%s SQ busy, head %u, tail %u\r\n", 885 __func__, sqhead, sq->tail)); 886 return; 887 } 888 889 DPRINTF(("sqhead %u, tail %u\r\n", sqhead, sq->tail)); 890 891 while (sqhead != atomic_load_acq_short(&sq->tail)) { 892 cmd = &(sq->qbase)[sqhead]; 893 compl.status = 0; 894 895 switch (NVME_CMD_GET_OPC(cmd->opc_fuse)) { 896 case NVME_OPC_DELETE_IO_SQ: 897 DPRINTF(("%s command DELETE_IO_SQ\r\n", __func__)); 898 do_intr |= nvme_opc_delete_io_sq(sc, cmd, &compl); 899 break; 900 case NVME_OPC_CREATE_IO_SQ: 901 DPRINTF(("%s command CREATE_IO_SQ\r\n", __func__)); 902 do_intr |= nvme_opc_create_io_sq(sc, cmd, &compl); 903 break; 904 case NVME_OPC_DELETE_IO_CQ: 905 DPRINTF(("%s command DELETE_IO_CQ\r\n", __func__)); 906 do_intr |= nvme_opc_delete_io_cq(sc, cmd, &compl); 907 break; 908 case NVME_OPC_CREATE_IO_CQ: 909 DPRINTF(("%s command CREATE_IO_CQ\r\n", __func__)); 910 do_intr |= nvme_opc_create_io_cq(sc, cmd, &compl); 911 break; 912 case NVME_OPC_GET_LOG_PAGE: 913 DPRINTF(("%s command GET_LOG_PAGE\r\n", __func__)); 914 do_intr |= nvme_opc_get_log_page(sc, cmd, &compl); 915 break; 916 case NVME_OPC_IDENTIFY: 917 DPRINTF(("%s command IDENTIFY\r\n", __func__)); 918 do_intr |= nvme_opc_identify(sc, cmd, &compl); 919 break; 920 case NVME_OPC_ABORT: 921 DPRINTF(("%s command ABORT\r\n", __func__)); 922 do_intr |= nvme_opc_abort(sc, cmd, &compl); 923 break; 924 case NVME_OPC_SET_FEATURES: 925 DPRINTF(("%s command SET_FEATURES\r\n", __func__)); 926 do_intr |= nvme_opc_set_features(sc, cmd, &compl); 927 break; 928 case NVME_OPC_GET_FEATURES: 929 DPRINTF(("%s command GET_FEATURES\r\n", __func__)); 930 do_intr |= nvme_opc_get_features(sc, cmd, &compl); 931 break; 932 case NVME_OPC_ASYNC_EVENT_REQUEST: 933 DPRINTF(("%s command ASYNC_EVENT_REQ\r\n", __func__)); 934 /* XXX dont care, unhandled for now 935 do_intr |= nvme_opc_async_event_req(sc, cmd, &compl); 936 */ 937 break; 938 default: 939 WPRINTF(("0x%x command is not implemented\r\n", 940 NVME_CMD_GET_OPC(cmd->opc_fuse))); 941 } 942 943 /* for now skip async event generation */ 944 if (NVME_CMD_GET_OPC(cmd->opc_fuse) != 945 NVME_OPC_ASYNC_EVENT_REQUEST) { 946 struct nvme_completion *cp; 947 int phase; 948 949 cq = &sc->compl_queues[0]; 950 951 cp = &(cq->qbase)[cq->tail]; 952 cp->sqid = 0; 953 cp->sqhd = sqhead; 954 cp->cid = cmd->cid; 955 956 phase = NVME_STATUS_GET_P(cp->status); 957 cp->status = compl.status; 958 pci_nvme_toggle_phase(&cp->status, phase); 959 960 cq->tail = (cq->tail + 1) % cq->size; 961 } 962 sqhead = (sqhead + 1) % sq->size; 963 } 964 965 DPRINTF(("setting sqhead %u\r\n", sqhead)); 966 atomic_store_short(&sq->head, sqhead); 967 atomic_store_int(&sq->busy, 0); 968 969 if (do_intr) 970 pci_generate_msix(sc->nsc_pi, 0); 971 972 } 973 974 static int 975 pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, 976 uint64_t gpaddr, size_t size, int do_write, uint64_t lba) 977 { 978 int iovidx; 979 980 if (req != NULL) { 981 /* concatenate contig block-iovs to minimize number of iovs */ 982 if ((req->prev_gpaddr + req->prev_size) == gpaddr) { 983 iovidx = req->io_req.br_iovcnt - 1; 984 985 req->io_req.br_iov[iovidx].iov_base = 986 paddr_guest2host(req->sc->nsc_pi->pi_vmctx, 987 req->prev_gpaddr, size); 988 989 req->prev_size += size; 990 req->io_req.br_resid += size; 991 992 req->io_req.br_iov[iovidx].iov_len = req->prev_size; 993 } else { 994 pthread_mutex_lock(&req->mtx); 995 996 iovidx = req->io_req.br_iovcnt; 997 if (iovidx == NVME_MAX_BLOCKIOVS) { 998 int err = 0; 999 1000 DPRINTF(("large I/O, doing partial req\r\n")); 1001 1002 iovidx = 0; 1003 req->io_req.br_iovcnt = 0; 1004 1005 req->io_req.br_callback = pci_nvme_io_partial; 1006 1007 if (!do_write) 1008 err = blockif_read(sc->nvstore.ctx, 1009 &req->io_req); 1010 else 1011 err = blockif_write(sc->nvstore.ctx, 1012 &req->io_req); 1013 1014 /* wait until req completes before cont */ 1015 if (err == 0) 1016 pthread_cond_wait(&req->cv, &req->mtx); 1017 } 1018 if (iovidx == 0) { 1019 req->io_req.br_offset = lba; 1020 req->io_req.br_resid = 0; 1021 req->io_req.br_param = req; 1022 } 1023 1024 req->io_req.br_iov[iovidx].iov_base = 1025 paddr_guest2host(req->sc->nsc_pi->pi_vmctx, 1026 gpaddr, size); 1027 1028 req->io_req.br_iov[iovidx].iov_len = size; 1029 1030 req->prev_gpaddr = gpaddr; 1031 req->prev_size = size; 1032 req->io_req.br_resid += size; 1033 1034 req->io_req.br_iovcnt++; 1035 1036 pthread_mutex_unlock(&req->mtx); 1037 } 1038 } else { 1039 /* RAM buffer: read/write directly */ 1040 void *p = sc->nvstore.ctx; 1041 void *gptr; 1042 1043 if ((lba + size) > sc->nvstore.size) { 1044 WPRINTF(("%s write would overflow RAM\r\n", __func__)); 1045 return (-1); 1046 } 1047 1048 p = (void *)((uintptr_t)p + (uintptr_t)lba); 1049 gptr = paddr_guest2host(sc->nsc_pi->pi_vmctx, gpaddr, size); 1050 if (do_write) 1051 memcpy(p, gptr, size); 1052 else 1053 memcpy(gptr, p, size); 1054 } 1055 return (0); 1056 } 1057 1058 static void 1059 pci_nvme_set_completion(struct pci_nvme_softc *sc, 1060 struct nvme_submission_queue *sq, int sqid, uint16_t cid, 1061 uint32_t cdw0, uint16_t status, int ignore_busy) 1062 { 1063 struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; 1064 struct nvme_completion *compl; 1065 int do_intr = 0; 1066 int phase; 1067 1068 DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x\r\n", 1069 __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), 1070 NVME_STATUS_GET_SC(status))); 1071 1072 pthread_mutex_lock(&cq->mtx); 1073 1074 assert(cq->qbase != NULL); 1075 1076 compl = &cq->qbase[cq->tail]; 1077 1078 compl->sqhd = atomic_load_acq_short(&sq->head); 1079 compl->sqid = sqid; 1080 compl->cid = cid; 1081 1082 // toggle phase 1083 phase = NVME_STATUS_GET_P(compl->status); 1084 compl->status = status; 1085 pci_nvme_toggle_phase(&compl->status, phase); 1086 1087 cq->tail = (cq->tail + 1) % cq->size; 1088 1089 if (cq->intr_en & NVME_CQ_INTEN) 1090 do_intr = 1; 1091 1092 pthread_mutex_unlock(&cq->mtx); 1093 1094 if (ignore_busy || !atomic_load_acq_int(&sq->busy)) 1095 if (do_intr) 1096 pci_generate_msix(sc->nsc_pi, cq->intr_vec); 1097 } 1098 1099 static void 1100 pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req) 1101 { 1102 req->sc = NULL; 1103 req->nvme_sq = NULL; 1104 req->sqid = 0; 1105 1106 pthread_mutex_lock(&sc->mtx); 1107 1108 req->next = sc->ioreqs_free; 1109 sc->ioreqs_free = req; 1110 sc->pending_ios--; 1111 1112 /* when no more IO pending, can set to ready if device reset/enabled */ 1113 if (sc->pending_ios == 0 && 1114 NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts))) 1115 sc->regs.csts |= NVME_CSTS_RDY; 1116 1117 pthread_mutex_unlock(&sc->mtx); 1118 1119 sem_post(&sc->iosemlock); 1120 } 1121 1122 static struct pci_nvme_ioreq * 1123 pci_nvme_get_ioreq(struct pci_nvme_softc *sc) 1124 { 1125 struct pci_nvme_ioreq *req = NULL;; 1126 1127 sem_wait(&sc->iosemlock); 1128 pthread_mutex_lock(&sc->mtx); 1129 1130 req = sc->ioreqs_free; 1131 assert(req != NULL); 1132 1133 sc->ioreqs_free = req->next; 1134 1135 req->next = NULL; 1136 req->sc = sc; 1137 1138 sc->pending_ios++; 1139 1140 pthread_mutex_unlock(&sc->mtx); 1141 1142 req->io_req.br_iovcnt = 0; 1143 req->io_req.br_offset = 0; 1144 req->io_req.br_resid = 0; 1145 req->io_req.br_param = req; 1146 req->prev_gpaddr = 0; 1147 req->prev_size = 0; 1148 1149 return req; 1150 } 1151 1152 static void 1153 pci_nvme_io_done(struct blockif_req *br, int err) 1154 { 1155 struct pci_nvme_ioreq *req = br->br_param; 1156 struct nvme_submission_queue *sq = req->nvme_sq; 1157 uint16_t code, status; 1158 1159 DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err))); 1160 1161 /* TODO return correct error */ 1162 code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; 1163 pci_nvme_status_genc(&status, code); 1164 1165 pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status, 0); 1166 pci_nvme_release_ioreq(req->sc, req); 1167 } 1168 1169 static void 1170 pci_nvme_io_partial(struct blockif_req *br, int err) 1171 { 1172 struct pci_nvme_ioreq *req = br->br_param; 1173 1174 DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err))); 1175 1176 pthread_cond_signal(&req->cv); 1177 } 1178 1179 1180 static void 1181 pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) 1182 { 1183 struct nvme_submission_queue *sq; 1184 uint16_t status; 1185 uint16_t sqhead; 1186 int err; 1187 1188 /* handle all submissions up to sq->tail index */ 1189 sq = &sc->submit_queues[idx]; 1190 1191 if (atomic_testandset_int(&sq->busy, 1)) { 1192 DPRINTF(("%s sqid %u busy\r\n", __func__, idx)); 1193 return; 1194 } 1195 1196 sqhead = atomic_load_acq_short(&sq->head); 1197 1198 DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p\r\n", 1199 idx, sqhead, sq->tail, sq->qbase)); 1200 1201 while (sqhead != atomic_load_acq_short(&sq->tail)) { 1202 struct nvme_command *cmd; 1203 struct pci_nvme_ioreq *req = NULL; 1204 uint64_t lba; 1205 uint64_t nblocks, bytes, size, cpsz; 1206 1207 /* TODO: support scatter gather list handling */ 1208 1209 cmd = &sq->qbase[sqhead]; 1210 sqhead = (sqhead + 1) % sq->size; 1211 1212 lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; 1213 1214 if (NVME_CMD_GET_OPC(cmd->opc_fuse) == NVME_OPC_FLUSH) { 1215 pci_nvme_status_genc(&status, NVME_SC_SUCCESS); 1216 pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, 1217 status, 1); 1218 1219 continue; 1220 } else if (NVME_CMD_GET_OPC(cmd->opc_fuse) == 0x08) { 1221 /* TODO: write zeroes */ 1222 WPRINTF(("%s write zeroes lba 0x%lx blocks %u\r\n", 1223 __func__, lba, cmd->cdw12 & 0xFFFF)); 1224 pci_nvme_status_genc(&status, NVME_SC_SUCCESS); 1225 pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, 1226 status, 1); 1227 1228 continue; 1229 } 1230 1231 nblocks = (cmd->cdw12 & 0xFFFF) + 1; 1232 1233 bytes = nblocks * sc->nvstore.sectsz; 1234 1235 if (sc->nvstore.type == NVME_STOR_BLOCKIF) { 1236 req = pci_nvme_get_ioreq(sc); 1237 req->nvme_sq = sq; 1238 req->sqid = idx; 1239 } 1240 1241 /* 1242 * If data starts mid-page and flows into the next page, then 1243 * increase page count 1244 */ 1245 1246 DPRINTF(("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu " 1247 "(%lu-bytes)\r\n", 1248 sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size, 1249 NVME_CMD_GET_OPC(cmd->opc_fuse) == NVME_OPC_WRITE ? 1250 "WRITE" : "READ", 1251 lba, nblocks, bytes)); 1252 1253 cmd->prp1 &= ~(0x03UL); 1254 cmd->prp2 &= ~(0x03UL); 1255 1256 DPRINTF((" prp1 0x%lx prp2 0x%lx\r\n", cmd->prp1, cmd->prp2)); 1257 1258 size = bytes; 1259 lba *= sc->nvstore.sectsz; 1260 1261 cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE); 1262 1263 if (cpsz > bytes) 1264 cpsz = bytes; 1265 1266 if (req != NULL) { 1267 req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) | 1268 cmd->cdw10; 1269 req->opc = NVME_CMD_GET_OPC(cmd->opc_fuse); 1270 req->cid = cmd->cid; 1271 req->nsid = cmd->nsid; 1272 } 1273 1274 err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz, 1275 NVME_CMD_GET_OPC(cmd->opc_fuse) == NVME_OPC_WRITE, lba); 1276 lba += cpsz; 1277 size -= cpsz; 1278 1279 if (size == 0) 1280 goto iodone; 1281 1282 if (size <= PAGE_SIZE) { 1283 /* prp2 is second (and final) page in transfer */ 1284 1285 err = pci_nvme_append_iov_req(sc, req, cmd->prp2, 1286 size, 1287 NVME_CMD_GET_OPC(cmd->opc_fuse) == NVME_OPC_WRITE, 1288 lba); 1289 } else { 1290 uint64_t *prp_list; 1291 int i; 1292 1293 /* prp2 is pointer to a physical region page list */ 1294 prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx, 1295 cmd->prp2, PAGE_SIZE); 1296 1297 i = 0; 1298 while (size != 0) { 1299 cpsz = MIN(size, PAGE_SIZE); 1300 1301 /* 1302 * Move to linked physical region page list 1303 * in last item. 1304 */ 1305 if (i == (NVME_PRP2_ITEMS-1) && 1306 size > PAGE_SIZE) { 1307 assert((prp_list[i] & (PAGE_SIZE-1)) == 0); 1308 prp_list = paddr_guest2host( 1309 sc->nsc_pi->pi_vmctx, 1310 prp_list[i], PAGE_SIZE); 1311 i = 0; 1312 } 1313 if (prp_list[i] == 0) { 1314 WPRINTF(("PRP2[%d] = 0 !!!\r\n", i)); 1315 err = 1; 1316 break; 1317 } 1318 1319 err = pci_nvme_append_iov_req(sc, req, 1320 prp_list[i], cpsz, 1321 NVME_CMD_GET_OPC(cmd->opc_fuse) == 1322 NVME_OPC_WRITE, lba); 1323 if (err) 1324 break; 1325 1326 lba += cpsz; 1327 size -= cpsz; 1328 i++; 1329 } 1330 } 1331 1332 iodone: 1333 if (sc->nvstore.type == NVME_STOR_RAM) { 1334 uint16_t code, status; 1335 1336 code = err ? NVME_SC_LBA_OUT_OF_RANGE : 1337 NVME_SC_SUCCESS; 1338 pci_nvme_status_genc(&status, code); 1339 1340 pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, 1341 status, 1); 1342 1343 continue; 1344 } 1345 1346 1347 if (err) 1348 goto do_error; 1349 1350 req->io_req.br_callback = pci_nvme_io_done; 1351 1352 err = 0; 1353 switch (NVME_CMD_GET_OPC(cmd->opc_fuse)) { 1354 case NVME_OPC_READ: 1355 err = blockif_read(sc->nvstore.ctx, &req->io_req); 1356 break; 1357 case NVME_OPC_WRITE: 1358 err = blockif_write(sc->nvstore.ctx, &req->io_req); 1359 break; 1360 default: 1361 WPRINTF(("%s unhandled io command 0x%x\r\n", 1362 __func__, NVME_CMD_GET_OPC(cmd->opc_fuse))); 1363 err = 1; 1364 } 1365 1366 do_error: 1367 if (err) { 1368 uint16_t status; 1369 1370 pci_nvme_status_genc(&status, 1371 NVME_SC_DATA_TRANSFER_ERROR); 1372 1373 pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, 1374 status, 1); 1375 pci_nvme_release_ioreq(sc, req); 1376 } 1377 } 1378 1379 atomic_store_short(&sq->head, sqhead); 1380 atomic_store_int(&sq->busy, 0); 1381 } 1382 1383 static void 1384 pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, 1385 uint64_t idx, int is_sq, uint64_t value) 1386 { 1387 DPRINTF(("nvme doorbell %lu, %s, val 0x%lx\r\n", 1388 idx, is_sq ? "SQ" : "CQ", value & 0xFFFF)); 1389 1390 if (is_sq) { 1391 atomic_store_short(&sc->submit_queues[idx].tail, 1392 (uint16_t)value); 1393 1394 if (idx == 0) { 1395 pci_nvme_handle_admin_cmd(sc, value); 1396 } else { 1397 /* submission queue; handle new entries in SQ */ 1398 if (idx > sc->num_squeues) { 1399 WPRINTF(("%s SQ index %lu overflow from " 1400 "guest (max %u)\r\n", 1401 __func__, idx, sc->num_squeues)); 1402 return; 1403 } 1404 pci_nvme_handle_io_cmd(sc, (uint16_t)idx); 1405 } 1406 } else { 1407 if (idx > sc->num_cqueues) { 1408 WPRINTF(("%s queue index %lu overflow from " 1409 "guest (max %u)\r\n", 1410 __func__, idx, sc->num_cqueues)); 1411 return; 1412 } 1413 1414 sc->compl_queues[idx].head = (uint16_t)value; 1415 } 1416 } 1417 1418 static void 1419 pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite) 1420 { 1421 const char *s = iswrite ? "WRITE" : "READ"; 1422 1423 switch (offset) { 1424 case NVME_CR_CAP_LOW: 1425 DPRINTF(("%s %s NVME_CR_CAP_LOW\r\n", func, s)); 1426 break; 1427 case NVME_CR_CAP_HI: 1428 DPRINTF(("%s %s NVME_CR_CAP_HI\r\n", func, s)); 1429 break; 1430 case NVME_CR_VS: 1431 DPRINTF(("%s %s NVME_CR_VS\r\n", func, s)); 1432 break; 1433 case NVME_CR_INTMS: 1434 DPRINTF(("%s %s NVME_CR_INTMS\r\n", func, s)); 1435 break; 1436 case NVME_CR_INTMC: 1437 DPRINTF(("%s %s NVME_CR_INTMC\r\n", func, s)); 1438 break; 1439 case NVME_CR_CC: 1440 DPRINTF(("%s %s NVME_CR_CC\r\n", func, s)); 1441 break; 1442 case NVME_CR_CSTS: 1443 DPRINTF(("%s %s NVME_CR_CSTS\r\n", func, s)); 1444 break; 1445 case NVME_CR_NSSR: 1446 DPRINTF(("%s %s NVME_CR_NSSR\r\n", func, s)); 1447 break; 1448 case NVME_CR_AQA: 1449 DPRINTF(("%s %s NVME_CR_AQA\r\n", func, s)); 1450 break; 1451 case NVME_CR_ASQ_LOW: 1452 DPRINTF(("%s %s NVME_CR_ASQ_LOW\r\n", func, s)); 1453 break; 1454 case NVME_CR_ASQ_HI: 1455 DPRINTF(("%s %s NVME_CR_ASQ_HI\r\n", func, s)); 1456 break; 1457 case NVME_CR_ACQ_LOW: 1458 DPRINTF(("%s %s NVME_CR_ACQ_LOW\r\n", func, s)); 1459 break; 1460 case NVME_CR_ACQ_HI: 1461 DPRINTF(("%s %s NVME_CR_ACQ_HI\r\n", func, s)); 1462 break; 1463 default: 1464 DPRINTF(("unknown nvme bar-0 offset 0x%lx\r\n", offset)); 1465 } 1466 1467 } 1468 1469 static void 1470 pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, 1471 uint64_t offset, int size, uint64_t value) 1472 { 1473 uint32_t ccreg; 1474 1475 if (offset >= NVME_DOORBELL_OFFSET) { 1476 uint64_t belloffset = offset - NVME_DOORBELL_OFFSET; 1477 uint64_t idx = belloffset / 8; /* door bell size = 2*int */ 1478 int is_sq = (belloffset % 8) < 4; 1479 1480 if (belloffset > ((sc->max_queues+1) * 8 - 4)) { 1481 WPRINTF(("guest attempted an overflow write offset " 1482 "0x%lx, val 0x%lx in %s", 1483 offset, value, __func__)); 1484 return; 1485 } 1486 1487 pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value); 1488 return; 1489 } 1490 1491 DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx\r\n", 1492 offset, size, value)); 1493 1494 if (size != 4) { 1495 WPRINTF(("guest wrote invalid size %d (offset 0x%lx, " 1496 "val 0x%lx) to bar0 in %s", 1497 size, offset, value, __func__)); 1498 /* TODO: shutdown device */ 1499 return; 1500 } 1501 1502 pci_nvme_bar0_reg_dumps(__func__, offset, 1); 1503 1504 pthread_mutex_lock(&sc->mtx); 1505 1506 switch (offset) { 1507 case NVME_CR_CAP_LOW: 1508 case NVME_CR_CAP_HI: 1509 /* readonly */ 1510 break; 1511 case NVME_CR_VS: 1512 /* readonly */ 1513 break; 1514 case NVME_CR_INTMS: 1515 /* MSI-X, so ignore */ 1516 break; 1517 case NVME_CR_INTMC: 1518 /* MSI-X, so ignore */ 1519 break; 1520 case NVME_CR_CC: 1521 ccreg = (uint32_t)value; 1522 1523 DPRINTF(("%s NVME_CR_CC en %x css %x shn %x iosqes %u " 1524 "iocqes %u\r\n", 1525 __func__, 1526 NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg), 1527 NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg), 1528 NVME_CC_GET_IOCQES(ccreg))); 1529 1530 if (NVME_CC_GET_SHN(ccreg)) { 1531 /* perform shutdown - flush out data to backend */ 1532 sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK << 1533 NVME_CSTS_REG_SHST_SHIFT); 1534 sc->regs.csts |= NVME_SHST_COMPLETE << 1535 NVME_CSTS_REG_SHST_SHIFT; 1536 } 1537 if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) { 1538 if (NVME_CC_GET_EN(ccreg) == 0) 1539 /* transition 1-> causes controller reset */ 1540 pci_nvme_reset(sc); 1541 else 1542 pci_nvme_init_controller(ctx, sc); 1543 } 1544 1545 /* Insert the iocqes, iosqes and en bits from the write */ 1546 sc->regs.cc &= ~NVME_CC_WRITE_MASK; 1547 sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK; 1548 if (NVME_CC_GET_EN(ccreg) == 0) { 1549 /* Insert the ams, mps and css bit fields */ 1550 sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK; 1551 sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK; 1552 sc->regs.csts &= ~NVME_CSTS_RDY; 1553 } else if (sc->pending_ios == 0) { 1554 sc->regs.csts |= NVME_CSTS_RDY; 1555 } 1556 break; 1557 case NVME_CR_CSTS: 1558 break; 1559 case NVME_CR_NSSR: 1560 /* ignore writes; don't support subsystem reset */ 1561 break; 1562 case NVME_CR_AQA: 1563 sc->regs.aqa = (uint32_t)value; 1564 break; 1565 case NVME_CR_ASQ_LOW: 1566 sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) | 1567 (0xFFFFF000 & value); 1568 break; 1569 case NVME_CR_ASQ_HI: 1570 sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) | 1571 (value << 32); 1572 break; 1573 case NVME_CR_ACQ_LOW: 1574 sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) | 1575 (0xFFFFF000 & value); 1576 break; 1577 case NVME_CR_ACQ_HI: 1578 sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) | 1579 (value << 32); 1580 break; 1581 default: 1582 DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d\r\n", 1583 __func__, offset, value, size)); 1584 } 1585 pthread_mutex_unlock(&sc->mtx); 1586 } 1587 1588 static void 1589 pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 1590 int baridx, uint64_t offset, int size, uint64_t value) 1591 { 1592 struct pci_nvme_softc* sc = pi->pi_arg; 1593 1594 if (baridx == pci_msix_table_bar(pi) || 1595 baridx == pci_msix_pba_bar(pi)) { 1596 DPRINTF(("nvme-write baridx %d, msix: off 0x%lx, size %d, " 1597 " value 0x%lx\r\n", baridx, offset, size, value)); 1598 1599 pci_emul_msix_twrite(pi, offset, size, value); 1600 return; 1601 } 1602 1603 switch (baridx) { 1604 case 0: 1605 pci_nvme_write_bar_0(ctx, sc, offset, size, value); 1606 break; 1607 1608 default: 1609 DPRINTF(("%s unknown baridx %d, val 0x%lx\r\n", 1610 __func__, baridx, value)); 1611 } 1612 } 1613 1614 static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, 1615 uint64_t offset, int size) 1616 { 1617 uint64_t value; 1618 1619 pci_nvme_bar0_reg_dumps(__func__, offset, 0); 1620 1621 if (offset < NVME_DOORBELL_OFFSET) { 1622 void *p = &(sc->regs); 1623 pthread_mutex_lock(&sc->mtx); 1624 memcpy(&value, (void *)((uintptr_t)p + offset), size); 1625 pthread_mutex_unlock(&sc->mtx); 1626 } else { 1627 value = 0; 1628 WPRINTF(("pci_nvme: read invalid offset %ld\r\n", offset)); 1629 } 1630 1631 switch (size) { 1632 case 1: 1633 value &= 0xFF; 1634 break; 1635 case 2: 1636 value &= 0xFFFF; 1637 break; 1638 case 4: 1639 value &= 0xFFFFFFFF; 1640 break; 1641 } 1642 1643 DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x\r\n", 1644 offset, size, (uint32_t)value)); 1645 1646 return (value); 1647 } 1648 1649 1650 1651 static uint64_t 1652 pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 1653 uint64_t offset, int size) 1654 { 1655 struct pci_nvme_softc* sc = pi->pi_arg; 1656 1657 if (baridx == pci_msix_table_bar(pi) || 1658 baridx == pci_msix_pba_bar(pi)) { 1659 DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d\r\n", 1660 baridx, offset, size)); 1661 1662 return pci_emul_msix_tread(pi, offset, size); 1663 } 1664 1665 switch (baridx) { 1666 case 0: 1667 return pci_nvme_read_bar_0(sc, offset, size); 1668 1669 default: 1670 DPRINTF(("unknown bar %d, 0x%lx\r\n", baridx, offset)); 1671 } 1672 1673 return (0); 1674 } 1675 1676 1677 static int 1678 pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) 1679 { 1680 char bident[sizeof("XX:X:X")]; 1681 char *uopt, *xopts, *config; 1682 uint32_t sectsz; 1683 int optidx; 1684 1685 sc->max_queues = NVME_QUEUES; 1686 sc->max_qentries = NVME_MAX_QENTRIES; 1687 sc->ioslots = NVME_IOSLOTS; 1688 sc->num_squeues = sc->max_queues; 1689 sc->num_cqueues = sc->max_queues; 1690 sectsz = 0; 1691 1692 uopt = strdup(opts); 1693 optidx = 0; 1694 snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn), 1695 "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); 1696 for (xopts = strtok(uopt, ","); 1697 xopts != NULL; 1698 xopts = strtok(NULL, ",")) { 1699 1700 if ((config = strchr(xopts, '=')) != NULL) 1701 *config++ = '\0'; 1702 1703 if (!strcmp("maxq", xopts)) { 1704 sc->max_queues = atoi(config); 1705 } else if (!strcmp("qsz", xopts)) { 1706 sc->max_qentries = atoi(config); 1707 } else if (!strcmp("ioslots", xopts)) { 1708 sc->ioslots = atoi(config); 1709 } else if (!strcmp("sectsz", xopts)) { 1710 sectsz = atoi(config); 1711 } else if (!strcmp("ser", xopts)) { 1712 /* 1713 * This field indicates the Product Serial Number in 1714 * 8-bit ASCII, unused bytes should be NULL characters. 1715 * Ref: NVM Express Management Interface 1.0a. 1716 */ 1717 memset(sc->ctrldata.sn, 0, sizeof(sc->ctrldata.sn)); 1718 strncpy(sc->ctrldata.sn, config, 1719 sizeof(sc->ctrldata.sn)); 1720 } else if (!strcmp("ram", xopts)) { 1721 uint64_t sz = strtoull(&xopts[4], NULL, 10); 1722 1723 sc->nvstore.type = NVME_STOR_RAM; 1724 sc->nvstore.size = sz * 1024 * 1024; 1725 sc->nvstore.ctx = calloc(1, sc->nvstore.size); 1726 sc->nvstore.sectsz = 4096; 1727 sc->nvstore.sectsz_bits = 12; 1728 if (sc->nvstore.ctx == NULL) { 1729 perror("Unable to allocate RAM"); 1730 return (-1); 1731 } 1732 } else if (optidx == 0) { 1733 snprintf(bident, sizeof(bident), "%d:%d", 1734 sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); 1735 sc->nvstore.ctx = blockif_open(xopts, bident); 1736 if (sc->nvstore.ctx == NULL) { 1737 perror("Could not open backing file"); 1738 return (-1); 1739 } 1740 sc->nvstore.type = NVME_STOR_BLOCKIF; 1741 sc->nvstore.size = blockif_size(sc->nvstore.ctx); 1742 } else { 1743 fprintf(stderr, "Invalid option %s\n", xopts); 1744 return (-1); 1745 } 1746 1747 optidx++; 1748 } 1749 free(uopt); 1750 1751 if (sc->nvstore.ctx == NULL || sc->nvstore.size == 0) { 1752 fprintf(stderr, "backing store not specified\n"); 1753 return (-1); 1754 } 1755 if (sectsz == 512 || sectsz == 4096 || sectsz == 8192) 1756 sc->nvstore.sectsz = sectsz; 1757 else if (sc->nvstore.type != NVME_STOR_RAM) 1758 sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx); 1759 for (sc->nvstore.sectsz_bits = 9; 1760 (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz; 1761 sc->nvstore.sectsz_bits++); 1762 1763 if (sc->max_queues == 0) { 1764 fprintf(stderr, "Invalid maxq option\n"); 1765 return (-1); 1766 } 1767 if (sc->max_qentries <= 0) { 1768 fprintf(stderr, "Invalid qsz option\n"); 1769 return (-1); 1770 } 1771 if (sc->ioslots <= 0) { 1772 fprintf(stderr, "Invalid ioslots option\n"); 1773 return (-1); 1774 } 1775 1776 return (0); 1777 } 1778 1779 static int 1780 pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 1781 { 1782 struct pci_nvme_softc *sc; 1783 uint32_t pci_membar_sz; 1784 int error; 1785 1786 error = 0; 1787 1788 sc = calloc(1, sizeof(struct pci_nvme_softc)); 1789 pi->pi_arg = sc; 1790 sc->nsc_pi = pi; 1791 1792 error = pci_nvme_parse_opts(sc, opts); 1793 if (error < 0) 1794 goto done; 1795 else 1796 error = 0; 1797 1798 sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq)); 1799 for (int i = 0; i < sc->ioslots; i++) { 1800 if (i < (sc->ioslots-1)) 1801 sc->ioreqs[i].next = &sc->ioreqs[i+1]; 1802 pthread_mutex_init(&sc->ioreqs[i].mtx, NULL); 1803 pthread_cond_init(&sc->ioreqs[i].cv, NULL); 1804 } 1805 sc->ioreqs_free = sc->ioreqs; 1806 sc->intr_coales_aggr_thresh = 1; 1807 1808 pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A); 1809 pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); 1810 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 1811 pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM); 1812 pci_set_cfgdata8(pi, PCIR_PROGIF, 1813 PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0); 1814 1815 /* allocate size of nvme registers + doorbell space for all queues */ 1816 pci_membar_sz = sizeof(struct nvme_registers) + 1817 2*sizeof(uint32_t)*(sc->max_queues); 1818 1819 DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz)); 1820 1821 error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz); 1822 if (error) { 1823 WPRINTF(("%s pci alloc mem bar failed\r\n", __func__)); 1824 goto done; 1825 } 1826 1827 error = pci_emul_add_msixcap(pi, sc->max_queues, NVME_MSIX_BAR); 1828 if (error) { 1829 WPRINTF(("%s pci add msixcap failed\r\n", __func__)); 1830 goto done; 1831 } 1832 1833 pthread_mutex_init(&sc->mtx, NULL); 1834 sem_init(&sc->iosemlock, 0, sc->ioslots); 1835 1836 pci_nvme_reset(sc); 1837 pci_nvme_init_ctrldata(sc); 1838 pci_nvme_init_nsdata(sc); 1839 1840 pci_lintr_request(pi); 1841 1842 done: 1843 return (error); 1844 } 1845 1846 1847 struct pci_devemu pci_de_nvme = { 1848 .pe_emu = "nvme", 1849 .pe_init = pci_nvme_init, 1850 .pe_barwrite = pci_nvme_write, 1851 .pe_barread = pci_nvme_read 1852 }; 1853 PCI_EMUL_SET(pci_de_nvme); 1854