1 /*- 2 * Copyright (C) 2012 Intel Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/bus.h> 32 33 #include "nvme_private.h" 34 35 static boolean_t 36 nvme_completion_check_retry(const struct nvme_completion *cpl) 37 { 38 /* 39 * TODO: spec is not clear how commands that are aborted due 40 * to TLER will be marked. So for now, it seems 41 * NAMESPACE_NOT_READY is the only case where we should 42 * look at the DNR bit. 43 */ 44 switch (cpl->sf_sct) { 45 case NVME_SCT_GENERIC: 46 switch (cpl->sf_sc) { 47 case NVME_SC_NAMESPACE_NOT_READY: 48 if (cpl->sf_dnr) 49 return (0); 50 else 51 return (1); 52 case NVME_SC_INVALID_OPCODE: 53 case NVME_SC_INVALID_FIELD: 54 case NVME_SC_COMMAND_ID_CONFLICT: 55 case NVME_SC_DATA_TRANSFER_ERROR: 56 case NVME_SC_ABORTED_POWER_LOSS: 57 case NVME_SC_INTERNAL_DEVICE_ERROR: 58 case NVME_SC_ABORTED_BY_REQUEST: 59 case NVME_SC_ABORTED_SQ_DELETION: 60 case NVME_SC_ABORTED_FAILED_FUSED: 61 case NVME_SC_ABORTED_MISSING_FUSED: 62 case NVME_SC_INVALID_NAMESPACE_OR_FORMAT: 63 case NVME_SC_COMMAND_SEQUENCE_ERROR: 64 case NVME_SC_LBA_OUT_OF_RANGE: 65 case NVME_SC_CAPACITY_EXCEEDED: 66 default: 67 return (0); 68 } 69 case NVME_SCT_COMMAND_SPECIFIC: 70 case NVME_SCT_MEDIA_ERROR: 71 case NVME_SCT_VENDOR_SPECIFIC: 72 default: 73 return (0); 74 } 75 } 76 77 struct nvme_tracker * 78 nvme_qpair_allocate_tracker(struct nvme_qpair *qpair, boolean_t alloc_prp_list) 79 { 80 struct nvme_tracker *tr; 81 struct nvme_prp_list *prp_list; 82 83 mtx_lock(&qpair->lock); 84 85 tr = SLIST_FIRST(&qpair->free_tr); 86 if (tr == NULL) { 87 /* TODO: fail if malloc returns NULL */ 88 tr = malloc(sizeof(struct nvme_tracker), M_NVME, 89 M_ZERO | M_NOWAIT); 90 91 bus_dmamap_create(qpair->dma_tag, 0, &tr->dma_map); 92 callout_init_mtx(&tr->timer, &qpair->lock, 0); 93 tr->cid = qpair->num_tr++; 94 } else 95 SLIST_REMOVE_HEAD(&qpair->free_tr, slist); 96 97 if (alloc_prp_list) { 98 prp_list = SLIST_FIRST(&qpair->free_prp_list); 99 100 if (prp_list == NULL) { 101 prp_list = malloc(sizeof(struct nvme_prp_list), 102 M_NVME, M_ZERO | M_NOWAIT); 103 104 bus_dmamap_create(qpair->dma_tag, 0, &prp_list->dma_map); 105 106 bus_dmamap_load(qpair->dma_tag, prp_list->dma_map, 107 prp_list->prp, sizeof(struct nvme_prp_list), 108 nvme_single_map, &prp_list->bus_addr, 0); 109 110 qpair->num_prp_list++; 111 } else { 112 SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist); 113 } 114 115 tr->prp_list = prp_list; 116 } 117 118 return (tr); 119 } 120 121 void 122 nvme_qpair_process_completions(struct nvme_qpair *qpair) 123 { 124 struct nvme_tracker *tr; 125 struct nvme_completion *cpl; 126 boolean_t retry, error; 127 128 while (1) { 129 cpl = &qpair->cpl[qpair->cq_head]; 130 131 if (cpl->p != qpair->phase) 132 break; 133 134 tr = qpair->act_tr[cpl->cid]; 135 KASSERT(tr, 136 ("completion queue has entries but no active trackers\n")); 137 138 error = cpl->sf_sc || cpl->sf_sct; 139 retry = error && nvme_completion_check_retry(cpl); 140 141 if (error) { 142 nvme_dump_completion(cpl); 143 nvme_dump_command(&tr->cmd); 144 } 145 146 qpair->act_tr[cpl->cid] = NULL; 147 148 KASSERT(cpl->cid == tr->cmd.cid, 149 ("cpl cid does not match cmd cid\n")); 150 151 if (tr->cb_fn && !retry) 152 tr->cb_fn(tr->cb_arg, cpl); 153 154 qpair->sq_head = cpl->sqhd; 155 156 mtx_lock(&qpair->lock); 157 callout_stop(&tr->timer); 158 159 if (retry) 160 /* nvme_qpair_submit_cmd() will release the lock. */ 161 nvme_qpair_submit_cmd(qpair, tr); 162 else { 163 if (tr->prp_list) { 164 SLIST_INSERT_HEAD(&qpair->free_prp_list, 165 tr->prp_list, slist); 166 tr->prp_list = NULL; 167 } 168 169 if (tr->payload_size > 0) 170 bus_dmamap_unload(qpair->dma_tag, tr->dma_map); 171 172 SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist); 173 174 mtx_unlock(&qpair->lock); 175 } 176 177 if (++qpair->cq_head == qpair->num_entries) { 178 qpair->cq_head = 0; 179 qpair->phase = !qpair->phase; 180 } 181 182 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl, 183 qpair->cq_head); 184 } 185 } 186 187 static void 188 nvme_qpair_msix_handler(void *arg) 189 { 190 struct nvme_qpair *qpair = arg; 191 192 nvme_qpair_process_completions(qpair); 193 } 194 195 void 196 nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, 197 uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size, 198 struct nvme_controller *ctrlr) 199 { 200 201 qpair->id = id; 202 qpair->vector = vector; 203 qpair->num_entries = num_entries; 204 qpair->max_xfer_size = max_xfer_size; 205 qpair->ctrlr = ctrlr; 206 207 /* 208 * First time through the completion queue, HW will set phase 209 * bit on completions to 1. So set this to 1 here, indicating 210 * we're looking for a 1 to know which entries have completed. 211 * we'll toggle the bit each time when the completion queue 212 * rolls over. 213 */ 214 qpair->phase = 1; 215 216 if (ctrlr->msix_enabled) { 217 218 /* 219 * MSI-X vector resource IDs start at 1, so we add one to 220 * the queue's vector to get the corresponding rid to use. 221 */ 222 qpair->rid = vector + 1; 223 224 qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, 225 &qpair->rid, RF_ACTIVE); 226 227 bus_setup_intr(ctrlr->dev, qpair->res, 228 INTR_TYPE_MISC | INTR_MPSAFE, NULL, 229 nvme_qpair_msix_handler, qpair, &qpair->tag); 230 } 231 232 mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF); 233 234 bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 235 sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR, 236 BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size, 237 (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0, 238 NULL, NULL, &qpair->dma_tag); 239 240 qpair->num_cmds = 0; 241 qpair->num_tr = 0; 242 qpair->num_prp_list = 0; 243 qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; 244 245 /* TODO: error checking on contigmalloc, bus_dmamap_load calls */ 246 qpair->cmd = contigmalloc(qpair->num_entries * 247 sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT, 248 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); 249 qpair->cpl = contigmalloc(qpair->num_entries * 250 sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT, 251 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); 252 253 bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map); 254 bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map); 255 256 bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map, 257 qpair->cmd, qpair->num_entries * sizeof(struct nvme_command), 258 nvme_single_map, &qpair->cmd_bus_addr, 0); 259 bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map, 260 qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion), 261 nvme_single_map, &qpair->cpl_bus_addr, 0); 262 263 qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl); 264 qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl); 265 266 SLIST_INIT(&qpair->free_tr); 267 SLIST_INIT(&qpair->free_prp_list); 268 269 qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries, 270 M_NVME, M_ZERO | M_NOWAIT); 271 } 272 273 static void 274 nvme_qpair_destroy(struct nvme_qpair *qpair) 275 { 276 struct nvme_tracker *tr; 277 struct nvme_prp_list *prp_list; 278 279 if (qpair->tag) 280 bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag); 281 282 if (qpair->res) 283 bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ, 284 rman_get_rid(qpair->res), qpair->res); 285 286 if (qpair->dma_tag) 287 bus_dma_tag_destroy(qpair->dma_tag); 288 289 if (qpair->act_tr) 290 free(qpair->act_tr, M_NVME); 291 292 while (!SLIST_EMPTY(&qpair->free_tr)) { 293 tr = SLIST_FIRST(&qpair->free_tr); 294 SLIST_REMOVE_HEAD(&qpair->free_tr, slist); 295 bus_dmamap_destroy(qpair->dma_tag, tr->dma_map); 296 free(tr, M_NVME); 297 } 298 299 while (!SLIST_EMPTY(&qpair->free_prp_list)) { 300 prp_list = SLIST_FIRST(&qpair->free_prp_list); 301 SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist); 302 bus_dmamap_destroy(qpair->dma_tag, prp_list->dma_map); 303 free(prp_list, M_NVME); 304 } 305 } 306 307 void 308 nvme_admin_qpair_destroy(struct nvme_qpair *qpair) 309 { 310 311 /* 312 * For NVMe, you don't send delete queue commands for the admin 313 * queue, so we just need to unload and free the cmd and cpl memory. 314 */ 315 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); 316 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); 317 318 contigfree(qpair->cmd, 319 qpair->num_entries * sizeof(struct nvme_command), M_NVME); 320 321 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); 322 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); 323 contigfree(qpair->cpl, 324 qpair->num_entries * sizeof(struct nvme_completion), M_NVME); 325 326 nvme_qpair_destroy(qpair); 327 } 328 329 static void 330 nvme_free_cmd_ring(void *arg, const struct nvme_completion *status) 331 { 332 struct nvme_qpair *qpair; 333 334 qpair = (struct nvme_qpair *)arg; 335 bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map); 336 bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map); 337 contigfree(qpair->cmd, 338 qpair->num_entries * sizeof(struct nvme_command), M_NVME); 339 qpair->cmd = NULL; 340 } 341 342 static void 343 nvme_free_cpl_ring(void *arg, const struct nvme_completion *status) 344 { 345 struct nvme_qpair *qpair; 346 347 qpair = (struct nvme_qpair *)arg; 348 bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map); 349 bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map); 350 contigfree(qpair->cpl, 351 qpair->num_entries * sizeof(struct nvme_completion), M_NVME); 352 qpair->cpl = NULL; 353 } 354 355 void 356 nvme_io_qpair_destroy(struct nvme_qpair *qpair) 357 { 358 struct nvme_controller *ctrlr = qpair->ctrlr; 359 360 if (qpair->num_entries > 0) { 361 362 nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring, 363 qpair); 364 /* Spin until free_cmd_ring sets qpair->cmd to NULL. */ 365 while (qpair->cmd) 366 DELAY(5); 367 368 nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring, 369 qpair); 370 /* Spin until free_cpl_ring sets qpair->cmd to NULL. */ 371 while (qpair->cpl) 372 DELAY(5); 373 374 nvme_qpair_destroy(qpair); 375 } 376 } 377 378 static void 379 nvme_timeout(void *arg) 380 { 381 /* 382 * TODO: Add explicit abort operation here, once nvme(4) supports 383 * abort commands. 384 */ 385 } 386 387 void 388 nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr) 389 { 390 391 tr->cmd.cid = tr->cid; 392 qpair->act_tr[tr->cid] = tr; 393 394 /* 395 * TODO: rather than spin until entries free up, put this tracker 396 * on a queue, and submit from the interrupt handler when 397 * entries free up. 398 */ 399 if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) { 400 do { 401 mtx_unlock(&qpair->lock); 402 DELAY(5); 403 mtx_lock(&qpair->lock); 404 } while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head); 405 } 406 407 callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr); 408 409 /* Copy the command from the tracker to the submission queue. */ 410 memcpy(&qpair->cmd[qpair->sq_tail], &tr->cmd, sizeof(tr->cmd)); 411 412 if (++qpair->sq_tail == qpair->num_entries) 413 qpair->sq_tail = 0; 414 415 wmb(); 416 nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl, 417 qpair->sq_tail); 418 419 qpair->num_cmds++; 420 421 mtx_unlock(&qpair->lock); 422 } 423