/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>

#include "nvme_private.h"

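/*
 * Decide whether a failed command is worth retrying, based on the
 *  status code type (SCT) and status code (SC) in its completion.
 */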
static boolean_t
nvme_completion_check_retry(const struct nvme_completion *cpl)
{
	/*
	 * TODO: The spec is not clear on how commands aborted due
	 *  to TLER will be marked, so for now NAMESPACE_NOT_READY is
	 *  the only case where we look at the DNR bit.
	 */
	switch (cpl->sf_sct) {
	case NVME_SCT_GENERIC:
		switch (cpl->sf_sc) {
		case NVME_SC_NAMESPACE_NOT_READY:
			if (cpl->sf_dnr)
				return (0);
			else
				return (1);
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return (0);
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return (0);
	}
}

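/*
 * Allocate a command tracker, growing the pool with an M_NOWAIT malloc
 *  if the free list is empty, and optionally attach a PRP list for
 *  commands whose payloads span multiple pages.  On success the
 *  tracker is returned with qpair->lock held; nvme_qpair_submit_cmd()
 *  drops it.  Returns NULL (with the lock dropped) if memory could not
 *  be allocated; callers are assumed to treat that as a transient
 *  out-of-resources failure.
 */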
struct nvme_tracker *
nvme_qpair_allocate_tracker(struct nvme_qpair *qpair, boolean_t alloc_prp_list)
{
	struct nvme_tracker	*tr;
	struct nvme_prp_list	*prp_list;

	mtx_lock(&qpair->lock);

	tr = SLIST_FIRST(&qpair->free_tr);
	if (tr == NULL) {
		tr = malloc(sizeof(struct nvme_tracker), M_NVME,
		    M_ZERO | M_NOWAIT);
		if (tr == NULL) {
			mtx_unlock(&qpair->lock);
			return (NULL);
		}

		bus_dmamap_create(qpair->dma_tag, 0, &tr->dma_map);
		callout_init_mtx(&tr->timer, &qpair->lock, 0);
		tr->cid = qpair->num_tr++;
	} else
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);

	if (alloc_prp_list) {
		prp_list = SLIST_FIRST(&qpair->free_prp_list);

		if (prp_list == NULL) {
			prp_list = malloc(sizeof(struct nvme_prp_list),
			    M_NVME, M_ZERO | M_NOWAIT);
			if (prp_list == NULL) {
				/* Return the tracker to the free pool. */
				SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
				mtx_unlock(&qpair->lock);
				return (NULL);
			}

			bus_dmamap_create(qpair->dma_tag, 0, &prp_list->dma_map);

			bus_dmamap_load(qpair->dma_tag, prp_list->dma_map,
			    prp_list->prp, sizeof(struct nvme_prp_list),
			    nvme_single_map, &prp_list->bus_addr, 0);

			qpair->num_prp_list++;
		} else {
			SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist);
		}

		tr->prp_list = prp_list;
	}

	return (tr);
}

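/*
 * Drain the completion queue: consume entries whose phase bit matches
 *  the qpair's current phase, invoke per-command callbacks, recycle
 *  trackers, and update the completion queue head doorbell.
 */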
void
nvme_qpair_process_completions(struct nvme_qpair *qpair)
{
	struct nvme_tracker	*tr;
	struct nvme_completion	*cpl;
	boolean_t		retry, error;

	while (1) {
		cpl = &qpair->cpl[qpair->cq_head];

		/* Stop at the first entry HW has not yet written. */
		if (cpl->p != qpair->phase)
			break;

		tr = qpair->act_tr[cpl->cid];
		KASSERT(tr,
		    ("completion queue has entries but no active trackers\n"));

		error = cpl->sf_sc || cpl->sf_sct;
		retry = error && nvme_completion_check_retry(cpl);

		if (error) {
			nvme_dump_completion(cpl);
			nvme_dump_command(&tr->cmd);
		}

		qpair->act_tr[cpl->cid] = NULL;

		KASSERT(cpl->cid == tr->cmd.cid,
		    ("cpl cid does not match cmd cid\n"));

		if (tr->cb_fn && !retry)
			tr->cb_fn(tr->cb_arg, cpl);

		qpair->sq_head = cpl->sqhd;

		mtx_lock(&qpair->lock);
		callout_stop(&tr->timer);

		if (retry)
			/* nvme_qpair_submit_cmd() will release the lock. */
			nvme_qpair_submit_cmd(qpair, tr);
		else {
			/* Recycle the tracker and its PRP list, if any. */
			if (tr->prp_list) {
				SLIST_INSERT_HEAD(&qpair->free_prp_list,
				    tr->prp_list, slist);
				tr->prp_list = NULL;
			}

			if (tr->payload_size > 0)
				bus_dmamap_unload(qpair->dma_tag, tr->dma_map);

			SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);

			mtx_unlock(&qpair->lock);
		}

		/* Toggle the expected phase each time the queue wraps. */
		if (++qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}

		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
		    qpair->cq_head);
	}
}

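/* MSI-X interrupt handler: drain this qpair's completion queue. */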
static void
nvme_qpair_msix_handler(void *arg)
{
	struct nvme_qpair *qpair = arg;

	nvme_qpair_process_completions(qpair);
}

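/*
 * One-time initialization of a queue pair: set up the MSI-X interrupt
 *  (when enabled), create the DMA tag and maps, and allocate the
 *  submission and completion rings.
 */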
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
    uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
    struct nvme_controller *ctrlr)
{

	qpair->id = id;
	qpair->vector = vector;
	qpair->num_entries = num_entries;
	qpair->max_xfer_size = max_xfer_size;
	qpair->ctrlr = ctrlr;

	/*
	 * On the first pass through the completion queue, HW will set
	 *  the phase bit on each completion to 1.  So set this to 1 here,
	 *  indicating we're looking for a 1 to know which entries have
	 *  completed.  We'll toggle the bit each time the completion
	 *  queue rolls over.
	 */
	qpair->phase = 1;

	if (ctrlr->msix_enabled) {
		/*
		 * MSI-X vector resource IDs start at 1, so we add one to
		 *  the queue's vector to get the corresponding rid to use.
		 */
		qpair->rid = vector + 1;

		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
		    &qpair->rid, RF_ACTIVE);

		bus_setup_intr(ctrlr->dev, qpair->res,
		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
		    nvme_qpair_msix_handler, qpair, &qpair->tag);
	}

	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);

	bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
	    sizeof(uint64_t), PAGE_SIZE, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, qpair->max_xfer_size,
	    (qpair->max_xfer_size/PAGE_SIZE)+1, PAGE_SIZE, 0,
	    NULL, NULL, &qpair->dma_tag);

	qpair->num_cmds = 0;
	qpair->num_tr = 0;
	qpair->num_prp_list = 0;
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
	qpair->cmd = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_command), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	qpair->cpl = contigmalloc(qpair->num_entries *
	    sizeof(struct nvme_completion), M_NVME, M_ZERO | M_NOWAIT,
	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);

	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
	bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);

	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
	    nvme_single_map, &qpair->cmd_bus_addr, 0);
	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
	    nvme_single_map, &qpair->cpl_bus_addr, 0);

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);

	SLIST_INIT(&qpair->free_tr);
	SLIST_INIT(&qpair->free_prp_list);

	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
	    M_NVME, M_ZERO | M_NOWAIT);
}

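/*
 * Common teardown shared by the admin and I/O destroy paths: release
 *  the interrupt, free all pooled trackers and PRP lists, then destroy
 *  the DMA tag.
 */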
static void
nvme_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_tracker *tr;
	struct nvme_prp_list *prp_list;

	if (qpair->tag)
		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);

	if (qpair->res)
		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(qpair->res), qpair->res);

	if (qpair->act_tr)
		free(qpair->act_tr, M_NVME);

	while (!SLIST_EMPTY(&qpair->free_tr)) {
		tr = SLIST_FIRST(&qpair->free_tr);
		SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
		bus_dmamap_destroy(qpair->dma_tag, tr->dma_map);
		free(tr, M_NVME);
	}

	while (!SLIST_EMPTY(&qpair->free_prp_list)) {
		prp_list = SLIST_FIRST(&qpair->free_prp_list);
		SLIST_REMOVE_HEAD(&qpair->free_prp_list, slist);
		bus_dmamap_destroy(qpair->dma_tag, prp_list->dma_map);
		free(prp_list, M_NVME);
	}

	/*
	 * Destroy the tag last: bus_dma_tag_destroy() fails with EBUSY
	 *  while any of the maps destroyed above still exist.
	 */
	if (qpair->dma_tag)
		bus_dma_tag_destroy(qpair->dma_tag);
}

void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{

	/*
	 * For NVMe, you don't send delete queue commands for the admin
	 *  queue, so we just need to unload and free the cmd and cpl memory.
	 */
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);

	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);

	nvme_qpair_destroy(qpair);
}

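/*
 * Completion callbacks for the Delete I/O Queue commands issued from
 *  nvme_io_qpair_destroy().  Each frees its ring and clears the
 *  corresponding pointer, which the destroy path polls on below.
 */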
static void
nvme_free_cmd_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
	contigfree(qpair->cmd,
	    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
	qpair->cmd = NULL;
}

static void
nvme_free_cpl_ring(void *arg, const struct nvme_completion *status)
{
	struct nvme_qpair *qpair;

	qpair = (struct nvme_qpair *)arg;
	bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
	bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
	contigfree(qpair->cpl,
	    qpair->num_entries * sizeof(struct nvme_completion), M_NVME);
	qpair->cpl = NULL;
}

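/*
 * Delete the submission and completion queues on the controller, then
 *  free the host-side ring memory via the callbacks above.
 */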
void
nvme_io_qpair_destroy(struct nvme_qpair *qpair)
{
	struct nvme_controller *ctrlr = qpair->ctrlr;

	if (qpair->num_entries > 0) {
		nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_free_cmd_ring,
		    qpair);
		/* Spin until nvme_free_cmd_ring() sets qpair->cmd to NULL. */
		while (qpair->cmd)
			DELAY(5);

		nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_free_cpl_ring,
		    qpair);
		/* Spin until nvme_free_cpl_ring() sets qpair->cpl to NULL. */
		while (qpair->cpl)
			DELAY(5);

		nvme_qpair_destroy(qpair);
	}
}

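/*
 * Callout handler armed for each submitted command; fires if the
 *  command has not completed within NVME_TIMEOUT_IN_SEC seconds.
 */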
static void
nvme_timeout(void *arg)
{
	/*
	 * TODO: Add explicit abort operation here, once nvme(4) supports
	 *  abort commands.
	 */
}

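/*
 * Post a tracker's command to the submission queue and ring the tail
 *  doorbell.  Called with qpair->lock held, as left by
 *  nvme_qpair_allocate_tracker(); the lock is dropped before returning.
 *
 * A sketch of the expected call sequence (the real callers live
 *  elsewhere in the driver):
 *
 *	tr = nvme_qpair_allocate_tracker(qpair, TRUE);
 *	... fill in tr->cmd, tr->cb_fn, tr->cb_arg; map the payload ...
 *	nvme_qpair_submit_cmd(qpair, tr);	(drops qpair->lock)
 */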
void
nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
{

	tr->cmd.cid = tr->cid;
	qpair->act_tr[tr->cid] = tr;

	/*
	 * TODO: rather than spin until entries free up, put this tracker
	 *  on a queue, and submit from the interrupt handler when
	 *  entries free up.
	 */
	if ((qpair->sq_tail + 1) % qpair->num_entries == qpair->sq_head) {
		do {
			mtx_unlock(&qpair->lock);
			DELAY(5);
			mtx_lock(&qpair->lock);
		} while ((qpair->sq_tail + 1) % qpair->num_entries ==
		    qpair->sq_head);
	}

	callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &tr->cmd, sizeof(tr->cmd));

	if (++qpair->sq_tail == qpair->num_entries)
		qpair->sq_tail = 0;

	/* Make the command visible in memory before ringing the doorbell. */
	wmb();
	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
	    qpair->sq_tail);

	qpair->num_cmds++;

	mtx_unlock(&qpair->lock);
}
423