xref: /freebsd/sys/dev/nvme/nvme.c (revision 25408c853d9ecb2e76b9e38407338f86ecb8a55c)
1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bus.h>
32 #include <sys/conf.h>
33 #include <sys/module.h>
34 
35 #include <dev/pci/pcireg.h>
36 #include <dev/pci/pcivar.h>
37 
38 #include "nvme_private.h"
39 
/*
 * A consumer is an upper-layer client (e.g. nvd(4)) that asks to be
 *  notified of every namespace exposed by attached NVMe controllers.
 */
struct nvme_consumer {
	nvme_consumer_cb_fn_t		cb_fn;	/* invoked once per namespace */
	void				*cb_arg;	/* opaque argument passed back to cb_fn */
};

/* Fixed-size consumer registry; a slot is free when cb_fn == NULL. */
struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];

MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
48 
static int    nvme_probe(device_t);
static int    nvme_attach(device_t);
static int    nvme_detach(device_t);

static devclass_t nvme_devclass;

/* newbus device interface glue for the PCI-attached nvme driver. */
static device_method_t nvme_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     nvme_probe),
	DEVMETHOD(device_attach,    nvme_attach),
	DEVMETHOD(device_detach,    nvme_detach),
	{ 0, 0 }
};

static driver_t nvme_pci_driver = {
	"nvme",
	nvme_pci_methods,
	sizeof(struct nvme_controller),	/* softc is the controller structure */
};

DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, 0, 0);
MODULE_VERSION(nvme, 1);

/*
 * PCI device-ID match table, terminated by a zero entry.  Devices not
 *  listed here may still probe via the NVM storage class check below.
 */
static struct _pcsid
{
	u_int32_t   type;
	const char  *desc;
} pci_ids[] = {
	{ 0x01118086,		"NVMe Controller"  },
	{ CHATHAM_PCI_ID,	"Chatham Prototype NVMe Controller"  },
	{ IDT_PCI_ID,		"IDT NVMe Controller"  },
	{ 0x00000000,		NULL  }
};
82 
83 static int
84 nvme_probe (device_t device)
85 {
86 	struct _pcsid	*ep;
87 	u_int32_t	type;
88 
89 	type = pci_get_devid(device);
90 	ep = pci_ids;
91 
92 	while (ep->type && ep->type != type)
93 		++ep;
94 
95 	if (ep->desc) {
96 		device_set_desc(device, ep->desc);
97 		return (BUS_PROBE_DEFAULT);
98 	}
99 
100 #if defined(PCIS_STORAGE_NVM)
101 	if (pci_get_class(device)    == PCIC_STORAGE &&
102 	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
103 	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
104 		device_set_desc(device, "Generic NVMe Device");
105 		return (BUS_PROBE_GENERIC);
106 	}
107 #endif
108 
109 	return (ENXIO);
110 }
111 
/* Module load hook; currently no global state needs initialization. */
static void
nvme_load(void)
{
}
116 
/* Module unload hook; currently no global state needs teardown. */
static void
nvme_unload(void)
{
}
121 
122 static void
123 nvme_shutdown(void)
124 {
125 	device_t		*devlist;
126 	struct nvme_controller	*ctrlr;
127 	union cc_register	cc;
128 	union csts_register	csts;
129 	int			dev, devcount;
130 
131 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
132 		return;
133 
134 	for (dev = 0; dev < devcount; dev++) {
135 		/*
136 		 * Only notify controller of shutdown when a real shutdown is
137 		 *  in process, not when a module unload occurs.  It seems at
138 		 *  least some controllers (Chatham at least) don't let you
139 		 *  re-enable the controller after shutdown notification has
140 		 *  been received.
141 		 */
142 		ctrlr = DEVICE2SOFTC(devlist[dev]);
143 		cc.raw = nvme_mmio_read_4(ctrlr, cc);
144 		cc.bits.shn = NVME_SHN_NORMAL;
145 		nvme_mmio_write_4(ctrlr, cc, cc.raw);
146 		csts.raw = nvme_mmio_read_4(ctrlr, csts);
147 		while (csts.bits.shst != NVME_SHST_COMPLETE) {
148 			DELAY(5);
149 			csts.raw = nvme_mmio_read_4(ctrlr, csts);
150 		}
151 	}
152 
153 	free(devlist, M_TEMP);
154 }
155 
156 static int
157 nvme_modevent(module_t mod, int type, void *arg)
158 {
159 
160 	switch (type) {
161 	case MOD_LOAD:
162 		nvme_load();
163 		break;
164 	case MOD_UNLOAD:
165 		nvme_unload();
166 		break;
167 	case MOD_SHUTDOWN:
168 		nvme_shutdown();
169 		break;
170 	default:
171 		break;
172 	}
173 
174 	return (0);
175 }
176 
177 moduledata_t nvme_mod = {
178 	"nvme",
179 	(modeventhand_t)nvme_modevent,
180 	0
181 };
182 
183 DECLARE_MODULE(nvme, nvme_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
184 
185 void
186 nvme_dump_command(struct nvme_command *cmd)
187 {
188 	printf("opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x "
189 	    "mptr:%qx prp1:%qx prp2:%qx cdw:%x %x %x %x %x %x\n",
190 	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
191 	    cmd->rsvd2, cmd->rsvd3,
192 	    (long long unsigned int)cmd->mptr,
193 	    (long long unsigned int)cmd->prp1,
194 	    (long long unsigned int)cmd->prp2,
195 	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
196 	    cmd->cdw15);
197 }
198 
199 void
200 nvme_dump_completion(struct nvme_completion *cpl)
201 {
202 	printf("cdw0:%08x sqhd:%04x sqid:%04x "
203 	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
204 	    cpl->cdw0, cpl->sqhd, cpl->sqid,
205 	    cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
206 	    cpl->sf_dnr);
207 }
208 
209 void
210 nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
211 {
212 	struct nvme_tracker 	*tr;
213 	struct nvme_qpair 	*qpair;
214 	struct nvme_prp_list	*prp_list;
215 	uint32_t		cur_nseg;
216 
217 	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
218 
219 	tr = (struct nvme_tracker *)arg;
220 	qpair = tr->qpair;
221 
222 	/*
223 	 * Note that we specified PAGE_SIZE for alignment and max
224 	 *  segment size when creating the bus dma tags.  So here
225 	 *  we can safely just transfer each segment to its
226 	 *  associated PRP entry.
227 	 */
228 	tr->cmd.prp1 = seg[0].ds_addr;
229 
230 	if (nseg == 2) {
231 		tr->cmd.prp2 = seg[1].ds_addr;
232 	} else if (nseg > 2) {
233 		KASSERT(tr->prp_list,
234 		    ("prp_list needed but not attached to tracker\n"));
235 		cur_nseg = 1;
236 		prp_list = tr->prp_list;
237 		tr->cmd.prp2 = (uint64_t)prp_list->bus_addr;
238 		while (cur_nseg < nseg) {
239 			prp_list->prp[cur_nseg-1] =
240 			    (uint64_t)seg[cur_nseg].ds_addr;
241 			cur_nseg++;
242 		}
243 	}
244 
245 	nvme_qpair_submit_cmd(qpair, tr);
246 }
247 
248 struct nvme_tracker *
249 nvme_allocate_tracker(struct nvme_controller *ctrlr, boolean_t is_admin,
250     nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t payload_size, void *payload)
251 {
252 	struct nvme_tracker 	*tr;
253 	struct nvme_qpair	*qpair;
254 	uint32_t 		modulo, offset, num_prps;
255 	boolean_t		alloc_prp_list = FALSE;
256 
257 	if (is_admin) {
258 		qpair = &ctrlr->adminq;
259 	} else {
260 		if (ctrlr->per_cpu_io_queues)
261 			qpair = &ctrlr->ioq[curcpu];
262 		else
263 			qpair = &ctrlr->ioq[0];
264 	}
265 
266 	num_prps = payload_size / PAGE_SIZE;
267 	modulo = payload_size % PAGE_SIZE;
268 	offset = (uint32_t)((uintptr_t)payload % PAGE_SIZE);
269 
270 	if (modulo || offset)
271 		num_prps += 1 + (modulo + offset - 1) / PAGE_SIZE;
272 
273 	if (num_prps > 2)
274 		alloc_prp_list = TRUE;
275 
276 	tr = nvme_qpair_allocate_tracker(qpair, alloc_prp_list);
277 
278 	memset(&tr->cmd, 0, sizeof(tr->cmd));
279 
280 	tr->qpair = qpair;
281 	tr->cb_fn = cb_fn;
282 	tr->cb_arg = cb_arg;
283 	tr->payload_size = payload_size;
284 
285 	return (tr);
286 }
287 
288 static int
289 nvme_attach(device_t dev)
290 {
291 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
292 	int			status;
293 
294 	status = nvme_ctrlr_construct(ctrlr, dev);
295 
296 	if (status != 0)
297 		return (status);
298 
299 	/*
300 	 * Reset controller twice to ensure we do a transition from cc.en==1
301 	 *  to cc.en==0.  This is because we don't really know what status
302 	 *  the controller was left in when boot handed off to OS.
303 	 */
304 	status = nvme_ctrlr_reset(ctrlr);
305 	if (status != 0)
306 		return (status);
307 
308 	status = nvme_ctrlr_reset(ctrlr);
309 	if (status != 0)
310 		return (status);
311 
312 	ctrlr->config_hook.ich_func = nvme_ctrlr_start;
313 	ctrlr->config_hook.ich_arg = ctrlr;
314 
315 	config_intrhook_establish(&ctrlr->config_hook);
316 
317 	return (0);
318 }
319 
/*
 * Device detach: tear down everything nvme_attach and nvme_ctrlr_start
 *  set up, in reverse dependency order — taskqueue, per-namespace and
 *  controller cdevs, I/O and admin qpairs, then bus resources and
 *  interrupts.  NOTE(review): ctrlr->ioq is freed without a NULL check;
 *  presumably construct guarantees it was allocated — confirm for
 *  partial-attach failure paths.
 */
static int
nvme_detach (device_t dev)
{
	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
	struct nvme_namespace	*ns;
	int			i;

	/* Drain pending work before freeing the taskqueue. */
	if (ctrlr->taskqueue) {
		taskqueue_drain(ctrlr->taskqueue, &ctrlr->task);
		taskqueue_free(ctrlr->taskqueue);
	}

	/* Destroy each namespace's character device, if created. */
	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
		ns = &ctrlr->ns[i];
		if (ns->cdev)
			destroy_dev(ns->cdev);
	}

	if (ctrlr->cdev)
		destroy_dev(ctrlr->cdev);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
	}

	free(ctrlr->ioq, M_NVME);

	nvme_admin_qpair_destroy(&ctrlr->adminq);

	/* Release the BAR mapping used for register access. */
	if (ctrlr->resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->resource_id, ctrlr->resource);
	}

#ifdef CHATHAM2
	/* Chatham prototype boards map an extra BAR. */
	if (ctrlr->chatham_resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->chatham_resource_id, ctrlr->chatham_resource);
	}
#endif

	/* Tear down the interrupt handler before releasing its IRQ. */
	if (ctrlr->tag)
		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);

	if (ctrlr->res)
		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
		    rman_get_rid(ctrlr->res), ctrlr->res);

	if (ctrlr->msix_enabled)
		pci_release_msi(dev);

	return (0);
}
373 
374 static void
375 nvme_notify_consumer(struct nvme_consumer *consumer)
376 {
377 	device_t		*devlist;
378 	struct nvme_controller	*ctrlr;
379 	int			dev, ns, devcount;
380 
381 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
382 		return;
383 
384 	for (dev = 0; dev < devcount; dev++) {
385 		ctrlr = DEVICE2SOFTC(devlist[dev]);
386 		for (ns = 0; ns < ctrlr->cdata.nn; ns++)
387 			(*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
388 	}
389 
390 	free(devlist, M_TEMP);
391 }
392 
393 struct nvme_consumer *
394 nvme_register_consumer(nvme_consumer_cb_fn_t cb_fn, void *cb_arg)
395 {
396 	int i;
397 
398 	/*
399 	 * TODO: add locking around consumer registration.  Not an issue
400 	 *  right now since we only have one nvme consumer - nvd(4).
401 	 */
402 	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
403 		if (nvme_consumer[i].cb_fn == NULL) {
404 			nvme_consumer[i].cb_fn = cb_fn;
405 			nvme_consumer[i].cb_arg = cb_arg;
406 
407 			nvme_notify_consumer(&nvme_consumer[i]);
408 			return (&nvme_consumer[i]);
409 		}
410 
411 	printf("nvme(4): consumer not registered - no slots available\n");
412 	return (NULL);
413 }
414 
/*
 * Unregister a consumer by clearing its slot, making it available for
 *  reuse by a future nvme_register_consumer() call.
 */
void
nvme_unregister_consumer(struct nvme_consumer *consumer)
{

	consumer->cb_fn = NULL;
	consumer->cb_arg = NULL;
}
422 
423