xref: /freebsd/sys/dev/nvme/nvme_private.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2012-2014 Intel Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #ifndef __NVME_PRIVATE_H__
32 #define __NVME_PRIVATE_H__
33 
34 #include <sys/param.h>
35 #include <sys/bio.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/rman.h>
43 #include <sys/systm.h>
44 #include <sys/taskqueue.h>
45 
46 #include <vm/uma.h>
47 
48 #include <machine/bus.h>
49 
50 #include "nvme.h"
51 
/* Fetch a device's softc, typed as the driver's struct nvme_controller. */
52 #define DEVICE2SOFTC(dev) ((struct nvme_controller *) device_get_softc(dev))
53 
/* Declares the M_NVME malloc type; its definition is not in this header. */
54 MALLOC_DECLARE(M_NVME);
55 
/*
 * 32-bit PCI IDs for two IDT boards.
 * NOTE(review): presumably (device ID << 16) | vendor ID -- confirm against
 *  the code that compares them.
 */
56 #define IDT32_PCI_ID		0x80d0111d /* 32 channel board */
57 #define IDT8_PCI_ID		0x80d2111d /* 8 channel board */
58 
59 /*
60  * For commands requiring more than 2 PRP entries, one PRP will be
61  *  embedded in the command (prp1), and the rest of the PRP entries
62  *  will be in a list pointed to by the command (prp2).  This means
63  *  that the real max number of PRP entries we support is 32+1, which
64  *  results in a max xfer size of 32*PAGE_SIZE.
 *
 * NOTE(review): the literal 32 above assumes
 *  NVME_MAX_XFER_SIZE / PAGE_SIZE == 32; NVME_MAX_XFER_SIZE is defined
 *  outside this header -- confirm before relying on the figure.
65  */
66 #define NVME_MAX_PRP_LIST_ENTRIES	(NVME_MAX_XFER_SIZE / PAGE_SIZE)
67 
/*
 * Admin queue pair sizing.
 * NOTE(review): presumably the admin counterparts of NVME_IO_TRACKERS and
 *  NVME_IO_ENTRIES (outstanding-command limit vs. queue size) -- confirm.
 */
68 #define NVME_ADMIN_TRACKERS	(16)
69 #define NVME_ADMIN_ENTRIES	(128)
70 /* min and max are defined in admin queue attributes section of spec */
71 #define NVME_MIN_ADMIN_ENTRIES	(2)
72 #define NVME_MAX_ADMIN_ENTRIES	(4096)
73 
74 /*
75  * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
76  *  queues, while NVME_IO_TRACKERS defines the maximum number of I/O that we
77  *  will allow outstanding on an I/O qpair at any time.  The only advantage in
78  *  having IO_ENTRIES > IO_TRACKERS is for debugging purposes - when dumping
79  *  the contents of the submission and completion queues, it will show a longer
80  *  history of data.
81  */
82 #define NVME_IO_ENTRIES		(256)
83 #define NVME_IO_TRACKERS	(128)
/*
 * Lower and upper limits for the tracker count.
 * NOTE(review): presumably used to clamp a tunable -- confirm at the use site.
 */
84 #define NVME_MIN_IO_TRACKERS	(4)
85 #define NVME_MAX_IO_TRACKERS	(1024)
86 
87 /*
88  * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CAP.MQES
89  *  for each controller.
90  */
91 
/*
 * Interrupt coalescing values.
 * NOTE(review): presumably the defaults for nvme_controller.int_coal_time
 *  and .int_coal_threshold -- confirm at the use site.
 */
92 #define NVME_INT_COAL_TIME	(0)	/* disabled */
93 #define NVME_INT_COAL_THRESHOLD (0)	/* 0-based */
94 
/*
 * Fixed array sizes used by the structures below: nvme_controller.ns[],
 *  the cons_cookie[] arrays, and nvme_controller.aer[] respectively.
 */
95 #define NVME_MAX_NAMESPACES	(16)
96 #define NVME_MAX_CONSUMERS	(2)
97 #define NVME_MAX_ASYNC_EVENTS	(8)
98 
/* Command timeout default and its allowed range, all in seconds. */
99 #define NVME_DEFAULT_TIMEOUT_PERIOD	(30)    /* in seconds */
100 #define NVME_MIN_TIMEOUT_PERIOD		(5)
101 #define NVME_MAX_TIMEOUT_PERIOD		(120)
102 
/* NOTE(review): presumably the initial value of nvme_retry_count -- confirm. */
103 #define NVME_DEFAULT_RETRY_COUNT	(4)
104 
105 /* Maximum log page size to fetch for AERs. */
/* Also the size of nvme_async_event_request.log_page_buffer[]. */
106 #define NVME_MAX_AER_LOG_SIZE		(4096)
107 
108 /*
109  * Define CACHE_LINE_SIZE here for older FreeBSD versions that do not define
110  *  it.
 * It is used below to align struct nvme_qpair and its lock.
111  */
112 #ifndef CACHE_LINE_SIZE
113 #define CACHE_LINE_SIZE		(64)
114 #endif
115 
/*
 * Driver-wide globals; the definitions live outside this header.
 * nvme_request_zone is the UMA zone that _nvme_allocate_request() allocates
 *  from and nvme_free_request() returns requests to.
 */
116 extern uma_zone_t	nvme_request_zone;
117 extern int32_t		nvme_retry_count;
118 extern bool		nvme_verbose_cmd_dump;
119 
/*
 * State shared between a caller waiting in nvme_completion_poll() and the
 *  completion callback (see nvme_completion_poll_cb()).
 */
120 struct nvme_completion_poll_status {
121 
122 	struct nvme_completion	cpl;
	/* Read with atomic_load_acq_int() by nvme_completion_poll(). */
123 	int			done;
124 };
125 
126 extern devclass_t nvme_devclass;
127 
/*
 * Values for nvme_request.type.  The nvme_allocate_request_*() helpers in
 *  this header set VADDR, NULL, BIO and CCB; nothing in this header sets UIO.
 */
128 #define NVME_REQUEST_VADDR	1
129 #define NVME_REQUEST_NULL	2 /* For requests with no payload. */
130 #define NVME_REQUEST_UIO	3
131 #define NVME_REQUEST_BIO	4
132 #define NVME_REQUEST_CCB        5
133 
/*
 * One command plus the bookkeeping needed to complete it.  Instances are
 *  allocated zeroed from nvme_request_zone by _nvme_allocate_request().
 */
134 struct nvme_request {
135 
136 	struct nvme_command		cmd;
137 	struct nvme_qpair		*qpair;
	/*
	 * Payload reference, interpreted according to 'type': the allocators
	 *  store into u.payload for VADDR and CCB requests and into u.bio
	 *  for BIO requests.
	 */
138 	union {
139 		void			*payload;
140 		struct bio		*bio;
141 	} u;
142 	uint32_t			type;		/* NVME_REQUEST_* */
143 	uint32_t			payload_size;	/* set for VADDR requests */
144 	bool				timeout;	/* allocator sets this true */
145 	nvme_cb_fn_t			cb_fn;
146 	void				*cb_arg;
147 	int32_t				retries;
	/* Linkage for the STAILQs of requests (queued_req, fail_req). */
148 	STAILQ_ENTRY(nvme_request)	stailq;
149 };
150 
/*
 * Per-slot state for an asynchronous event request; nvme_controller holds
 *  NVME_MAX_ASYNC_EVENTS of these in aer[].
 */
151 struct nvme_async_event_request {
152 
153 	struct nvme_controller		*ctrlr;
154 	struct nvme_request		*req;
155 	struct nvme_completion		cpl;
156 	uint32_t			log_page_id;
157 	uint32_t			log_page_size;
	/* Inline buffer, NVME_MAX_AER_LOG_SIZE bytes. */
158 	uint8_t				log_page_buffer[NVME_MAX_AER_LOG_SIZE];
159 };
160 
/*
 * Tracks one request on a queue pair.  A qpair keeps trackers on its
 *  free_tr and outstanding_tr TAILQs.
 */
161 struct nvme_tracker {
162 
163 	TAILQ_ENTRY(nvme_tracker)	tailq;	/* linkage for the qpair lists */
164 	struct nvme_request		*req;
165 	struct nvme_qpair		*qpair;
166 	struct callout			timer;
167 	bus_dmamap_t			payload_dma_map;
168 	uint16_t			cid;
169 
	/*
	 * NOTE(review): presumably the PRP list's kernel address and its bus
	 *  address -- confirm in nvme_qpair.c.
	 */
170 	uint64_t			*prp;
171 	bus_addr_t			prp_bus_addr;
172 };
173 
/*
 * A submission/completion queue pair.  Both the structure and its lock are
 *  aligned to CACHE_LINE_SIZE.
 */
174 struct nvme_qpair {
175 
176 	struct nvme_controller	*ctrlr;
177 	uint32_t		id;
178 	int			domain;
179 	int			cpu;
180 
	/* Interrupt resources for this qpair. */
181 	uint16_t		vector;
182 	int			rid;
183 	struct resource		*res;
184 	void 			*tag;
185 
	/*
	 * NOTE(review): num_entries / num_trackers presumably hold the
	 *  NVME_*_ENTRIES / NVME_*_TRACKERS style sizes passed to
	 *  nvme_qpair_construct() -- confirm.
	 */
186 	uint32_t		num_entries;
187 	uint32_t		num_trackers;
188 	uint32_t		sq_tdbl_off;
189 	uint32_t		cq_hdbl_off;
190 
191 	uint32_t		phase;
192 	uint32_t		sq_head;
193 	uint32_t		sq_tail;
194 	uint32_t		cq_head;
195 
	/* Statistics counters. */
196 	int64_t			num_cmds;
197 	int64_t			num_intr_handler_calls;
198 	int64_t			num_retries;
199 	int64_t			num_failures;
200 
201 	struct nvme_command	*cmd;
202 	struct nvme_completion	*cpl;
203 
204 	bus_dma_tag_t		dma_tag;
205 	bus_dma_tag_t		dma_tag_payload;
206 
207 	bus_dmamap_t		queuemem_map;
208 	uint64_t		cmd_bus_addr;
209 	uint64_t		cpl_bus_addr;
210 
	/* Tracker lists are linked through nvme_tracker.tailq. */
211 	TAILQ_HEAD(, nvme_tracker)	free_tr;
212 	TAILQ_HEAD(, nvme_tracker)	outstanding_tr;
	/* Request list is linked through nvme_request.stailq. */
213 	STAILQ_HEAD(, nvme_request)	queued_req;
214 
215 	struct nvme_tracker	**act_tr;
216 
217 	bool			is_enabled;
218 
219 	struct mtx		lock __aligned(CACHE_LINE_SIZE);
220 
221 } __aligned(CACHE_LINE_SIZE);
222 
/*
 * Per-namespace state; nvme_controller embeds NVME_MAX_NAMESPACES of these
 *  in ns[].
 */
223 struct nvme_namespace {
224 
225 	struct nvme_controller		*ctrlr;
226 	struct nvme_namespace_data	data;
227 	uint32_t			id;
228 	uint32_t			flags;
229 	struct cdev			*cdev;
	/* One opaque cookie slot per consumer, NVME_MAX_CONSUMERS in total. */
230 	void				*cons_cookie[NVME_MAX_CONSUMERS];
231 	uint32_t			boundary;
232 	struct mtx			lock;
233 };
234 
235 /*
236  * One of these per allocated PCI device.
 * DEVICE2SOFTC() casts the device softc to this type.
237  */
238 struct nvme_controller {
239 
240 	device_t		dev;	/* used by nvme_printf() */
241 
242 	struct mtx		lock;
243 	int			domain;
244 	uint32_t		ready_timeout_in_ms;
	/* NOTE(review): presumably a bitmask of the QUIRK_* values below. */
245 	uint32_t		quirks;
246 #define	QUIRK_DELAY_B4_CHK_RDY	1		/* Can't touch MMIO on disable */
247 #define	QUIRK_DISABLE_TIMEOUT	2		/* Disable broken completion timeout feature */
248 
	/* Tag/handle pair used by the nvme_mmio_*() register access macros. */
249 	bus_space_tag_t		bus_tag;
250 	bus_space_handle_t	bus_handle;
251 	int			resource_id;
252 	struct resource		*resource;
253 
254 	/*
255 	 * The NVMe spec allows for the MSI-X table to be placed in BAR 4/5,
256 	 *  separate from the control registers which are in BAR 0/1.  These
257 	 *  members track the mapping of BAR 4/5 for that reason.
258 	 */
259 	int			bar4_resource_id;
260 	struct resource		*bar4_resource;
261 
262 	uint32_t		msix_enabled;
263 	uint32_t		enable_aborts;
264 
265 	uint32_t		num_io_queues;
266 	uint32_t		max_hw_pend_io;
267 
268 	/* Fields for tracking progress during controller initialization. */
269 	struct intr_config_hook	config_hook;
270 	uint32_t		ns_identified;
271 	uint32_t		queues_created;
272 
273 	struct task		reset_task;
274 	struct task		fail_req_task;
275 	struct taskqueue	*taskqueue;
276 
277 	/* For shared legacy interrupt. */
278 	int			rid;
279 	struct resource		*res;
280 	void			*tag;
281 
282 	bus_dma_tag_t		hw_desc_tag;
283 	bus_dmamap_t		hw_desc_map;
284 
285 	/** maximum i/o size in bytes */
286 	uint32_t		max_xfer_size;
287 
288 	/** minimum page size supported by this controller in bytes */
289 	uint32_t		min_page_size;
290 
291 	/** interrupt coalescing time period (in microseconds) */
292 	uint32_t		int_coal_time;
293 
294 	/** interrupt coalescing threshold */
295 	uint32_t		int_coal_threshold;
296 
297 	/** timeout period in seconds */
298 	uint32_t		timeout_period;
299 
300 	/** doorbell stride */
301 	uint32_t		dstrd;
302 
	/* The admin qpair is embedded; I/O qpairs are reached through ioq. */
303 	struct nvme_qpair	adminq;
304 	struct nvme_qpair	*ioq;
305 
306 	struct nvme_registers		*regs;
307 
308 	struct nvme_controller_data	cdata;
	/* Fixed-size table, NVME_MAX_NAMESPACES entries. */
309 	struct nvme_namespace		ns[NVME_MAX_NAMESPACES];
310 
311 	struct cdev			*cdev;
312 
313 	/** bit mask of event types currently enabled for async events */
314 	uint32_t			async_event_config;
315 
316 	uint32_t			num_aers;
	/* Fixed-size table, NVME_MAX_ASYNC_EVENTS entries. */
317 	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
318 
	/* One opaque cookie slot per consumer, NVME_MAX_CONSUMERS in total. */
319 	void				*cons_cookie[NVME_MAX_CONSUMERS];
320 
321 	uint32_t			is_resetting;
322 	uint32_t			is_initialized;
323 	uint32_t			notification_sent;
324 
325 	bool				is_failed;
	/* Request list linked through nvme_request.stailq. */
326 	STAILQ_HEAD(, nvme_request)	fail_req;
327 };
328 
/* Byte offset of register field 'reg' within struct nvme_registers. */
329 #define nvme_mmio_offsetof(reg)						       \
330 	offsetof(struct nvme_registers, reg)
331 
/*
 * 32-bit read of register 'reg' through sc's bus_tag/bus_handle; the macro's
 *  value is whatever bus_space_read_4() returns.
 */
332 #define nvme_mmio_read_4(sc, reg)					       \
333 	bus_space_read_4((sc)->bus_tag, (sc)->bus_handle,		       \
334 	    nvme_mmio_offsetof(reg))
335 
/* 32-bit write of 'val' to register 'reg' through sc's bus_tag/bus_handle. */
336 #define nvme_mmio_write_4(sc, reg, val)					       \
337 	bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,		       \
338 	    nvme_mmio_offsetof(reg), val)
339 
/*
 * Write a 64-bit value to register 'reg' as two 32-bit accesses: the low
 *  dword at the register's offset, then the high dword at offset + 4.
 *
 * 'val' may be any expression and is evaluated exactly once.  The macro
 *  expands to a single statement with no trailing semicolon, so the caller
 *  supplies the ';' and it is safe in an unbraced if/else.
 */
#define nvme_mmio_write_8(sc, reg, val)					       \
	do {								       \
		uint64_t _nvme_val = (val);				       \
		bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,	       \
		    nvme_mmio_offsetof(reg),				       \
		    (uint32_t)(_nvme_val & 0xFFFFFFFF));		       \
		bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,	       \
		    nvme_mmio_offsetof(reg) + 4,			       \
		    (uint32_t)(_nvme_val >> 32));			       \
	} while (0)
348 
/*
 * printf-style message attributed to the controller's device through
 *  device_printf().  'ctrlr' is parenthesized so that any pointer-valued
 *  expression (e.g. "&softc" or "base + i") may be passed, not just a plain
 *  identifier.
 */
#define nvme_printf(ctrlr, fmt, args...)	\
    device_printf((ctrlr)->dev, fmt, ##args)
351 
352 void	nvme_ns_test(struct nvme_namespace *ns, u_long cmd, caddr_t arg);
353 
/*
 * nvme_ctrlr_cmd_*(): command helpers that all take the target controller
 *  plus a completion callback (cb_fn) and its opaque argument (cb_arg).
 *  None returns a status.
 * NOTE(review): presumably each builds and submits one admin command and
 *  reports the outcome only through cb_fn -- confirm in the implementation.
 */
354 void	nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr,
355 					   void *payload,
356 					   nvme_cb_fn_t cb_fn, void *cb_arg);
357 void	nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr,
358 					  uint32_t nsid, void *payload,
359 					  nvme_cb_fn_t cb_fn, void *cb_arg);
360 void	nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
361 						uint32_t microseconds,
362 						uint32_t threshold,
363 						nvme_cb_fn_t cb_fn,
364 						void *cb_arg);
365 void	nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
366 				      struct nvme_error_information_entry *payload,
367 				      uint32_t num_entries, /* 0 = max */
368 				      nvme_cb_fn_t cb_fn,
369 				      void *cb_arg);
370 void	nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr,
371 						   uint32_t nsid,
372 						   struct nvme_health_information_page *payload,
373 						   nvme_cb_fn_t cb_fn,
374 						   void *cb_arg);
375 void	nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
376 					 struct nvme_firmware_page *payload,
377 					 nvme_cb_fn_t cb_fn,
378 					 void *cb_arg);
379 void	nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
380 				    struct nvme_qpair *io_que,
381 				    nvme_cb_fn_t cb_fn, void *cb_arg);
382 void	nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
383 				    struct nvme_qpair *io_que,
384 				    nvme_cb_fn_t cb_fn, void *cb_arg);
385 void	nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr,
386 				    struct nvme_qpair *io_que,
387 				    nvme_cb_fn_t cb_fn, void *cb_arg);
388 void	nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr,
389 				    struct nvme_qpair *io_que,
390 				    nvme_cb_fn_t cb_fn, void *cb_arg);
391 void	nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
392 				      uint32_t num_queues, nvme_cb_fn_t cb_fn,
393 				      void *cb_arg);
394 void	nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
395 					      uint32_t state,
396 					      nvme_cb_fn_t cb_fn, void *cb_arg);
397 void	nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
398 			     uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg);
399 
/*
 * Completion callback meant to be paired with nvme_completion_poll(); 'arg'
 *  is presumably a struct nvme_completion_poll_status * -- confirm.
 */
400 void	nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl);
401 
/*
 * Controller lifecycle and request submission entry points.  The int-returning
 *  functions presumably return 0 or an errno value -- confirm in nvme_ctrlr.c.
 */
402 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
403 void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
404 void	nvme_ctrlr_shutdown(struct nvme_controller *ctrlr);
405 int	nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
406 void	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
407 /* ctrlr defined as void * to allow use with config_intrhook. */
408 void	nvme_ctrlr_start_config_hook(void *ctrlr_arg);
409 void	nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
410 					struct nvme_request *req);
411 void	nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
412 				     struct nvme_request *req);
413 void	nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
414 				       struct nvme_request *req);
415 
/*
 * Queue pair operations.  nvme_qpair_construct() takes the entry and tracker
 *  counts that size the qpair (see the NVME_*_ENTRIES / NVME_*_TRACKERS
 *  constants).
 */
416 int	nvme_qpair_construct(struct nvme_qpair *qpair,
417 			     uint32_t num_entries, uint32_t num_trackers,
418 			     struct nvme_controller *ctrlr);
419 void	nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
420 				  struct nvme_tracker *tr);
421 bool	nvme_qpair_process_completions(struct nvme_qpair *qpair);
422 void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
423 				  struct nvme_request *req);
424 void	nvme_qpair_reset(struct nvme_qpair *qpair);
425 void	nvme_qpair_fail(struct nvme_qpair *qpair);
426 void	nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
427 					   struct nvme_request *req,
428                                            uint32_t sct, uint32_t sc);
429 
/* Enable/disable/destroy for the admin qpair. */
430 void	nvme_admin_qpair_enable(struct nvme_qpair *qpair);
431 void	nvme_admin_qpair_disable(struct nvme_qpair *qpair);
432 void	nvme_admin_qpair_destroy(struct nvme_qpair *qpair);
433 
/* Enable/disable/destroy for an I/O qpair. */
434 void	nvme_io_qpair_enable(struct nvme_qpair *qpair);
435 void	nvme_io_qpair_disable(struct nvme_qpair *qpair);
436 void	nvme_io_qpair_destroy(struct nvme_qpair *qpair);
437 
/* Namespace setup/teardown; 'id' is the namespace ID to record in 'ns'. */
438 int	nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
439 			  struct nvme_controller *ctrlr);
440 void	nvme_ns_destruct(struct nvme_namespace *ns);
441 
442 void	nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr);
443 
/* Debug dump helpers for a command / completion entry. */
444 void	nvme_dump_command(struct nvme_command *cmd);
445 void	nvme_dump_completion(struct nvme_completion *cpl);
446 
/*
 * NOTE(review): signatures match newbus device methods (attach, shutdown,
 *  detach) -- presumably wired into the driver's method table; confirm.
 */
447 int	nvme_attach(device_t dev);
448 int	nvme_shutdown(device_t dev);
449 int	nvme_detach(device_t dev);
450 
451 /*
452  * Wait for a command to complete using the nvme_completion_poll_cb.
453  * Used in limited contexts where the caller knows it's OK to block
454  * briefly while the command runs. The ISR will run the callback which
455  * will set status->done to true.usually within microseconds. A 1s
456  * pause means something is seriously AFU and we should panic to
457  * provide the proper context to diagnose.
458  */
459 static __inline
460 void
461 nvme_completion_poll(struct nvme_completion_poll_status *status)
462 {
463 	int sanity = hz * 1;
464 
465 	while (!atomic_load_acq_int(&status->done) && --sanity > 0)
466 		pause("nvme", 1);
467 	if (sanity <= 0)
468 		panic("NVME polled command failed to complete within 1s.");
469 }
470 
471 static __inline void
472 nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
473 {
474 	uint64_t *bus_addr = (uint64_t *)arg;
475 
476 	if (error != 0)
477 		printf("nvme_single_map err %d\n", error);
478 	*bus_addr = seg[0].ds_addr;
479 }
480 
481 static __inline struct nvme_request *
482 _nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg)
483 {
484 	struct nvme_request *req;
485 
486 	req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO);
487 	if (req != NULL) {
488 		req->cb_fn = cb_fn;
489 		req->cb_arg = cb_arg;
490 		req->timeout = true;
491 	}
492 	return (req);
493 }
494 
495 static __inline struct nvme_request *
496 nvme_allocate_request_vaddr(void *payload, uint32_t payload_size,
497     nvme_cb_fn_t cb_fn, void *cb_arg)
498 {
499 	struct nvme_request *req;
500 
501 	req = _nvme_allocate_request(cb_fn, cb_arg);
502 	if (req != NULL) {
503 		req->type = NVME_REQUEST_VADDR;
504 		req->u.payload = payload;
505 		req->payload_size = payload_size;
506 	}
507 	return (req);
508 }
509 
510 static __inline struct nvme_request *
511 nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg)
512 {
513 	struct nvme_request *req;
514 
515 	req = _nvme_allocate_request(cb_fn, cb_arg);
516 	if (req != NULL)
517 		req->type = NVME_REQUEST_NULL;
518 	return (req);
519 }
520 
521 static __inline struct nvme_request *
522 nvme_allocate_request_bio(struct bio *bio, nvme_cb_fn_t cb_fn, void *cb_arg)
523 {
524 	struct nvme_request *req;
525 
526 	req = _nvme_allocate_request(cb_fn, cb_arg);
527 	if (req != NULL) {
528 		req->type = NVME_REQUEST_BIO;
529 		req->u.bio = bio;
530 	}
531 	return (req);
532 }
533 
534 static __inline struct nvme_request *
535 nvme_allocate_request_ccb(union ccb *ccb, nvme_cb_fn_t cb_fn, void *cb_arg)
536 {
537 	struct nvme_request *req;
538 
539 	req = _nvme_allocate_request(cb_fn, cb_arg);
540 	if (req != NULL) {
541 		req->type = NVME_REQUEST_CCB;
542 		req->u.payload = ccb;
543 	}
544 
545 	return (req);
546 }
547 
/* Return a request to nvme_request_zone, the zone it was allocated from. */
548 #define nvme_free_request(req)	uma_zfree(nvme_request_zone, req)
549 
/*
 * Consumer notification entry points.
 * NOTE(review): "consumers" presumably are the registrants tracked by the
 *  cons_cookie[] arrays (at most NVME_MAX_CONSUMERS) -- confirm.
 */
550 void	nvme_notify_async_consumers(struct nvme_controller *ctrlr,
551 				    const struct nvme_completion *async_cpl,
552 				    uint32_t log_page_id, void *log_page_buffer,
553 				    uint32_t log_page_size);
554 void	nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
555 void	nvme_notify_new_controller(struct nvme_controller *ctrlr);
556 void	nvme_notify_ns(struct nvme_controller *ctrlr, int nsid);
557 
/* 'arg' is untyped; presumably the struct nvme_controller * -- confirm. */
558 void	nvme_ctrlr_intx_handler(void *arg);
559 void	nvme_ctrlr_poll(struct nvme_controller *ctrlr);
560 
/* Suspend/resume hooks; the return value is presumably 0 or an errno. */
561 int	nvme_ctrlr_suspend(struct nvme_controller *ctrlr);
562 int	nvme_ctrlr_resume(struct nvme_controller *ctrlr);
563 
564 #endif /* __NVME_PRIVATE_H__ */
565