/*-
 * Copyright (C) 2012-2016 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include "nvme_private.h"

SYSCTL_NODE(_kern, OID_AUTO, nvme, CTLFLAG_RD, 0, "NVM Express");

/*
 * Intel NVMe controllers have a slow path for I/Os that span a 128KB
 * stripe boundary, but ZFS limits ashift, which is derived from
 * d_stripesize, to 13 (8KB), so we limit the stripesize reported to
 * geom(8) to 4KB by default.
 *
 * This may cause a small number of additional I/Os to require splitting
 * in nvme(4); however, the NVMe I/O path is efficient enough that these
 * additional I/Os make very little (if any) difference in performance
 * or CPU utilisation.
 */
int nvme_max_optimal_sectorsize = 1 << 12;
SYSCTL_INT(_kern_nvme, OID_AUTO, max_optimal_sectorsize, CTLFLAG_RWTUN,
    &nvme_max_optimal_sectorsize, 0, "The maximum optimal sectorsize reported");
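
/*
 * Example usage of the knob above (the value 8192 is illustrative
 * only).  Because the OID is declared CTLFLAG_RWTUN, it may be set as
 * a loader tunable of the same name or changed at runtime:
 *
 *   kern.nvme.max_optimal_sectorsize="8192"   (in /boot/loader.conf)
 *   sysctl kern.nvme.max_optimal_sectorsize=8192
 */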

/*
 * CTLTYPE_S64 and sysctl_handle_64 were added in r217616.  Define these
 * explicitly here for older kernels that don't include the r217616
 * changeset.
 */
#ifndef CTLTYPE_S64
#define CTLTYPE_S64		CTLTYPE_QUAD
#define sysctl_handle_64	sysctl_handle_quad
#endif
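
/*
 * On such older kernels the fallback quad handler is also 64 bits wide,
 * so the aggregate statistics sysctls below report the same values
 * either way.
 */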

static void
nvme_dump_queue(struct nvme_qpair *qpair)
{
	struct nvme_completion *cpl;
	struct nvme_command *cmd;
	int i;

	printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);

	printf("Completion queue:\n");
	for (i = 0; i < qpair->num_entries; i++) {
		cpl = &qpair->cpl[i];
		printf("%05d: ", i);
		nvme_dump_completion(cpl);
	}

	printf("Submission queue:\n");
	for (i = 0; i < qpair->num_entries; i++) {
		cmd = &qpair->cmd[i];
		printf("%05d: ", i);
		nvme_dump_command(cmd);
	}
}

static int
nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
{
	struct nvme_qpair	*qpair = arg1;
	uint32_t		val = 0;

	int error = sysctl_handle_int(oidp, &val, 0, req);

	if (error)
		return (error);

	if (val != 0)
		nvme_dump_queue(qpair);

	return (0);
}
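
/*
 * Example usage of the dump_debug handler above (unit and queue names
 * are illustrative); the OID is attached to each queue pair in
 * nvme_sysctl_initialize_queue() below:
 *
 *   sysctl dev.nvme.0.adminq.dump_debug=1
 *   sysctl dev.nvme.0.ioq0.dump_debug=1
 */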

static int
nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->int_coal_time;
	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
	    req);

	if (error)
		return (error);

	if (oldval != ctrlr->int_coal_time)
		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
		    NULL);

	return (0);
}

static int
nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->int_coal_threshold;
	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
	    req);

	if (error)
		return (error);

	if (oldval != ctrlr->int_coal_threshold)
		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
		    NULL);

	return (0);
}
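
/*
 * The two interrupt coalescing handlers above push the new (time,
 * threshold) pair to the controller whenever either value changes.
 * Example usage (values are illustrative):
 *
 *   sysctl dev.nvme.0.int_coal_time=100
 *   sysctl dev.nvme.0.int_coal_threshold=8
 */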

static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->timeout_period;
	int error = sysctl_handle_int(oidp, &ctrlr->timeout_period, 0, req);

	if (error)
		return (error);

	if (ctrlr->timeout_period > NVME_MAX_TIMEOUT_PERIOD ||
	    ctrlr->timeout_period < NVME_MIN_TIMEOUT_PERIOD) {
		ctrlr->timeout_period = oldval;
		return (EINVAL);
	}

	return (0);
}
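
/*
 * The handler above rejects values outside [NVME_MIN_TIMEOUT_PERIOD,
 * NVME_MAX_TIMEOUT_PERIOD] with EINVAL and restores the previous
 * setting.  Example usage (value is illustrative):
 *
 *   sysctl dev.nvme.0.timeout_period=30
 */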

static void
nvme_qpair_reset_stats(struct nvme_qpair *qpair)
{

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
}

static int
nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_cmds = 0;
	int			i;

	num_cmds = ctrlr->adminq.num_cmds;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_cmds += ctrlr->ioq[i].num_cmds;

	return (sysctl_handle_64(oidp, &num_cmds, 0, req));
}

static int
nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	int64_t			num_intr_handler_calls = 0;
	int			i;

	num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;

	for (i = 0; i < ctrlr->num_io_queues; i++)
		num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;

	return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
}

static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller	*ctrlr = arg1;
	uint32_t		i, val = 0;

	int error = sysctl_handle_int(oidp, &val, 0, req);

	if (error)
		return (error);

	if (val != 0) {
		nvme_qpair_reset_stats(&ctrlr->adminq);

		for (i = 0; i < ctrlr->num_io_queues; i++)
			nvme_qpair_reset_stats(&ctrlr->ioq[i]);
	}

	return (0);
}
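
/*
 * Writing any non-zero value to the reset_stats handler above zeroes
 * the counters on the admin queue and on every I/O queue, e.g.:
 *
 *   sysctl dev.nvme.0.reset_stats=1
 */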

static void
nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
    struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
{
	struct sysctl_oid_list	*que_list = SYSCTL_CHILDREN(que_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
	    CTLFLAG_RD, &qpair->num_entries, 0,
	    "Number of entries in hardware queue");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
	    CTLFLAG_RD, &qpair->num_trackers, 0,
	    "Number of trackers pre-allocated for this queue pair");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
	    CTLFLAG_RD, &qpair->sq_head, 0,
	    "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
	    CTLFLAG_RD, &qpair->sq_tail, 0,
	    "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
	    CTLFLAG_RD, &qpair->cq_head, 0,
	    "Current head of completion queue (as observed by driver)");

	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
	    CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
	    CTLFLAG_RD, &qpair->num_intr_handler_calls,
	    "Number of times interrupt handler was invoked (will typically be "
	    "less than number of actual interrupts generated due to "
	    "coalescing)");

	SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
	    "dump_debug", CTLTYPE_UINT | CTLFLAG_RW, qpair, 0,
	    nvme_sysctl_dump_debug, "IU", "Dump debug data");
}

void
nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
{
	struct sysctl_ctx_list	*ctrlr_ctx;
	struct sysctl_oid	*ctrlr_tree, *que_tree;
	struct sysctl_oid_list	*ctrlr_list;
#define QUEUE_NAME_LENGTH	16
	char			queue_name[QUEUE_NAME_LENGTH];
	int			i;

	ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_cpus_per_ioq",
	    CTLFLAG_RD, &ctrlr->num_cpus_per_ioq, 0,
	    "Number of CPUs assigned per I/O queue pair");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
	    nvme_sysctl_int_coal_time, "IU",
	    "Interrupt coalescing timeout (in microseconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_threshold", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
	    nvme_sysctl_int_coal_threshold, "IU",
	    "Interrupt coalescing threshold");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "timeout_period", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
	    nvme_sysctl_timeout_period, "IU",
	    "Timeout period (in seconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_cmds", CTLTYPE_S64 | CTLFLAG_RD,
	    ctrlr, 0, nvme_sysctl_num_cmds, "IU",
	    "Number of commands submitted");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_intr_handler_calls", CTLTYPE_S64 | CTLFLAG_RD,
	    ctrlr, 0, nvme_sysctl_num_intr_handler_calls, "IU",
	    "Number of times interrupt handler was invoked (will "
	    "typically be less than number of actual interrupts "
	    "generated due to coalescing)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "reset_stats", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
	    nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");

	que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
	    CTLFLAG_RD, NULL, "Admin Queue");

	nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);

	for (i = 0; i < ctrlr->num_io_queues; i++) {
		snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i);
		que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
		    queue_name, CTLFLAG_RD, NULL, "IO Queue");
		nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
		    que_tree);
	}
}
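
/*
 * Sketch of the resulting sysctl tree for a hypothetical nvme0 with two
 * I/O queue pairs (controller OIDs hang off the device's sysctl tree):
 *
 *   dev.nvme.0.num_cpus_per_ioq
 *   dev.nvme.0.int_coal_time
 *   dev.nvme.0.int_coal_threshold
 *   dev.nvme.0.timeout_period
 *   dev.nvme.0.num_cmds
 *   dev.nvme.0.num_intr_handler_calls
 *   dev.nvme.0.reset_stats
 *   dev.nvme.0.adminq.{num_entries,...,dump_debug}
 *   dev.nvme.0.ioq0.{num_entries,...,dump_debug}
 *   dev.nvme.0.ioq1.{num_entries,...,dump_debug}
 */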