1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2012-2016 Intel Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include "opt_nvme.h"
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/sysctl.h>
36
37 #include "nvme_private.h"
38
#ifndef NVME_USE_NVD
/* Default when the kernel config does not set NVME_USE_NVD. */
#define NVME_USE_NVD 0
#endif

/* 1 = attach namespaces as nvd(4) devices, 0 = as nda(4) CAM devices. */
int nvme_use_nvd = NVME_USE_NVD;
/* When true, failing commands are dumped in full to the console. */
bool nvme_verbose_cmd_dump = false;

/* Root of the hw.nvme sysctl tree; per-controller nodes hang off the device. */
SYSCTL_NODE(_hw, OID_AUTO, nvme, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe sysctl tunables");
SYSCTL_INT(_hw_nvme, OID_AUTO, use_nvd, CTLFLAG_RDTUN,
    &nvme_use_nvd, 1, "1 = Create NVD devices, 0 = Create NDA devices");
SYSCTL_BOOL(_hw_nvme, OID_AUTO, verbose_cmd_dump, CTLFLAG_RWTUN,
    &nvme_verbose_cmd_dump, 0,
    "enable verbose command printing when a command fails");
53
54 static void
nvme_dump_queue(struct nvme_qpair * qpair)55 nvme_dump_queue(struct nvme_qpair *qpair)
56 {
57 struct nvme_completion *cpl;
58 struct nvme_command *cmd;
59 int i;
60
61 printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);
62
63 printf("Completion queue:\n");
64 for (i = 0; i < qpair->num_entries; i++) {
65 cpl = &qpair->cpl[i];
66 printf("%05d: ", i);
67 nvme_qpair_print_completion(qpair, cpl);
68 }
69
70 printf("Submission queue:\n");
71 for (i = 0; i < qpair->num_entries; i++) {
72 cmd = &qpair->cmd[i];
73 printf("%05d: ", i);
74 nvme_qpair_print_command(qpair, cmd);
75 }
76 }
77
78 static int
nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)79 nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
80 {
81 struct nvme_qpair *qpair = arg1;
82 uint32_t val = 0;
83
84 int error = sysctl_handle_int(oidp, &val, 0, req);
85
86 if (error)
87 return (error);
88
89 if (val != 0)
90 nvme_dump_queue(qpair);
91
92 return (0);
93 }
94
95 static int
nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)96 nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
97 {
98 struct nvme_controller *ctrlr = arg1;
99 uint32_t oldval = ctrlr->int_coal_time;
100 int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
101 req);
102
103 if (error)
104 return (error);
105
106 if (oldval != ctrlr->int_coal_time)
107 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
108 ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
109 NULL);
110
111 return (0);
112 }
113
114 static int
nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)115 nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
116 {
117 struct nvme_controller *ctrlr = arg1;
118 uint32_t oldval = ctrlr->int_coal_threshold;
119 int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
120 req);
121
122 if (error)
123 return (error);
124
125 if (oldval != ctrlr->int_coal_threshold)
126 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
127 ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
128 NULL);
129
130 return (0);
131 }
132
133 static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)134 nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
135 {
136 uint32_t *ptr = arg1;
137 uint32_t newval = *ptr;
138 int error = sysctl_handle_int(oidp, &newval, 0, req);
139
140 if (error || (req->newptr == NULL))
141 return (error);
142
143 if (newval > NVME_MAX_TIMEOUT_PERIOD ||
144 newval < NVME_MIN_TIMEOUT_PERIOD) {
145 return (EINVAL);
146 } else {
147 *ptr = newval;
148 }
149
150 return (0);
151 }
152
153 static void
nvme_qpair_reset_stats(struct nvme_qpair * qpair)154 nvme_qpair_reset_stats(struct nvme_qpair *qpair)
155 {
156
157 /*
158 * Reset the values. Due to sanity checks in
159 * nvme_qpair_process_completions, we reset the number of interrupt
160 * calls to 1.
161 */
162 qpair->num_cmds = 0;
163 qpair->num_intr_handler_calls = 1;
164 qpair->num_retries = 0;
165 qpair->num_failures = 0;
166 qpair->num_ignored = 0;
167 qpair->num_recovery_nolock = 0;
168 }
169
170 static int
nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)171 nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
172 {
173 struct nvme_controller *ctrlr = arg1;
174 int64_t num_cmds = 0;
175 int i;
176
177 num_cmds = ctrlr->adminq.num_cmds;
178
179 if (ctrlr->ioq != NULL) {
180 for (i = 0; i < ctrlr->num_io_queues; i++)
181 num_cmds += ctrlr->ioq[i].num_cmds;
182 }
183
184 return (sysctl_handle_64(oidp, &num_cmds, 0, req));
185 }
186
187 static int
nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)188 nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
189 {
190 struct nvme_controller *ctrlr = arg1;
191 int64_t num_intr_handler_calls = 0;
192 int i;
193
194 num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;
195
196 if (ctrlr->ioq != NULL) {
197 for (i = 0; i < ctrlr->num_io_queues; i++)
198 num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;
199 }
200
201 return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
202 }
203
204 static int
nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)205 nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
206 {
207 struct nvme_controller *ctrlr = arg1;
208 int64_t num_retries = 0;
209 int i;
210
211 num_retries = ctrlr->adminq.num_retries;
212
213 if (ctrlr->ioq != NULL) {
214 for (i = 0; i < ctrlr->num_io_queues; i++)
215 num_retries += ctrlr->ioq[i].num_retries;
216 }
217
218 return (sysctl_handle_64(oidp, &num_retries, 0, req));
219 }
220
221 static int
nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)222 nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
223 {
224 struct nvme_controller *ctrlr = arg1;
225 int64_t num_failures = 0;
226 int i;
227
228 num_failures = ctrlr->adminq.num_failures;
229
230 if (ctrlr->ioq != NULL) {
231 for (i = 0; i < ctrlr->num_io_queues; i++)
232 num_failures += ctrlr->ioq[i].num_failures;
233 }
234
235 return (sysctl_handle_64(oidp, &num_failures, 0, req));
236 }
237
238 static int
nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)239 nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)
240 {
241 struct nvme_controller *ctrlr = arg1;
242 int64_t num_ignored = 0;
243 int i;
244
245 num_ignored = ctrlr->adminq.num_ignored;
246
247 if (ctrlr->ioq != NULL) {
248 for (i = 0; i < ctrlr->num_io_queues; i++)
249 num_ignored += ctrlr->ioq[i].num_ignored;
250 }
251
252 return (sysctl_handle_64(oidp, &num_ignored, 0, req));
253 }
254
255 static int
nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)256 nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)
257 {
258 struct nvme_controller *ctrlr = arg1;
259 int64_t num;
260 int i;
261
262 num = ctrlr->adminq.num_recovery_nolock;
263
264 if (ctrlr->ioq != NULL) {
265 for (i = 0; i < ctrlr->num_io_queues; i++)
266 num += ctrlr->ioq[i].num_recovery_nolock;
267 }
268
269 return (sysctl_handle_64(oidp, &num, 0, req));
270 }
271
272 static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)273 nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
274 {
275 struct nvme_controller *ctrlr = arg1;
276 uint32_t i, val = 0;
277
278 int error = sysctl_handle_int(oidp, &val, 0, req);
279
280 if (error)
281 return (error);
282
283 if (val != 0) {
284 nvme_qpair_reset_stats(&ctrlr->adminq);
285
286 if (ctrlr->ioq != NULL) {
287 for (i = 0; i < ctrlr->num_io_queues; i++)
288 nvme_qpair_reset_stats(&ctrlr->ioq[i]);
289 }
290 }
291
292 return (0);
293 }
294
/*
 * Attach the per-queue-pair sysctl leaves (geometry, statistics counters,
 * recovery state, and the dump_debug hook) under an already-created queue
 * node ("adminq" or "ioq.N").
 */
static void
nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
    struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
{
	struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree);

	/* Static queue geometry and current head/tail positions. */
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
	    CTLFLAG_RD, &qpair->num_entries, 0,
	    "Number of entries in hardware queue");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
	    CTLFLAG_RD, &qpair->num_trackers, 0,
	    "Number of trackers pre-allocated for this queue pair");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
	    CTLFLAG_RD, &qpair->sq_head, 0,
	    "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
	    CTLFLAG_RD, &qpair->sq_tail, 0,
	    "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
	    CTLFLAG_RD, &qpair->cq_head, 0,
	    "Current head of completion queue (as observed by driver)");

	/* Per-queue statistics counters (resettable via reset_stats). */
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
	    CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
	    CTLFLAG_RD, &qpair->num_intr_handler_calls,
	    "Number of times interrupt handler was invoked (will typically be "
	    "less than number of actual interrupts generated due to "
	    "coalescing)");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
	    CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
	    CTLFLAG_RD, &qpair->num_failures,
	    "Number of commands ending in failure after all retries");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_ignored",
	    CTLFLAG_RD, &qpair->num_ignored,
	    "Number of interrupts posted, but were administratively ignored");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_recovery_nolock",
	    CTLFLAG_RD, &qpair->num_recovery_nolock,
	    "Number of times that we failed to lock recovery in the ISR");

	/* NOTE: RW — writing this changes the queue's recovery state. */
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "recovery",
	    CTLFLAG_RW, &qpair->recovery_state, 0,
	    "Current recovery state of the queue");

	/* Write non-zero to dump this queue's SQ/CQ to the console. */
	SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
	    "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data");
}
344
/*
 * Build the controller's sysctl tree under its device node: tunables,
 * aggregate statistics handlers, capability registers, the admin queue
 * node, and (when present) one node per I/O queue pair.  Called once
 * during controller attach.
 */
void
nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
{
	struct sysctl_ctx_list *ctrlr_ctx;
	struct sysctl_oid *ctrlr_tree, *que_tree, *ioq_tree;
	struct sysctl_oid_list *ctrlr_list, *ioq_list;
#define QUEUE_NAME_LENGTH 16
	char queue_name[QUEUE_NAME_LENGTH];
	int i;

	ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues",
	    CTLFLAG_RD, &ctrlr->num_io_queues, 0,
	    "Number of I/O queue pairs");

	/* Writable tunables backed by proc handlers defined above. */
	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_int_coal_time, "IU",
	    "Interrupt coalescing timeout (in microseconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_threshold",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
	    nvme_sysctl_int_coal_threshold, "IU",
	    "Interrupt coalescing threshold");

	/* Admin and I/O timeouts share one handler; arg1 selects the field. */
	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "admin_timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &ctrlr->admin_timeout_period, 0, nvme_sysctl_timeout_period, "IU",
	    "Timeout period for Admin queue (in seconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &ctrlr->timeout_period, 0, nvme_sysctl_timeout_period, "IU",
	    "Timeout period for I/O queues (in seconds)");

	/* Read-only aggregates summed over the admin and I/O queues. */
	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_cmds, "IU",
	    "Number of commands submitted");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_intr_handler_calls",
	    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0,
	    nvme_sysctl_num_intr_handler_calls, "IU",
	    "Number of times interrupt handler was invoked (will "
	    "typically be less than number of actual interrupts "
	    "generated due to coalescing)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_retries, "IU",
	    "Number of commands retried");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_failures, "IU",
	    "Number of commands ending in failure after all retries");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_ignored", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_ignored, "IU",
	    "Number of interrupts ignored administratively");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_recovery_nolock", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_recovery_nolock, "IU",
	    "Number of times that we failed to lock recovery in the ISR");

	/* Write non-zero to zero all of the above counters. */
	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr,
	    0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");

	/* Raw CAP register halves, as cached at attach time. */
	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_lo",
	    CTLFLAG_RD, &ctrlr->cap_lo, 0,
	    "Low 32-bits of capacities for the drive");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_hi",
	    CTLFLAG_RD, &ctrlr->cap_hi, 0,
	    "Hi 32-bits of capacities for the drive");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "fail_on_reset",
	    CTLFLAG_RD, &ctrlr->fail_on_reset, 0,
	    "Pretend the next reset fails and fail the controller");

	que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");

	nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);

	/*
	 * Make sure that we've constructed the I/O queues before setting up the
	 * sysctls. Failed controllers won't allocate it, but we want the rest
	 * of the sysctls to diagnose things.
	 */
	if (ctrlr->ioq != NULL) {
		ioq_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
		    "ioq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
		ioq_list = SYSCTL_CHILDREN(ioq_tree);

		/* One numbered node per I/O queue pair: ioq.0, ioq.1, ... */
		for (i = 0; i < ctrlr->num_io_queues; i++) {
			snprintf(queue_name, QUEUE_NAME_LENGTH, "%d", i);
			que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ioq_list, OID_AUTO,
			    queue_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IO Queue");
			nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
			    que_tree);
		}
	}

	SYSCTL_ADD_COUNTER_U64(ctrlr_ctx, ctrlr_list, OID_AUTO, "alignment_splits",
	    CTLFLAG_RD, &ctrlr->alignment_splits,
	    "Number of times we split the I/O alignment for drives with preferred alignment");
}
461