1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2012-2016 Intel Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include "opt_nvme.h"
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/sysctl.h>
36
37 #include "nvme_private.h"
38
39 #ifndef NVME_USE_NVD
40 #define NVME_USE_NVD 0
41 #endif
42
43 int nvme_use_nvd = NVME_USE_NVD;
44 bool nvme_verbose_cmd_dump = false;
45
46 SYSCTL_NODE(_hw, OID_AUTO, nvme, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
47 "NVMe sysctl tunables");
48 SYSCTL_INT(_hw_nvme, OID_AUTO, use_nvd, CTLFLAG_RDTUN,
49 &nvme_use_nvd, 1, "1 = Create NVD devices, 0 = Create NDA devices");
50 SYSCTL_BOOL(_hw_nvme, OID_AUTO, verbose_cmd_dump, CTLFLAG_RWTUN,
51 &nvme_verbose_cmd_dump, 0,
52 "enable verbose command printing when a command fails");
53
54 static void
nvme_dump_queue(struct nvme_qpair * qpair)55 nvme_dump_queue(struct nvme_qpair *qpair)
56 {
57 struct nvme_completion *cpl;
58 struct nvme_command *cmd;
59 int i;
60
61 printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);
62
63 printf("Completion queue:\n");
64 for (i = 0; i < qpair->num_entries; i++) {
65 cpl = &qpair->cpl[i];
66 printf("%05d: ", i);
67 nvme_qpair_print_completion(qpair, cpl);
68 }
69
70 printf("Submission queue:\n");
71 for (i = 0; i < qpair->num_entries; i++) {
72 cmd = &qpair->cmd[i];
73 printf("%05d: ", i);
74 nvme_qpair_print_command(qpair, cmd);
75 }
76 }
77
78 static int
nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)79 nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
80 {
81 struct nvme_qpair *qpair = arg1;
82 uint32_t val = 0;
83
84 int error = sysctl_handle_int(oidp, &val, 0, req);
85
86 if (error)
87 return (error);
88
89 if (val != 0)
90 nvme_dump_queue(qpair);
91
92 return (0);
93 }
94
95 static int
nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)96 nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
97 {
98 struct nvme_controller *ctrlr = arg1;
99 uint32_t oldval = ctrlr->int_coal_time;
100 int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
101 req);
102
103 if (error)
104 return (error);
105
106 if (oldval != ctrlr->int_coal_time)
107 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
108 ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
109 NULL);
110
111 return (0);
112 }
113
114 static int
nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)115 nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
116 {
117 struct nvme_controller *ctrlr = arg1;
118 uint32_t oldval = ctrlr->int_coal_threshold;
119 int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
120 req);
121
122 if (error)
123 return (error);
124
125 if (oldval != ctrlr->int_coal_threshold)
126 nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
127 ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
128 NULL);
129
130 return (0);
131 }
132
133 static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)134 nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
135 {
136 uint32_t *ptr = arg1;
137 uint32_t newval = *ptr;
138 int error = sysctl_handle_int(oidp, &newval, 0, req);
139
140 if (error || (req->newptr == NULL))
141 return (error);
142
143 if (newval > NVME_MAX_TIMEOUT_PERIOD ||
144 newval < NVME_MIN_TIMEOUT_PERIOD) {
145 return (EINVAL);
146 } else {
147 *ptr = newval;
148 }
149
150 return (0);
151 }
152
153 static void
nvme_qpair_reset_stats(struct nvme_qpair * qpair)154 nvme_qpair_reset_stats(struct nvme_qpair *qpair)
155 {
156 /*
157 * Reset the values. Due to sanity checks in
158 * nvme_qpair_process_completions, we reset the number of interrupt
159 * calls to 1.
160 */
161 qpair->num_cmds = 0;
162 qpair->num_intr_handler_calls = 1;
163 qpair->num_retries = 0;
164 qpair->num_failures = 0;
165 qpair->num_ignored = 0;
166 qpair->num_recovery_nolock = 0;
167 }
168
169 static int
nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)170 nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
171 {
172 struct nvme_controller *ctrlr = arg1;
173 int64_t num_cmds = 0;
174 int i;
175
176 num_cmds = ctrlr->adminq.num_cmds;
177
178 if (ctrlr->ioq != NULL) {
179 for (i = 0; i < ctrlr->num_io_queues; i++)
180 num_cmds += ctrlr->ioq[i].num_cmds;
181 }
182
183 return (sysctl_handle_64(oidp, &num_cmds, 0, req));
184 }
185
186 static int
nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)187 nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
188 {
189 struct nvme_controller *ctrlr = arg1;
190 int64_t num_intr_handler_calls = 0;
191 int i;
192
193 num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;
194
195 if (ctrlr->ioq != NULL) {
196 for (i = 0; i < ctrlr->num_io_queues; i++)
197 num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;
198 }
199
200 return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
201 }
202
203 static int
nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)204 nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
205 {
206 struct nvme_controller *ctrlr = arg1;
207 int64_t num_retries = 0;
208 int i;
209
210 num_retries = ctrlr->adminq.num_retries;
211
212 if (ctrlr->ioq != NULL) {
213 for (i = 0; i < ctrlr->num_io_queues; i++)
214 num_retries += ctrlr->ioq[i].num_retries;
215 }
216
217 return (sysctl_handle_64(oidp, &num_retries, 0, req));
218 }
219
220 static int
nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)221 nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
222 {
223 struct nvme_controller *ctrlr = arg1;
224 int64_t num_failures = 0;
225 int i;
226
227 num_failures = ctrlr->adminq.num_failures;
228
229 if (ctrlr->ioq != NULL) {
230 for (i = 0; i < ctrlr->num_io_queues; i++)
231 num_failures += ctrlr->ioq[i].num_failures;
232 }
233
234 return (sysctl_handle_64(oidp, &num_failures, 0, req));
235 }
236
237 static int
nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)238 nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)
239 {
240 struct nvme_controller *ctrlr = arg1;
241 int64_t num_ignored = 0;
242 int i;
243
244 num_ignored = ctrlr->adminq.num_ignored;
245
246 if (ctrlr->ioq != NULL) {
247 for (i = 0; i < ctrlr->num_io_queues; i++)
248 num_ignored += ctrlr->ioq[i].num_ignored;
249 }
250
251 return (sysctl_handle_64(oidp, &num_ignored, 0, req));
252 }
253
254 static int
nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)255 nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)
256 {
257 struct nvme_controller *ctrlr = arg1;
258 int64_t num;
259 int i;
260
261 num = ctrlr->adminq.num_recovery_nolock;
262
263 if (ctrlr->ioq != NULL) {
264 for (i = 0; i < ctrlr->num_io_queues; i++)
265 num += ctrlr->ioq[i].num_recovery_nolock;
266 }
267
268 return (sysctl_handle_64(oidp, &num, 0, req));
269 }
270
271 static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)272 nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
273 {
274 struct nvme_controller *ctrlr = arg1;
275 uint32_t i, val = 0;
276
277 int error = sysctl_handle_int(oidp, &val, 0, req);
278
279 if (error)
280 return (error);
281
282 if (val != 0) {
283 nvme_qpair_reset_stats(&ctrlr->adminq);
284
285 if (ctrlr->ioq != NULL) {
286 for (i = 0; i < ctrlr->num_io_queues; i++)
287 nvme_qpair_reset_stats(&ctrlr->ioq[i]);
288 }
289 }
290
291 return (0);
292 }
293
294 static void
nvme_sysctl_initialize_queue(struct nvme_qpair * qpair,struct sysctl_ctx_list * ctrlr_ctx,struct sysctl_oid * que_tree)295 nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
296 struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
297 {
298 struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree);
299
300 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
301 CTLFLAG_RD, &qpair->num_entries, 0,
302 "Number of entries in hardware queue");
303 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
304 CTLFLAG_RD, &qpair->num_trackers, 0,
305 "Number of trackers pre-allocated for this queue pair");
306 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
307 CTLFLAG_RD, &qpair->sq_head, 0,
308 "Current head of submission queue (as observed by driver)");
309 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
310 CTLFLAG_RD, &qpair->sq_tail, 0,
311 "Current tail of submission queue (as observed by driver)");
312 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
313 CTLFLAG_RD, &qpair->cq_head, 0,
314 "Current head of completion queue (as observed by driver)");
315
316 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
317 CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
318 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
319 CTLFLAG_RD, &qpair->num_intr_handler_calls,
320 "Number of times interrupt handler was invoked (will typically be "
321 "less than number of actual interrupts generated due to "
322 "coalescing)");
323 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
324 CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
325 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
326 CTLFLAG_RD, &qpair->num_failures,
327 "Number of commands ending in failure after all retries");
328 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_ignored",
329 CTLFLAG_RD, &qpair->num_ignored,
330 "Number of interrupts posted, but were administratively ignored");
331 SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_recovery_nolock",
332 CTLFLAG_RD, &qpair->num_recovery_nolock,
333 "Number of times that we failed to lock recovery in the ISR");
334
335 SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "recovery",
336 CTLFLAG_RW, &qpair->recovery_state, 0,
337 "Current recovery state of the queue");
338
339 SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
340 "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
341 qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data");
342 }
343
344 void
nvme_sysctl_initialize_ctrlr(struct nvme_controller * ctrlr)345 nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
346 {
347 struct sysctl_ctx_list *ctrlr_ctx;
348 struct sysctl_oid *ctrlr_tree, *que_tree, *ioq_tree;
349 struct sysctl_oid_list *ctrlr_list, *ioq_list;
350 #define QUEUE_NAME_LENGTH 16
351 char queue_name[QUEUE_NAME_LENGTH];
352 int i;
353
354 ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
355 ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
356 ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
357
358 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues",
359 CTLFLAG_RD, &ctrlr->num_io_queues, 0,
360 "Number of I/O queue pairs");
361
362 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
363 "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
364 ctrlr, 0, nvme_sysctl_int_coal_time, "IU",
365 "Interrupt coalescing timeout (in microseconds)");
366
367 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
368 "int_coal_threshold",
369 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
370 nvme_sysctl_int_coal_threshold, "IU",
371 "Interrupt coalescing threshold");
372
373 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
374 "admin_timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
375 &ctrlr->admin_timeout_period, 0, nvme_sysctl_timeout_period, "IU",
376 "Timeout period for Admin queue (in seconds)");
377
378 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
379 "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
380 &ctrlr->timeout_period, 0, nvme_sysctl_timeout_period, "IU",
381 "Timeout period for I/O queues (in seconds)");
382
383 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
384 "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
385 ctrlr, 0, nvme_sysctl_num_cmds, "IU",
386 "Number of commands submitted");
387
388 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
389 "num_intr_handler_calls",
390 CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0,
391 nvme_sysctl_num_intr_handler_calls, "IU",
392 "Number of times interrupt handler was invoked (will "
393 "typically be less than number of actual interrupts "
394 "generated due to coalescing)");
395
396 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
397 "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
398 ctrlr, 0, nvme_sysctl_num_retries, "IU",
399 "Number of commands retried");
400
401 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
402 "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
403 ctrlr, 0, nvme_sysctl_num_failures, "IU",
404 "Number of commands ending in failure after all retries");
405
406 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
407 "num_ignored", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
408 ctrlr, 0, nvme_sysctl_num_ignored, "IU",
409 "Number of interrupts ignored administratively");
410
411 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
412 "num_recovery_nolock", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
413 ctrlr, 0, nvme_sysctl_num_recovery_nolock, "IU",
414 "Number of times that we failed to lock recovery in the ISR");
415
416 SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
417 "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr,
418 0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");
419
420 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_lo",
421 CTLFLAG_RD, &ctrlr->cap_lo, 0,
422 "Low 32-bits of capacities for the drive");
423
424 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_hi",
425 CTLFLAG_RD, &ctrlr->cap_hi, 0,
426 "Hi 32-bits of capacities for the drive");
427
428 SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "fail_on_reset",
429 CTLFLAG_RD, &ctrlr->fail_on_reset, 0,
430 "Pretend the next reset fails and fail the controller");
431
432 que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
433 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
434
435 nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);
436
437 /*
438 * Make sure that we've constructed the I/O queues before setting up the
439 * sysctls. Failed controllers won't allocate it, but we want the rest
440 * of the sysctls to diagnose things.
441 */
442 if (ctrlr->ioq != NULL) {
443 ioq_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
444 "ioq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
445 ioq_list = SYSCTL_CHILDREN(ioq_tree);
446
447 for (i = 0; i < ctrlr->num_io_queues; i++) {
448 snprintf(queue_name, QUEUE_NAME_LENGTH, "%d", i);
449 que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ioq_list, OID_AUTO,
450 queue_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IO Queue");
451 nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
452 que_tree);
453 }
454 }
455
456 SYSCTL_ADD_COUNTER_U64(ctrlr_ctx, ctrlr_list, OID_AUTO, "alignment_splits",
457 CTLFLAG_RD, &ctrlr->alignment_splits,
458 "Number of times we split the I/O alignment for drives with preferred alignment");
459 }
460