1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2012-2016 Intel Corporation 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

/*
 * Sysctl support for the NVMe driver: the global hw.nvme tunables plus the
 * per-controller and per-queue-pair sysctl nodes (statistics counters,
 * interrupt coalescing, timeout periods, and debug hooks) hung off each
 * device's sysctl tree.
 */

#include <sys/cdefs.h>
#include "opt_nvme.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include "nvme_private.h"

#ifndef NVME_USE_NVD
#define NVME_USE_NVD 0
#endif

/* Loader tunable: 1 = create NVD devices, 0 = create NDA devices. */
int nvme_use_nvd = NVME_USE_NVD;
/* When true, enables verbose command printing when a command fails. */
bool nvme_verbose_cmd_dump = false;

SYSCTL_NODE(_hw, OID_AUTO, nvme, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe sysctl tunables");
SYSCTL_INT(_hw_nvme, OID_AUTO, use_nvd, CTLFLAG_RDTUN,
    &nvme_use_nvd, 1, "1 = Create NVD devices, 0 = Create NDA devices");
SYSCTL_BOOL(_hw_nvme, OID_AUTO, verbose_cmd_dump, CTLFLAG_RWTUN,
    &nvme_verbose_cmd_dump, 0,
    "enable verbose command printing when a command fails");

/*
 * Print every completion and submission queue entry of the given queue pair
 * to the console.  Debugging aid, triggered via the per-queue "dump_debug"
 * sysctl handler below.
 */
static void
nvme_dump_queue(struct nvme_qpair *qpair)
{
	struct nvme_completion *cpl;
	struct nvme_command *cmd;
	int i;

	printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase);

	printf("Completion queue:\n");
	for (i = 0; i < qpair->num_entries; i++) {
		cpl = &qpair->cpl[i];
		printf("%05d: ", i);
		nvme_qpair_print_completion(qpair, cpl);
	}

	printf("Submission queue:\n");
	for (i = 0; i < qpair->num_entries; i++) {
		cmd = &qpair->cmd[i];
		printf("%05d: ", i);
		nvme_qpair_print_command(qpair, cmd);
	}
}

/*
 * Sysctl handler: writing any nonzero value dumps the queue pair (arg1)
 * via nvme_dump_queue().  Reads always return 0.
 */
static int
nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS)
{
	struct nvme_qpair *qpair = arg1;
	uint32_t val = 0;

	int error = sysctl_handle_int(oidp, &val, 0, req);

	if (error)
		return (error);

	if (val != 0)
		nvme_dump_queue(qpair);

	return (0);
}

/*
 * Sysctl handler for the interrupt coalescing time.  sysctl_handle_int()
 * updates ctrlr->int_coal_time in place; if the value actually changed we
 * push the new time/threshold pair to the controller.  Note there is no
 * range validation here — the device is handed whatever was written.
 */
static int
nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->int_coal_time;
	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0,
	    req);

	if (error)
		return (error);

	if (oldval != ctrlr->int_coal_time)
		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
		    NULL);

	return (0);
}

/*
 * Sysctl handler for the interrupt coalescing threshold; same update
 * strategy as nvme_sysctl_int_coal_time() above.
 */
static int
nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t oldval = ctrlr->int_coal_threshold;
	int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0,
	    req);

	if (error)
		return (error);

	if (oldval != ctrlr->int_coal_threshold)
		nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr,
		    ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL,
		    NULL);

	return (0);
}

/*
 * Shared sysctl handler for both the admin and I/O timeout periods: arg1
 * points at the uint32_t being controlled.  New values are validated
 * against [NVME_MIN_TIMEOUT_PERIOD, NVME_MAX_TIMEOUT_PERIOD] and rejected
 * with EINVAL, leaving the current setting untouched.
 */
static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
{
	uint32_t *ptr = arg1;
	uint32_t newval = *ptr;
	int error = sysctl_handle_int(oidp, &newval, 0, req);

	/* req->newptr == NULL means a read-only access: nothing to store. */
	if (error || (req->newptr == NULL))
		return (error);

	if (newval > NVME_MAX_TIMEOUT_PERIOD ||
	    newval < NVME_MIN_TIMEOUT_PERIOD) {
		return (EINVAL);
	} else {
		*ptr = newval;
	}

	return (0);
}

/*
 * Zero the statistics counters of one queue pair (called for the admin
 * queue and every I/O queue by nvme_sysctl_reset_stats()).
 */
static void
nvme_qpair_reset_stats(struct nvme_qpair *qpair)
{

	/*
	 * Reset the values. Due to sanity checks in
	 * nvme_qpair_process_completions, we reset the number of interrupt
	 * calls to 1.
	 */
	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 1;
	qpair->num_retries = 0;
	qpair->num_failures = 0;
	qpair->num_ignored = 0;
	qpair->num_recovery_nolock = 0;
}

/*
 * Read-only sysctl: total commands submitted — the admin queue's count
 * plus the sum over all I/O queues.  ctrlr->ioq may be NULL (e.g. on a
 * failed controller), hence the guard.
 */
static int
nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	int64_t num_cmds = 0;
	int i;

	num_cmds = ctrlr->adminq.num_cmds;

	if (ctrlr->ioq != NULL) {
		for (i = 0; i < ctrlr->num_io_queues; i++)
			num_cmds += ctrlr->ioq[i].num_cmds;
	}

	return (sysctl_handle_64(oidp, &num_cmds, 0, req));
}

/*
 * Read-only sysctl: total interrupt handler invocations, summed across the
 * admin queue and all I/O queues.
 */
static int
nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	int64_t num_intr_handler_calls = 0;
	int i;

	num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls;

	if (ctrlr->ioq != NULL) {
		for (i = 0; i < ctrlr->num_io_queues; i++)
			num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls;
	}

	return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req));
}

/*
 * Read-only sysctl: total commands retried, summed across the admin queue
 * and all I/O queues.
 */
static int
nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	int64_t num_retries = 0;
	int i;

	num_retries = ctrlr->adminq.num_retries;

	if (ctrlr->ioq != NULL) {
		for (i = 0; i < ctrlr->num_io_queues; i++)
			num_retries += ctrlr->ioq[i].num_retries;
	}

	return (sysctl_handle_64(oidp, &num_retries, 0, req));
}

/*
 * Read-only sysctl: total commands that failed after all retries, summed
 * across the admin queue and all I/O queues.
 */
static int
nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	int64_t num_failures = 0;
	int i;

	num_failures = ctrlr->adminq.num_failures;

	if (ctrlr->ioq != NULL) {
		for (i = 0; i < ctrlr->num_io_queues; i++)
			num_failures += ctrlr->ioq[i].num_failures;
	}

	return (sysctl_handle_64(oidp, &num_failures, 0, req));
}

/*
 * Read-only sysctl: total interrupts administratively ignored, summed
 * across the admin queue and all I/O queues.
 */
static int
nvme_sysctl_num_ignored(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	int64_t num_ignored = 0;
	int i;

	num_ignored = ctrlr->adminq.num_ignored;

	if (ctrlr->ioq != NULL) {
		for (i = 0; i < ctrlr->num_io_queues; i++)
			num_ignored += ctrlr->ioq[i].num_ignored;
	}

	return (sysctl_handle_64(oidp, &num_ignored, 0, req));
}

/*
 * Read-only sysctl: total times the recovery lock could not be taken in
 * the ISR, summed across the admin queue and all I/O queues.
 */
static int
nvme_sysctl_num_recovery_nolock(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	int64_t num;
	int i;

	num = ctrlr->adminq.num_recovery_nolock;

	if (ctrlr->ioq != NULL) {
		for (i = 0; i < ctrlr->num_io_queues; i++)
			num += ctrlr->ioq[i].num_recovery_nolock;
	}

	return (sysctl_handle_64(oidp, &num, 0, req));
}

/*
 * Sysctl handler: writing any nonzero value resets the statistics of the
 * admin queue and every I/O queue via nvme_qpair_reset_stats().
 */
static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr = arg1;
	uint32_t i, val = 0;

	int error = sysctl_handle_int(oidp, &val, 0, req);

	if (error)
		return (error);

	if (val != 0) {
		nvme_qpair_reset_stats(&ctrlr->adminq);

		if (ctrlr->ioq != NULL) {
			for (i = 0; i < ctrlr->num_io_queues; i++)
				nvme_qpair_reset_stats(&ctrlr->ioq[i]);
		}
	}

	return (0);
}

/*
 * Attach the per-queue-pair sysctl leaves (queue geometry, head/tail
 * pointers, statistics counters, recovery state, dump_debug trigger)
 * underneath que_tree.  The oids live in the controller's sysctl context,
 * so they are torn down with the device.
 */
static void
nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
    struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree)
{
	struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
	    CTLFLAG_RD, &qpair->num_entries, 0,
	    "Number of entries in hardware queue");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
	    CTLFLAG_RD, &qpair->num_trackers, 0,
	    "Number of trackers pre-allocated for this queue pair");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
	    CTLFLAG_RD, &qpair->sq_head, 0,
	    "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail",
	    CTLFLAG_RD, &qpair->sq_tail, 0,
	    "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head",
	    CTLFLAG_RD, &qpair->cq_head, 0,
	    "Current head of completion queue (as observed by driver)");

	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds",
	    CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls",
	    CTLFLAG_RD, &qpair->num_intr_handler_calls,
	    "Number of times interrupt handler was invoked (will typically be "
	    "less than number of actual interrupts generated due to "
	    "coalescing)");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries",
	    CTLFLAG_RD, &qpair->num_retries, "Number of commands retried");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures",
	    CTLFLAG_RD, &qpair->num_failures,
	    "Number of commands ending in failure after all retries");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_ignored",
	    CTLFLAG_RD, &qpair->num_ignored,
	    "Number of interrupts posted, but were administratively ignored");
	SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_recovery_nolock",
	    CTLFLAG_RD, &qpair->num_recovery_nolock,
	    "Number of times that we failed to lock recovery in the ISR");

	/* RW so the recovery state machine can be poked from userland. */
	SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "recovery",
	    CTLFLAG_RW, &qpair->recovery_state, 0,
	    "Current recovery state of the queue");

	SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
	    "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data");
}

/*
 * Build the complete sysctl tree for a controller: controller-wide leaves
 * (coalescing, timeouts, aggregate statistics, capability registers), the
 * "adminq" node, and — when the I/O queues were constructed — one numbered
 * node per I/O queue pair under "ioq".
 *
 * NOTE(review): the aggregate statistics nodes are CTLTYPE_S64 but pass
 * "IU" as the format string; the handlers use sysctl_handle_64(), so this
 * affects only the advertised format — confirm whether "Q" was intended.
 */
void
nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
{
	struct sysctl_ctx_list *ctrlr_ctx;
	struct sysctl_oid *ctrlr_tree, *que_tree, *ioq_tree;
	struct sysctl_oid_list *ctrlr_list, *ioq_list;
#define QUEUE_NAME_LENGTH 16
	char queue_name[QUEUE_NAME_LENGTH];
	int i;

	ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev);
	ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
	ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues",
	    CTLFLAG_RD, &ctrlr->num_io_queues, 0,
	    "Number of I/O queue pairs");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_int_coal_time, "IU",
	    "Interrupt coalescing timeout (in microseconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "int_coal_threshold",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
	    nvme_sysctl_int_coal_threshold, "IU",
	    "Interrupt coalescing threshold");

	/* Both timeout knobs share one handler; arg1 selects the field. */
	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "admin_timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &ctrlr->admin_timeout_period, 0, nvme_sysctl_timeout_period, "IU",
	    "Timeout period for Admin queue (in seconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    &ctrlr->timeout_period, 0, nvme_sysctl_timeout_period, "IU",
	    "Timeout period for I/O queues (in seconds)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_cmds, "IU",
	    "Number of commands submitted");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_intr_handler_calls",
	    CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0,
	    nvme_sysctl_num_intr_handler_calls, "IU",
	    "Number of times interrupt handler was invoked (will "
	    "typically be less than number of actual interrupts "
	    "generated due to coalescing)");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_retries, "IU",
	    "Number of commands retried");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_failures, "IU",
	    "Number of commands ending in failure after all retries");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_ignored", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_ignored, "IU",
	    "Number of interrupts ignored administratively");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "num_recovery_nolock", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    ctrlr, 0, nvme_sysctl_num_recovery_nolock, "IU",
	    "Number of times that we failed to lock recovery in the ISR");

	SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
	    "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr,
	    0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_lo",
	    CTLFLAG_RD, &ctrlr->cap_lo, 0,
	    "Low 32-bits of capacities for the drive");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "cap_hi",
	    CTLFLAG_RD, &ctrlr->cap_hi, 0,
	    "Hi 32-bits of capacities for the drive");

	SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "fail_on_reset",
	    CTLFLAG_RD, &ctrlr->fail_on_reset, 0,
	    "Pretend the next reset fails and fail the controller");

	que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");

	nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree);

	/*
	 * Make sure that we've constructed the I/O queues before setting up the
	 * sysctls. Failed controllers won't allocate it, but we want the rest
	 * of the sysctls to diagnose things.
	 */
	if (ctrlr->ioq != NULL) {
		ioq_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO,
		    "ioq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
		ioq_list = SYSCTL_CHILDREN(ioq_tree);

		for (i = 0; i < ctrlr->num_io_queues; i++) {
			snprintf(queue_name, QUEUE_NAME_LENGTH, "%d", i);
			que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ioq_list, OID_AUTO,
			    queue_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IO Queue");
			nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx,
			    que_tree);
		}
	}

	SYSCTL_ADD_COUNTER_U64(ctrlr_ctx, ctrlr_list, OID_AUTO, "alignment_splits",
	    CTLFLAG_RD, &ctrlr->alignment_splits,
	    "Number of times we split the I/O alignment for drives with preferred alignment");
}