/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * This implements the general locking routines. See the big theory section
 * 'ioctls, Errors, and Exclusive Access' for more information.
 */

#include <sys/stddef.h>
#include <sys/nvme.h>

#include "nvme_reg.h"
#include "nvme_var.h"

/*
 * Do we have a writer or someone pending? Note, some cases require checking
 * both of these and others do not. Please see each individual check for the
 * nuance here. As a general rule of thumb, when locking, the pending writers
 * are important. However, when passing the lock on to the next owner (the
 * handoff functions below), one doesn't check them.
 */
static boolean_t
nvme_rwlock_wr_or_pend(nvme_lock_t *lock)
{
	return (lock->nl_writer != NULL ||
	    list_is_empty(&lock->nl_pend_writers) == 0);
}

/*
 * Taking a namespace read lock requires that there is no active or pending
 * writer on either the controller or the namespace.
 */
static boolean_t
nvme_rwlock_block_ns_rdlock(nvme_t *nvme, nvme_namespace_t *ns)
{
	return (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    nvme_rwlock_wr_or_pend(&ns->ns_lock));
}

/*
 * The following entities all block a namespace write lock from being taken:
 *
 * 1) Any active or pending writer on the controller lock. They block and
 *    starve namespace writers respectively.
 * 2) Any active or pending writers on the namespace lock. We must wait in
 *    line.
 * 3) Any active readers on the namespace lock. We ignore pending namespace
 *    readers because, by definition, some other blocking condition must
 *    already exist for them to be pending.
 */
static boolean_t
nvme_rwlock_block_ns_wrlock(nvme_t *nvme, nvme_namespace_t *ns)
{
	return (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    nvme_rwlock_wr_or_pend(&ns->ns_lock) ||
	    list_is_empty(&ns->ns_lock.nl_readers) == 0);
}

/*
 * The only thing that blocks acquisition of a controller read lock is an
 * outstanding or pending writer on the controller lock. We can ignore the
 * state of all namespaces here.
 */
static boolean_t
nvme_rwlock_block_ctrl_rdlock(nvme_t *nvme)
{
	return (nvme_rwlock_wr_or_pend(&nvme->n_lock));
}

/*
 * Taking the controller write lock is the most challenging of all, but it
 * also takes priority. The following all block a controller write lock from
 * being taken:
 *
 * 1) Any active or pending controller write lock
 * 2) Any controller read lock. We skip pending reads because if they exist,
 *    some other blocking condition will already trip us.
 * 3) Any namespace write lock. We ignore pending namespace writers because,
 *    by definition, some other blocking condition already exists.
 * 4) Any read lock on a namespace. We ignore pending reads like in the
 *    controller case.
 */
static boolean_t
nvme_rwlock_block_ctrl_wrlock(nvme_t *nvme)
{
	if (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    list_is_empty(&nvme->n_lock.nl_readers) == 0) {
		return (B_TRUE);
	}

	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		if (ns->ns_lock.nl_writer != NULL ||
		    list_is_empty(&ns->ns_lock.nl_readers) == 0) {
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * Answer whether we can hand off the world to a pending controller write
 * lock. This follows rules similar to the above; however, we critically
 * _ignore_ pending controller write lock holds, as the assumption is that one
 * of them is why we're here. That leaves controller reader locks and
 * namespace locks as the only considerations from above.
 */
static boolean_t
nvme_rwlock_handoff_ctrl_wrlock(nvme_t *nvme)
{
	/* See nvme_rwlock_wakeup() for why this can be done. */
	ASSERT3P(nvme->n_lock.nl_writer, ==, NULL);

	if (list_is_empty(&nvme->n_lock.nl_readers) == 0) {
		return (B_FALSE);
	}

	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		if (ns->ns_lock.nl_writer != NULL ||
		    list_is_empty(&ns->ns_lock.nl_readers) == 0) {
			return (B_FALSE);
		}
	}

	return (B_TRUE);
}

/*
 * Namespace handoff variant. It skips pending writers on the namespace lock,
 * but fully considers them on the controller due to their priority. Otherwise
 * this follows the same rules as the normal blocking check.
 */
static boolean_t
nvme_rwlock_handoff_ns_wrlock(nvme_t *nvme, nvme_namespace_t *ns)
{
	if (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    list_is_empty(&nvme->n_lock.nl_readers) == 0) {
		return (B_FALSE);
	}

	if (ns->ns_lock.nl_writer != NULL ||
	    list_is_empty(&ns->ns_lock.nl_readers) == 0) {
		return (B_FALSE);
	}

	return (B_TRUE);
}

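/*
 * The following two functions actually grant a lock to a given minor: they
 * mark the hold as acquired, record the acquiring thread, process, and time,
 * and update the lock's bookkeeping. Callers must already have determined
 * that the acquisition is allowed; the assertions encode those preconditions.
 */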
static void
nvme_rwlock_rdlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
	ASSERT3U(list_is_empty(&lock->nl_pend_writers), !=, 0);
	ASSERT3P(lock->nl_writer, ==, NULL);
	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
	ASSERT3U(list_link_active(&info->nli_node), ==, 0);
	ASSERT3P(info->nli_minor, !=, NULL);
	ASSERT3P(info->nli_nvme, !=, NULL);
	ASSERT3U(info->nli_curlevel, ==, NVME_LOCK_L_READ);

	info->nli_state = NVME_LOCK_STATE_ACQUIRED;
	info->nli_last_change = gethrtime();
	info->nli_acq_kthread = (uintptr_t)curthread;
	info->nli_acq_pid = (uint32_t)curproc->p_pid;

	list_insert_tail(&lock->nl_readers, info);
	lock->nl_nread_locks++;
}

static void
nvme_rwlock_wrlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
	ASSERT3P(lock->nl_writer, ==, NULL);
	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
	ASSERT3U(list_link_active(&info->nli_node), ==, 0);
	ASSERT3P(info->nli_minor, !=, NULL);
	ASSERT3P(info->nli_nvme, !=, NULL);

	info->nli_state = NVME_LOCK_STATE_ACQUIRED;
	info->nli_curlevel = NVME_LOCK_L_WRITE;
	info->nli_last_change = gethrtime();
	info->nli_acq_kthread = (uintptr_t)curthread;
	info->nli_acq_pid = (uint32_t)curproc->p_pid;

	lock->nl_writer = info;
	lock->nl_nwrite_locks++;
}

#ifdef DEBUG
/*
 * This is just a sanity check for our lock logic.
 */
static boolean_t
nvme_rwlock_is_reader(nvme_lock_t *lock, const nvme_minor_lock_info_t *info)
{
	for (nvme_minor_lock_info_t *i = list_head(&lock->nl_readers);
	    i != NULL; i = list_next(&lock->nl_readers, i)) {
		if (i == info) {
			return (B_TRUE);
		}
	}
	return (B_FALSE);
}
#endif

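/*
 * Fill in the result of a lock request in the waiter's pending ioctl and
 * signal the minor's CV so the thread sleeping in nvme_rwlock() wakes up and
 * returns.
 */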
static void
nvme_rwlock_signal_one(nvme_minor_lock_info_t *info, nvme_ioctl_errno_t err)
{
	ASSERT3P(info->nli_ioc, !=, NULL);
	ASSERT3P(info->nli_minor, !=, NULL);
	ASSERT3P(info->nli_state, !=, NVME_LOCK_STATE_BLOCKED);

	if (err == NVME_IOCTL_E_OK) {
		nvme_ioctl_success(info->nli_ioc);
	} else {
		(void) nvme_ioctl_error(info->nli_ioc, err, 0, 0);
	}

	cv_signal(&info->nli_minor->nm_cv);
}

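/*
 * Wake up all pending readers, granting each of them the read lock. If any
 * readers are pending, there must be no active readers and no active or
 * pending writers; the assertions below verify that.
 */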
static void
nvme_rwlock_wakeup_readers(nvme_lock_t *lock)
{
	nvme_minor_lock_info_t *info;

	if (list_is_empty(&lock->nl_pend_readers) != 0) {
		return;
	}

	ASSERT3U(list_is_empty(&lock->nl_readers), !=, 0);
	ASSERT3P(lock->nl_writer, ==, NULL);
	ASSERT3U(list_is_empty(&lock->nl_pend_writers), !=, 0);
	while ((info = list_remove_head(&lock->nl_pend_readers)) != NULL) {
		info->nli_state = NVME_LOCK_STATE_UNLOCKED;
		nvme_rwlock_rdlock(info, lock);
		nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
	}
}

/*
 * An unlock occurred somewhere. We need to evaluate the total state of the
 * world. An unlock of a namespace can allow a controller lock to proceed. On
 * the other hand, dropping the controller write lock allows every namespace
 * to proceed. While we know the context of where the unlock occurred, it's
 * simpler right now to just allow everything to continue. This is somewhat
 * expensive, but it can be sped up with more cached information when that's
 * justified. We process things in the following order:
 *
 * 1) Evaluate if someone can now take a controller write lock. If so, wake up
 *    the head of the list and then all subsequent processing is done.
 * 2) Evaluate if there are pending readers for the controller. If so, wake up
 *    each and every waiter. Always continue to namespaces in this case.
 *
 * For each namespace:
 *
 * 1) Evaluate if there are pending writers. If so and the write lock can be
 *    handed off, wake up the head of the list. Either way, continue to the
 *    next namespace.
 * 2) Otherwise, if there are pending readers, wake up each and every one of
 *    them and continue on to the next namespace.
 */
static void
nvme_rwlock_wakeup(nvme_t *nvme)
{
	nvme_lock_t *ctrl_lock = &nvme->n_lock;

	/*
	 * This assertion may seem weird, but it's actually a bit of an
	 * invariant. When the controller's write lock is taken, by definition
	 * there are no other locks that can be taken. Therefore if we were
	 * somehow unable to unlock a lock on this controller, then we'd be
	 * violating our rules.
	 */
	VERIFY3P(ctrl_lock->nl_writer, ==, NULL);

	/*
	 * If there are pending writers, either one of them will be woken up
	 * or no one will. Writers trump readers, but it's possible that we
	 * may not be able to wake up a waiting writer yet. If we take this
	 * arm, we should not process anything else. The same logic applies in
	 * the namespace case as well.
	 */
	if (list_is_empty(&ctrl_lock->nl_pend_writers) == 0) {
		nvme_minor_lock_info_t *info;

		if (!nvme_rwlock_handoff_ctrl_wrlock(nvme))
			return;

		/*
		 * We opt to indicate that this is unlocked ahead of
		 * taking the lock for state tracking purposes.
		 */
		info = list_remove_head(&ctrl_lock->nl_pend_writers);
		info->nli_state = NVME_LOCK_STATE_UNLOCKED;
		nvme_rwlock_wrlock(info, ctrl_lock);
		nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
		return;
	}

	nvme_rwlock_wakeup_readers(ctrl_lock);
	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		nvme_lock_t *ns_lock = &ns->ns_lock;

		if (list_is_empty(&ns_lock->nl_pend_writers) == 0) {
			nvme_minor_lock_info_t *info;

			if (!nvme_rwlock_handoff_ns_wrlock(nvme, ns))
				continue;

			info = list_remove_head(&ns_lock->nl_pend_writers);
			info->nli_state = NVME_LOCK_STATE_UNLOCKED;
			nvme_rwlock_wrlock(info, ns_lock);
			nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
		} else {
			nvme_rwlock_wakeup_readers(ns_lock);
		}
	}
}

/*
 * This cleans up all the state in the minor for returning without a lock
 * held.
 */
static void
nvme_rwunlock_cleanup_minor(nvme_minor_lock_info_t *info)
{
	info->nli_lock = NULL;
	info->nli_state = NVME_LOCK_STATE_UNLOCKED;
	info->nli_curlevel = 0;
	info->nli_ns = NULL;
}

/*
 * We've been asked to unlock a lock. Not only must we remove our hold from
 * this lock, we must go through and wake up the next waiter. The waiters that
 * we have to wake up vary depending on our lock. See section 'ioctls, Errors,
 * and Exclusive Access' in the theory statement for more information.
 */
void
nvme_rwunlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
	nvme_t *const nvme = info->nli_nvme;
	boolean_t is_read;

	VERIFY(MUTEX_HELD(&nvme->n_minor_mutex));
	VERIFY3P(info->nli_lock, ==, lock);
	VERIFY(info->nli_curlevel == NVME_LOCK_L_READ ||
	    info->nli_curlevel == NVME_LOCK_L_WRITE);
	is_read = info->nli_curlevel == NVME_LOCK_L_READ;

	/*
	 * First we need to remove this minor from the lock and clean up all
	 * of this lock's state in the info structure.
	 */
	info->nli_last_change = gethrtime();
	if (is_read) {
		VERIFY3U(list_link_active(&info->nli_node), !=, 0);
		ASSERT3U(nvme_rwlock_is_reader(lock, info), ==, B_TRUE);
		list_remove(&lock->nl_readers, info);
	} else {
		VERIFY3U(list_link_active(&info->nli_node), ==, 0);
		VERIFY3P(lock->nl_writer, ==, info);
		lock->nl_writer = NULL;
	}

	nvme_rwunlock_cleanup_minor(info);
	nvme_rwlock_wakeup(nvme);
}

/*
 * We were just interrupted due to a signal. However, that doesn't mean other
 * activity didn't occur while we were blocked. In particular, the signal wake
 * up could race with a subsequent wake up that was due to the device being
 * removed or to actually acquiring the lock. Depending on which state we were
 * in, we need to perform the appropriate clean up. In all cases, the signal
 * trumps all, which may mean actually unlocking!
 */
static void
nvme_rwlock_signal(nvme_minor_lock_info_t *info, nvme_lock_t *lock,
    boolean_t is_read)
{
	ASSERT3P(info->nli_ioc, !=, NULL);

	/*
	 * We're changing the state here, so update the minor's last change
	 * time.
	 */
	info->nli_last_change = gethrtime();
	lock->nl_nsignals++;

	/*
	 * This is the simplest case. We've already been removed from the lock
	 * that we're on. All we need to do is change the error to indicate
	 * that we received a signal.
	 */
	if (info->nli_state == NVME_LOCK_STATE_UNLOCKED) {
		ASSERT3P(info->nli_lock, ==, NULL);
		(void) nvme_ioctl_error(info->nli_ioc,
		    NVME_IOCTL_E_LOCK_WAIT_SIGNAL, 0, 0);
		lock->nl_nsig_unlock++;
		return;
	}

	/*
	 * For all others, the lock should be set here.
	 */
	ASSERT3P(info->nli_lock, ==, lock);

	/*
	 * For someone that was blocked, we need to remove them from the
	 * pending lists.
	 */
	if (info->nli_state == NVME_LOCK_STATE_BLOCKED) {
		ASSERT3S(list_link_active(&info->nli_node), !=, 0);
		if (is_read) {
			list_remove(&lock->nl_pend_readers, info);
		} else {
			list_remove(&lock->nl_pend_writers, info);
		}

		nvme_rwunlock_cleanup_minor(info);
		(void) nvme_ioctl_error(info->nli_ioc,
		    NVME_IOCTL_E_LOCK_WAIT_SIGNAL, 0, 0);
		lock->nl_nsig_blocks++;
		return;
	}

	/*
	 * Now, the most nuanced thing that we need to do: we acquired the
	 * lock, but the signal trumps that, so we must release it. We
	 * synthesize an unlock request and submit that.
	 */
	lock->nl_nsig_acq++;
	nvme_rwunlock(info, lock);
}

/*
 * Here we need to implement our read-write lock policy. Refer to the big
 * theory statement for more information. Here's a summary of the priority
 * rules that are relevant here:
 *
 * 1) Waiting writers starve waiting readers
 * 2) Waiting writers for the controller starve all namespace writers and
 *    readers
 * 3) A read lock can be taken if there are no pending or active writers on
 *    the lock (and on the controller lock for a namespace).
 */
void
nvme_rwlock(nvme_minor_t *minor, nvme_ioctl_lock_t *req)
{
	nvme_t *const nvme = minor->nm_ctrl;
	const boolean_t is_nonblock = (req->nil_flags &
	    NVME_LOCK_F_DONT_BLOCK) != 0;
	const boolean_t is_read = req->nil_level == NVME_LOCK_L_READ;
	const boolean_t is_ctrl = req->nil_ent == NVME_LOCK_E_CTRL;
	nvme_minor_lock_info_t *info;
	nvme_lock_t *lock;
	boolean_t waiters;
	hrtime_t sleep_time;

	VERIFY(MUTEX_HELD(&nvme->n_minor_mutex));

	if (is_ctrl) {
		info = &minor->nm_ctrl_lock;
		lock = &nvme->n_lock;

		if (is_read) {
			waiters = nvme_rwlock_block_ctrl_rdlock(nvme);
		} else {
			waiters = nvme_rwlock_block_ctrl_wrlock(nvme);
		}
	} else {
		nvme_namespace_t *ns;
		const uint32_t nsid = req->nil_common.nioc_nsid;
		info = &minor->nm_ns_lock;

		VERIFY3U(req->nil_ent, ==, NVME_LOCK_E_NS);
		ns = nvme_nsid2ns(nvme, nsid);
		minor->nm_ns_lock.nli_ns = ns;
		lock = &ns->ns_lock;

		if (is_read) {
			waiters = nvme_rwlock_block_ns_rdlock(nvme, ns);
		} else {
			waiters = nvme_rwlock_block_ns_wrlock(nvme, ns);
		}
	}

	/*
	 * Record what kind of lock we're attempting to acquire and which lock
	 * we're operating on.
	 */
	info->nli_curlevel = is_read ? NVME_LOCK_L_READ : NVME_LOCK_L_WRITE;
	info->nli_lock = lock;

	/*
	 * We think we can get the lock, hurrah.
	 */
	if (!waiters) {
		if (is_read) {
			nvme_rwlock_rdlock(info, lock);
		} else {
			nvme_rwlock_wrlock(info, lock);
		}
		(void) nvme_ioctl_success(&req->nil_common);
		return;
	}

	/*
	 * We failed to get the lock. At this point we will set ourselves up
	 * to block. Once we go to sleep on the CV, our assumption is that
	 * whoever wakes us up will have filled in the status of this
	 * operation, and therefore after this point all we have to do is
	 * return.
	 */
	if (is_nonblock) {
		nvme_rwunlock_cleanup_minor(info);
		lock->nl_nnonblock++;
		(void) nvme_ioctl_error(&req->nil_common,
		    NVME_IOCTL_E_LOCK_WOULD_BLOCK, 0, 0);
		return;
	}

	ASSERT3P(info->nli_ioc, ==, NULL);
	info->nli_ioc = &req->nil_common;
	if (is_read) {
		list_insert_tail(&lock->nl_pend_readers, info);
		lock->nl_npend_reads++;
	} else {
		list_insert_tail(&lock->nl_pend_writers, info);
		lock->nl_npend_writes++;
	}

	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
	info->nli_state = NVME_LOCK_STATE_BLOCKED;
	sleep_time = gethrtime();
	info->nli_last_change = sleep_time;
	while (info->nli_state == NVME_LOCK_STATE_BLOCKED) {
		/*
		 * Block until we receive a signal. Note, a signal trumps all
		 * other processing. We may be woken up here because we
		 * acquired a lock; we may also end up woken up here if the
		 * controller is marked as dead.
		 */
		if (cv_wait_sig(&minor->nm_cv, &nvme->n_minor_mutex) == 0) {
			nvme_rwlock_signal(info, lock, is_read);
			break;
		}
	}

	/*
	 * Before we return, clean up and sanity check our state.
	 */
	info->nli_ioc = NULL;
#ifdef DEBUG
	ASSERT3S(info->nli_last_change, !=, sleep_time);
	if (info->nli_state == NVME_LOCK_STATE_UNLOCKED) {
		ASSERT3S(list_link_active(&info->nli_node), ==, 0);
		ASSERT3P(info->nli_ns, ==, NULL);
		ASSERT3U(req->nil_common.nioc_drv_err, !=, NVME_IOCTL_E_OK);
	} else {
		ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_ACQUIRED);
		ASSERT3U(req->nil_common.nioc_drv_err, ==, NVME_IOCTL_E_OK);
		if (is_read) {
			ASSERT3S(list_link_active(&info->nli_node), !=, 0);
		} else {
			ASSERT3P(lock->nl_writer, ==, info);
		}
	}
	ASSERT3P(info->nli_minor, ==, minor);
	ASSERT3P(info->nli_nvme, ==, minor->nm_ctrl);
#endif
}

/*
 * This is used to clean up a single minor that was blocked trying to get a
 * lock when the controller went dead. In particular, the key here is that we
 * need to change its state to unlocked by cleaning it up and then signal it
 * to wake up and process things. The cleanup also helps deal with the case of
 * a racing signal, though it does leave the state a little awkward in this
 * intermediate moment; however, since the minor has already been removed from
 * its list, that really is the proper action and no one can issue new lock
 * ioctls at this point.
 */
static void
nvme_rwlock_ctrl_dead_cleanup_one(nvme_t *nvme, nvme_minor_lock_info_t *info)
{
	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_BLOCKED);
	ASSERT3P(info->nli_ioc, !=, NULL);

	/*
	 * Update the last time this changed for our sanity checks.
	 */
	info->nli_last_change = gethrtime();
	nvme_rwunlock_cleanup_minor(info);
	nvme_rwlock_signal_one(info, nvme->n_dead_status);
}

/*
 * We've just been informed that this controller has set n_dead. This is most
 * unfortunate for anyone trying to actively use it right now and we must
 * notify them. Anyone who has successfully obtained a lock gets to keep it
 * until they drop it (hopefully soon). Anyone who is asleep waiting for a
 * lock should be kicked out and told they are not getting it.
 *
 * The moment we grab n_minor_mutex, no other state here can change. So we can
 * go ahead and wake up all waiters with impunity. This is being called from
 * the nvme_dead_taskq.
 */
void
nvme_rwlock_ctrl_dead(void *arg)
{
	nvme_t *nvme = arg;
	nvme_lock_t *ctrl_lock = &nvme->n_lock;
	nvme_minor_lock_info_t *info;

	mutex_enter(&nvme->n_minor_mutex);
	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		nvme_lock_t *ns_lock = &ns->ns_lock;

		while ((info = list_remove_head(&ns_lock->nl_pend_readers)) !=
		    NULL) {
			nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
		}

		while ((info = list_remove_head(&ns_lock->nl_pend_writers)) !=
		    NULL) {
			nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
		}
	}

	while ((info = list_remove_head(&ctrl_lock->nl_pend_readers)) !=
	    NULL) {
		nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
	}

	while ((info = list_remove_head(&ctrl_lock->nl_pend_writers)) !=
	    NULL) {
		nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
	}
	mutex_exit(&nvme->n_minor_mutex);
}

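/*
 * nvme_lock_fini() and nvme_lock_init() tear down and set up the three wait
 * lists (active readers, pending readers, and pending writers) that back a
 * lock. By the time a lock is destroyed there must be no active writer and
 * the lists are expected to already be empty.
 */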
void
nvme_lock_fini(nvme_lock_t *lock)
{
	VERIFY3P(lock->nl_writer, ==, NULL);
	list_destroy(&lock->nl_pend_writers);
	list_destroy(&lock->nl_pend_readers);
	list_destroy(&lock->nl_readers);
}

void
nvme_lock_init(nvme_lock_t *lock)
{
	list_create(&lock->nl_readers, sizeof (nvme_minor_lock_info_t),
	    offsetof(nvme_minor_lock_info_t, nli_node));
	list_create(&lock->nl_pend_readers, sizeof (nvme_minor_lock_info_t),
	    offsetof(nvme_minor_lock_info_t, nli_node));
	list_create(&lock->nl_pend_writers, sizeof (nvme_minor_lock_info_t),
	    offsetof(nvme_minor_lock_info_t, nli_node));
}