/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * This implements the general locking routines. See the big theory section
 * 'ioctls, Errors, and Exclusive Access' for more information.
 */

#include <sys/stddef.h>
#include <sys/nvme.h>

#include "nvme_reg.h"
#include "nvme_var.h"

/*
 * Do we have a writer or someone pending? Note, some cases require checking
 * both of these and others do not. Please see each individual check for the
 * nuance here. As a general rule of thumb, when locking, the pending writers
 * are important. However, when passing the lock on to the next owner (the
 * handoff functions below), one doesn't check them.
 */
static boolean_t
nvme_rwlock_wr_or_pend(nvme_lock_t *lock)
{
	return (lock->nl_writer != NULL ||
	    list_is_empty(&lock->nl_pend_writers) == 0);
}

/*
 * Taking a namespace read lock requires that there is no writer (or pending
 * writer) on either the controller or the namespace.
 */
static boolean_t
nvme_rwlock_block_ns_rdlock(nvme_t *nvme, nvme_namespace_t *ns)
{
	return (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    nvme_rwlock_wr_or_pend(&ns->ns_lock));
}

/*
 * The following entities all block a namespace write lock from being taken:
 *
 * 1) Any active or pending writer on the controller lock. They block and
 *    starve namespace writers respectively.
 * 2) Any active or pending writers on the namespace lock. We must wait in
 *    line.
 * 3) Any active readers on the namespace lock. We ignore pending namespace
 *    readers because, by definition, they can only exist if one of the other
 *    conditions above already applies.
 */
static boolean_t
nvme_rwlock_block_ns_wrlock(nvme_t *nvme, nvme_namespace_t *ns)
{
	return (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    nvme_rwlock_wr_or_pend(&ns->ns_lock) ||
	    list_is_empty(&ns->ns_lock.nl_readers) == 0);
}

/*
 * The only thing that blocks acquisition of a controller read lock is if
 * there are outstanding or pending writers on the controller lock. We can
 * ignore the state of all namespaces here.
 */
static boolean_t
nvme_rwlock_block_ctrl_rdlock(nvme_t *nvme)
{
	return (nvme_rwlock_wr_or_pend(&nvme->n_lock));
}

/*
 * Taking the controller write lock is the most challenging of all, but it
 * also takes priority. The following all block a controller write lock from
 * being taken:
 *
 * 1) Any active or pending controller write lock
 * 2) Any controller read lock. We skip pending reads because if they exist,
 *    some other condition that trips us must already exist.
 * 3) Any namespace write lock. We ignore pending namespace writes because by
 *    definition some other condition must exist for them to be pending.
 * 4) Any read lock on a namespace. We ignore pending reads like in the
 *    controller case.
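 *
 * For example, if a single minor holds a read lock on one namespace, an
 * attempt to take the controller write lock must wait (or fail with
 * NVME_IOCTL_E_LOCK_WOULD_BLOCK when NVME_LOCK_F_DONT_BLOCK is set) until
 * that namespace read lock is dropped, even though nothing else is held.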
 */
static boolean_t
nvme_rwlock_block_ctrl_wrlock(nvme_t *nvme)
{
	if (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    list_is_empty(&nvme->n_lock.nl_readers) == 0) {
		return (B_TRUE);
	}

	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		if (ns->ns_lock.nl_writer != NULL ||
		    list_is_empty(&ns->ns_lock.nl_readers) == 0) {
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * Answer whether we can hand off the world to a pending controller write
 * lock. This has similar rules to the above; however, we critically _ignore_
 * pending controller write lock holds, as the assumption is that one of them
 * is the reason we're here. The only considerations from above are therefore
 * controller reader locks and namespace locks.
 */
static boolean_t
nvme_rwlock_handoff_ctrl_wrlock(nvme_t *nvme)
{
	/* See nvme_rwlock_wakeup() for why this can be done. */
	ASSERT3P(nvme->n_lock.nl_writer, ==, NULL);

	if (list_is_empty(&nvme->n_lock.nl_readers) == 0) {
		return (B_FALSE);
	}

	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		if (ns->ns_lock.nl_writer != NULL ||
		    list_is_empty(&ns->ns_lock.nl_readers) == 0) {
			return (B_FALSE);
		}
	}

	return (B_TRUE);
}

/*
 * Namespace handoff variant. It skips pending writers on the namespace lock,
 * but fully considers them on the controller due to their priority. Otherwise
 * this follows the same rules as the normal blocking check.
 */
static boolean_t
nvme_rwlock_handoff_ns_wrlock(nvme_t *nvme, nvme_namespace_t *ns)
{
	if (nvme_rwlock_wr_or_pend(&nvme->n_lock) ||
	    list_is_empty(&nvme->n_lock.nl_readers) == 0) {
		return (B_FALSE);
	}

	if (ns->ns_lock.nl_writer != NULL ||
	    list_is_empty(&ns->ns_lock.nl_readers) == 0) {
		return (B_FALSE);
	}

	return (B_TRUE);
}

static void
nvme_rwlock_rdlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
	ASSERT3U(list_is_empty(&lock->nl_pend_writers), !=, 0);
	ASSERT3P(lock->nl_writer, ==, NULL);
	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
	ASSERT3U(list_link_active(&info->nli_node), ==, 0);
	ASSERT3P(info->nli_minor, !=, NULL);
	ASSERT3P(info->nli_nvme, !=, NULL);
	ASSERT3U(info->nli_curlevel, ==, NVME_LOCK_L_READ);

	info->nli_state = NVME_LOCK_STATE_ACQUIRED;
	info->nli_last_change = gethrtime();
	info->nli_acq_kthread = (uintptr_t)curthread;
	info->nli_acq_pid = (uint32_t)curproc->p_pid;

	list_insert_tail(&lock->nl_readers, info);
	lock->nl_nread_locks++;
}

static void
nvme_rwlock_wrlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
	ASSERT3P(lock->nl_writer, ==, NULL);
	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
	ASSERT3U(list_link_active(&info->nli_node), ==, 0);
	ASSERT3P(info->nli_minor, !=, NULL);
	ASSERT3P(info->nli_nvme, !=, NULL);

	info->nli_state = NVME_LOCK_STATE_ACQUIRED;
	info->nli_curlevel = NVME_LOCK_L_WRITE;
	info->nli_last_change = gethrtime();
	info->nli_acq_kthread = (uintptr_t)curthread;
	info->nli_acq_pid = (uint32_t)curproc->p_pid;

	lock->nl_writer = info;
	lock->nl_nwrite_locks++;
}

#ifdef DEBUG
/*
 * This is just a sanity check for our lock logic.
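 * It walks the lock's reader list and reports whether the given hold is
 * present, letting nvme_rwunlock() assert that a read unlock comes from a
 * minor that actually holds the lock as a reader.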
 */
static boolean_t
nvme_rwlock_is_reader(nvme_lock_t *lock, const nvme_minor_lock_info_t *info)
{
	for (nvme_minor_lock_info_t *i = list_head(&lock->nl_readers);
	    i != NULL; i = list_next(&lock->nl_readers, i)) {
		if (i == info) {
			return (B_TRUE);
		}
	}
	return (B_FALSE);
}
#endif

static void
nvme_rwlock_signal_one(nvme_minor_lock_info_t *info, nvme_ioctl_errno_t err)
{
	ASSERT3P(info->nli_ioc, !=, NULL);
	ASSERT3P(info->nli_minor, !=, NULL);
	ASSERT3P(info->nli_state, !=, NVME_LOCK_STATE_BLOCKED);

	if (err == NVME_IOCTL_E_OK) {
		nvme_ioctl_success(info->nli_ioc);
	} else {
		(void) nvme_ioctl_error(info->nli_ioc, err, 0, 0);
	}

	cv_signal(&info->nli_minor->nm_cv);
}

static void
nvme_rwlock_wakeup_readers(nvme_lock_t *lock)
{
	nvme_minor_lock_info_t *info;

	if (list_is_empty(&lock->nl_pend_readers) != 0) {
		return;
	}

	ASSERT3U(list_is_empty(&lock->nl_readers), !=, 0);
	ASSERT3P(lock->nl_writer, ==, NULL);
	ASSERT3U(list_is_empty(&lock->nl_pend_writers), !=, 0);
	while ((info = list_remove_head(&lock->nl_pend_readers)) != NULL) {
		info->nli_state = NVME_LOCK_STATE_UNLOCKED;
		nvme_rwlock_rdlock(info, lock);
		nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
	}
}

/*
 * An unlock occurred somewhere. We need to evaluate the total state of the
 * world. An unlock of a namespace can allow a controller lock to proceed. On
 * the other hand, dropping the controller write lock allows every namespace
 * to proceed. While we know the context of where the unlock occurred, it's
 * simpler right now to just allow everything to continue. This is somewhat
 * expensive, but it can be sped up with more cached information when that's
 * justified. We process things in the following order:
 *
 * 1) Evaluate if someone can now take a controller write lock. If so, wake
 *    up the head of the list and then all subsequent processing is done.
 * 2) Evaluate if there are pending readers for the controller. If so, wake
 *    up each and every waiter. Always continue to namespaces in this case.
 *
 * For each namespace:
 *
 * 1) Evaluate if there are pending writers. If so, wake up the head of the
 *    list if it can take the write lock, and then continue to the next
 *    namespace.
 * 2) Otherwise, if there are pending readers, wake up each and every one of
 *    them and continue on to the next namespace.
 */
static void
nvme_rwlock_wakeup(nvme_t *nvme)
{
	nvme_lock_t *ctrl_lock = &nvme->n_lock;

	/*
	 * This assertion may seem weird, but it's actually a bit of an
	 * invariant. When the controller's write lock is held, by definition
	 * no other locks can be held. Therefore, if a controller write lock
	 * were still present here, whatever unlock brought us to this point
	 * would have been violating our rules.
	 */
	VERIFY3P(ctrl_lock->nl_writer, ==, NULL);

	/*
	 * If there are pending writers, either one of them will be woken up
	 * or no one will. Writers trump readers, but it's possible that we
	 * may not be able to wake up a waiting writer yet. If we take this
	 * arm, we should not process anything else. The same logic applies in
	 * the namespace case as well.
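	 *
	 * For example, a pending controller write lock cannot be handed off
	 * while any namespace is still held for reading or writing; in that
	 * case we wake no one and wait for a later unlock to trigger another
	 * evaluation.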
	 */
	if (list_is_empty(&ctrl_lock->nl_pend_writers) == 0) {
		nvme_minor_lock_info_t *info;

		if (!nvme_rwlock_handoff_ctrl_wrlock(nvme))
			return;

		/*
		 * We opt to indicate that this is unlocked ahead of
		 * taking the lock for state tracking purposes.
		 */
		info = list_remove_head(&ctrl_lock->nl_pend_writers);
		info->nli_state = NVME_LOCK_STATE_UNLOCKED;
		nvme_rwlock_wrlock(info, ctrl_lock);
		nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
		return;
	}

	nvme_rwlock_wakeup_readers(ctrl_lock);
	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		nvme_lock_t *ns_lock = &ns->ns_lock;

		if (list_is_empty(&ns_lock->nl_pend_writers) == 0) {
			nvme_minor_lock_info_t *info;

			if (!nvme_rwlock_handoff_ns_wrlock(nvme, ns))
				continue;

			info = list_remove_head(&ns_lock->nl_pend_writers);
			info->nli_state = NVME_LOCK_STATE_UNLOCKED;
			nvme_rwlock_wrlock(info, ns_lock);
			nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
		} else {
			nvme_rwlock_wakeup_readers(ns_lock);
		}
	}
}

/*
 * This cleans up all the state in the minor for returning without a lock
 * held.
 */
static void
nvme_rwunlock_cleanup_minor(nvme_minor_lock_info_t *info)
{
	info->nli_lock = NULL;
	info->nli_state = NVME_LOCK_STATE_UNLOCKED;
	info->nli_curlevel = 0;
	info->nli_ns = NULL;
}

/*
 * We've been asked to unlock a lock. Not only must we remove our hold from
 * this lock, we must go through and wake up the next waiter. The waiters that
 * we have to wake up vary depending on our lock. See section 'ioctls, Errors,
 * and Exclusive Access' in the theory statement for more information.
 */
void
nvme_rwunlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
	nvme_t *const nvme = info->nli_nvme;
	boolean_t is_read;

	VERIFY(MUTEX_HELD(&nvme->n_minor_mutex));
	VERIFY3P(info->nli_lock, ==, lock);
	VERIFY(info->nli_curlevel == NVME_LOCK_L_READ ||
	    info->nli_curlevel == NVME_LOCK_L_WRITE);
	is_read = info->nli_curlevel == NVME_LOCK_L_READ;

	/*
	 * First we need to remove this minor from the lock and clean up all
	 * of this lock's state in the info structure.
	 */
	info->nli_last_change = gethrtime();
	if (is_read) {
		VERIFY3U(list_link_active(&info->nli_node), !=, 0);
		ASSERT3U(nvme_rwlock_is_reader(lock, info), ==, B_TRUE);
		list_remove(&lock->nl_readers, info);
	} else {
		VERIFY3U(list_link_active(&info->nli_node), ==, 0);
		VERIFY3P(lock->nl_writer, ==, info);
		lock->nl_writer = NULL;
	}

	nvme_rwunlock_cleanup_minor(info);
	nvme_rwlock_wakeup(nvme);
}

/*
 * We were just interrupted due to a signal. However, just because our block
 * was interrupted due to a signal doesn't mean that other activity didn't
 * occur. In particular, the signal wake up could race with a subsequent wake
 * up that was due to the device being removed or actually acquiring the lock.
 * Depending on which state we were in, we need to perform the appropriate
 * clean up. In all cases, the signal trumps all, which may mean actually
 * unlocking!
 */
static void
nvme_rwlock_signal(nvme_minor_lock_info_t *info, nvme_lock_t *lock,
    boolean_t is_read)
{
	ASSERT3P(info->nli_ioc, !=, NULL);

	/*
	 * We're changing the state here, so update the minor's last change
	 * time.
	 */
	info->nli_last_change = gethrtime();
	lock->nl_nsignals++;

	/*
	 * This is the simplest case. We've already been removed from the lock
	 * that we're on. All we need to do is change the error to indicate
	 * that we received a signal.
	 */
	if (info->nli_state == NVME_LOCK_STATE_UNLOCKED) {
		ASSERT3P(info->nli_lock, ==, NULL);
		(void) nvme_ioctl_error(info->nli_ioc,
		    NVME_IOCTL_E_LOCK_WAIT_SIGNAL, 0, 0);
		lock->nl_nsig_unlock++;
		return;
	}

	/*
	 * For all others, the lock should be set here.
	 */
	ASSERT3P(info->nli_lock, ==, lock);

	/*
	 * For someone that was blocked, we need to remove them from the
	 * appropriate pending list.
	 */
	if (info->nli_state == NVME_LOCK_STATE_BLOCKED) {
		ASSERT3S(list_link_active(&info->nli_node), !=, 0);
		if (is_read) {
			list_remove(&lock->nl_pend_readers, info);
		} else {
			list_remove(&lock->nl_pend_writers, info);
		}

		nvme_rwunlock_cleanup_minor(info);
		(void) nvme_ioctl_error(info->nli_ioc,
		    NVME_IOCTL_E_LOCK_WAIT_SIGNAL, 0, 0);
		lock->nl_nsig_blocks++;
		return;
	}

	/*
	 * Now, the most nuanced thing that we need to do: unlock this node.
	 * We synthesize an unlock request and submit that.
	 */
	lock->nl_nsig_acq++;
	nvme_rwunlock(info, lock);
}

/*
 * Here we need to implement our read-write lock policy. Refer to the big
 * theory statement for more information. Here's a summary of the priorities
 * that are relevant here:
 *
 * 1) Waiting writers starve waiting readers.
 * 2) Waiting writers for the controller starve all namespace writers and
 *    readers.
 * 3) A read lock can be taken if there are no pending or active writers on
 *    the lock (and the controller lock for a namespace).
 */
void
nvme_rwlock(nvme_minor_t *minor, nvme_ioctl_lock_t *req)
{
	nvme_t *const nvme = minor->nm_ctrl;
	const boolean_t is_nonblock = (req->nil_flags &
	    NVME_LOCK_F_DONT_BLOCK) != 0;
	const boolean_t is_read = req->nil_level == NVME_LOCK_L_READ;
	const boolean_t is_ctrl = req->nil_ent == NVME_LOCK_E_CTRL;
	nvme_minor_lock_info_t *info;
	nvme_lock_t *lock;
	boolean_t waiters;
	hrtime_t sleep_time;

	VERIFY(MUTEX_HELD(&nvme->n_minor_mutex));

	if (is_ctrl) {
		info = &minor->nm_ctrl_lock;
		lock = &nvme->n_lock;

		if (is_read) {
			waiters = nvme_rwlock_block_ctrl_rdlock(nvme);
		} else {
			waiters = nvme_rwlock_block_ctrl_wrlock(nvme);
		}
	} else {
		nvme_namespace_t *ns;
		const uint32_t nsid = req->nil_common.nioc_nsid;
		info = &minor->nm_ns_lock;

		VERIFY3U(req->nil_ent, ==, NVME_LOCK_E_NS);
		ns = nvme_nsid2ns(nvme, nsid);
		minor->nm_ns_lock.nli_ns = ns;
		lock = &ns->ns_lock;

		if (is_read) {
			waiters = nvme_rwlock_block_ns_rdlock(nvme, ns);
		} else {
			waiters = nvme_rwlock_block_ns_wrlock(nvme, ns);
		}
	}

	/*
	 * Set the information that indicates what kind of lock we're
	 * attempting to acquire and which lock we're operating on.
	 */
	info->nli_curlevel = is_read ? NVME_LOCK_L_READ : NVME_LOCK_L_WRITE;
	info->nli_lock = lock;

	/*
	 * We think we can get the lock, hurrah.
	 */
	if (!waiters) {
		if (is_read) {
			nvme_rwlock_rdlock(info, lock);
		} else {
			nvme_rwlock_wrlock(info, lock);
		}
		(void) nvme_ioctl_success(&req->nil_common);
		return;
	}

	/*
	 * We failed to get the lock. At this point we will set ourselves up
	 * to block. Once we go to sleep on the CV, our assumption is that
	 * whoever wakes us up will have filled in the status of this
	 * operation, and therefore after this point all we have to do is
	 * return.
	 */
	if (is_nonblock) {
		nvme_rwunlock_cleanup_minor(info);
		lock->nl_nnonblock++;
		(void) nvme_ioctl_error(&req->nil_common,
		    NVME_IOCTL_E_LOCK_WOULD_BLOCK, 0, 0);
		return;
	}

	ASSERT3P(info->nli_ioc, ==, NULL);
	info->nli_ioc = &req->nil_common;
	if (is_read) {
		list_insert_tail(&lock->nl_pend_readers, info);
		lock->nl_npend_reads++;
	} else {
		list_insert_tail(&lock->nl_pend_writers, info);
		lock->nl_npend_writes++;
	}

	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
	info->nli_state = NVME_LOCK_STATE_BLOCKED;
	sleep_time = gethrtime();
	info->nli_last_change = sleep_time;
	while (info->nli_state == NVME_LOCK_STATE_BLOCKED) {
		/*
		 * Block until we receive a signal. Note, a signal trumps all
		 * other processing. We may be woken up here because we
		 * acquired a lock; we may also end up woken up here if the
		 * controller is marked as dead.
		 */
		if (cv_wait_sig(&minor->nm_cv, &nvme->n_minor_mutex) == 0) {
			nvme_rwlock_signal(info, lock, is_read);
			break;
		}
	}

	/*
	 * Before we return, clean up and sanity check our state.
	 */
	info->nli_ioc = NULL;
#ifdef DEBUG
	ASSERT3S(info->nli_last_change, !=, sleep_time);
	if (info->nli_state == NVME_LOCK_STATE_UNLOCKED) {
		ASSERT3S(list_link_active(&info->nli_node), ==, 0);
		ASSERT3P(info->nli_ns, ==, NULL);
		ASSERT3U(req->nil_common.nioc_drv_err, !=, NVME_IOCTL_E_OK);
	} else {
		ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_ACQUIRED);
		ASSERT3U(req->nil_common.nioc_drv_err, ==, NVME_IOCTL_E_OK);
		if (is_read) {
			ASSERT3S(list_link_active(&info->nli_node), !=, 0);
		} else {
			ASSERT3P(lock->nl_writer, ==, info);
		}
	}
	ASSERT3P(info->nli_minor, ==, minor);
	ASSERT3P(info->nli_nvme, ==, minor->nm_ctrl);
#endif
}

/*
 * This is used to clean up a single minor that was blocked trying to get a
 * lock prior to the controller going dead. In particular, the key here is
 * that we need to change its state to unlocked by cleaning it up and then
 * signal it to wake up and process things. The clean up also helps deal with
 * the case of a racing signal, though it does leave the state a little
 * awkward in this intermediate moment; however, since the minor has been
 * removed from its pending list, that is really the proper action and no one
 * can issue new lock ioctls at this point.
 */
static void
nvme_rwlock_ctrl_dead_cleanup_one(nvme_t *nvme, nvme_minor_lock_info_t *info)
{
	ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_BLOCKED);
	ASSERT3P(info->nli_ioc, !=, NULL);

	/*
	 * Update the last time this has changed for our sanity checks.
	 */
	info->nli_last_change = gethrtime();
	nvme_rwunlock_cleanup_minor(info);
	nvme_rwlock_signal_one(info, nvme->n_dead_status);
}

/*
 * We've just been informed that this controller has set n_dead. This is most
 * unfortunate for anyone trying to actively use it right now and we must
 * notify them. Anyone who has successfully obtained a lock gets to keep it
 * until they drop it (hopefully soon). Anyone who is asleep should be kicked
 * out and told they are not getting the lock.
 *
 * The moment we grab n_minor_mutex, no other state here can change. So we
 * can go ahead and wake up all waiters with impunity. This is being called
 * from the nvme_dead_taskq.
 */
void
nvme_rwlock_ctrl_dead(void *arg)
{
	nvme_t *nvme = arg;
	nvme_lock_t *ctrl_lock = &nvme->n_lock;
	nvme_minor_lock_info_t *info;

	mutex_enter(&nvme->n_minor_mutex);
	for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
		nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
		nvme_lock_t *ns_lock = &ns->ns_lock;

		while ((info = list_remove_head(&ns_lock->nl_pend_readers)) !=
		    NULL) {
			nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
		}

		while ((info = list_remove_head(&ns_lock->nl_pend_writers)) !=
		    NULL) {
			nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
		}
	}

	while ((info = list_remove_head(&ctrl_lock->nl_pend_readers)) !=
	    NULL) {
		nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
	}

	while ((info = list_remove_head(&ctrl_lock->nl_pend_writers)) !=
	    NULL) {
		nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
	}
	mutex_exit(&nvme->n_minor_mutex);
}

void
nvme_lock_fini(nvme_lock_t *lock)
{
	VERIFY3P(lock->nl_writer, ==, NULL);
	list_destroy(&lock->nl_pend_writers);
	list_destroy(&lock->nl_pend_readers);
	list_destroy(&lock->nl_readers);
}

void
nvme_lock_init(nvme_lock_t *lock)
{
	list_create(&lock->nl_readers, sizeof (nvme_minor_lock_info_t),
	    offsetof(nvme_minor_lock_info_t, nli_node));
	list_create(&lock->nl_pend_readers, sizeof (nvme_minor_lock_info_t),
	    offsetof(nvme_minor_lock_info_t, nli_node));
	list_create(&lock->nl_pend_writers, sizeof (nvme_minor_lock_info_t),
	    offsetof(nvme_minor_lock_info_t, nli_node));
}