// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017, 2020 by Delphix. All rights reserved.
 */

/*
 * ZTHR Infrastructure
 * ===================
 *
 * ZTHR threads are used for isolated operations that span multiple txgs
 * within a SPA. They generally exist from SPA creation/loading and until
 * the SPA is exported/destroyed. The ideal requirements for an operation
 * to be modeled with a zthr are the following:
 *
 * 1] The operation needs to run over multiple txgs.
 * 2] There is a single point of reference in memory or on disk that
 *    indicates whether the operation should run/is running or has
 *    stopped.
 *
 * If the operation satisfies the above then the following rules guarantee
 * a certain level of correctness:
 *
 * 1] Any thread EXCEPT the zthr changes the work indicator from stopped
 *    to running but not the opposite.
 * 2] Only the zthr can change the work indicator from running to stopped
 *    (e.g. when it is done) but not the opposite.
 *
 * This way a normal zthr cycle should go like this:
 *
 * 1] An external thread changes the work indicator from stopped to
 *    running and wakes up the zthr.
 * 2] The zthr wakes up, checks the indicator and starts working.
 * 3] When the zthr is done, it changes the indicator to stopped, allowing
 *    a new cycle to start.
 *
 * Besides being awakened by other threads, a zthr can be configured
 * during creation to wakeup on its own after a specified interval
 * [see zthr_create_timer()].
 *
 * Note: ZTHR threads are NOT a replacement for generic threads! Please
 * ensure that they fit your use-case well before using them.
 *
 * == ZTHR creation
 *
 * Every zthr needs four inputs to start running:
 *
 * 1] A user-defined checker function (checkfunc) that decides whether
 *    the zthr should start working or go to sleep. The function should
 *    return TRUE when the zthr needs to work or FALSE to let it sleep,
 *    and should adhere to the following signature:
 *    boolean_t checkfunc_name(void *args, zthr_t *t);
 *
 * 2] A user-defined ZTHR function (func) which the zthr executes when
 *    it is not sleeping. The function should adhere to the following
 *    signature type:
 *    void func_name(void *args, zthr_t *t);
 *
 * 3] A void args pointer that will be passed to checkfunc and func
 *    implicitly by the infrastructure.
 *
 * 4] A name for the thread. This string must be valid for the lifetime
 *    of the zthr.
 *
 * The reason why the above API needs two different functions,
 * instead of one that both checks and does the work, has to do with
 * the zthr's internal state lock (zthr_state_lock) and the allowed
 * cancellation windows. We want to hold the zthr_state_lock while
 * running checkfunc but not while running func. This way the zthr
 * can be cancelled while doing work and not while checking for work.
 *
 * To start a zthr:
 *     zthr_t *zthr_pointer = zthr_create(name, checkfunc, func,
 *         args, pri);
 * or
 *     zthr_t *zthr_pointer = zthr_create_timer(name, checkfunc, func,
 *         args, max_sleep, pri);
 *
 * After that you should be able to wakeup, cancel, and resume the
 * zthr from another thread using the zthr_pointer.
 *
 * NOTE: ZTHR threads could potentially wake up spuriously and the
 * user should take this into account when writing a checkfunc.
 * [see ZTHR state transitions]
 *
 * == ZTHR wakeup
 *
 * ZTHR wakeup should be used when new work is added for the zthr. The
 * sleeping zthr will wakeup, see that it has more work to complete
 * and proceed. This can be invoked from open or syncing context.
 *
 * To wakeup a zthr:
 *     zthr_wakeup(zthr_t *t)
 *
 * == ZTHR cancellation and resumption
 *
 * ZTHR threads must be cancelled when their SPA is being exported
 * or when they need to be paused so they don't interfere with other
 * operations.
 *
 * To cancel a zthr:
 *     zthr_cancel(zthr_pointer);
 *
 * To resume it:
 *     zthr_resume(zthr_pointer);
 *
 * ZTHR cancel and resume should be invoked in open context during the
 * lifecycle of the pool as it is imported, exported or destroyed.
 *
 * A zthr will implicitly check if it has received a cancellation
 * signal every time func returns and every time it wakes up [see
 * ZTHR state transitions below].
 *
 * At times, waiting for the zthr's func to finish its job may take
 * time. This may be very time-consuming for some operations that
 * need to cancel the SPA's zthrs (e.g. spa_export). For this scenario
 * the user can explicitly make their ZTHR function aware of incoming
 * cancellation signals using zthr_iscancelled(). A common pattern for
 * that looks like this:
 *
 * void
 * func_name(void *args, zthr_t *t)
 * {
 *     ... <unpack args> ...
 *     while (!work_done && !zthr_iscancelled(t)) {
 *         ... <do more work> ...
 *     }
 * }
 *
 * == ZTHR cleanup
 *
 * Cancelling a zthr doesn't clean up its metadata (internal locks,
 * function pointers to func and checkfunc, etc..).
 * This is because we want to keep them around in case we want to resume
 * the execution of the zthr later. Similarly for zthrs that exit
 * themselves.
 *
 * To completely cleanup a zthr, cancel it first to ensure that it
 * is not running and then use zthr_destroy().
 *
 * == ZTHR state transitions
 *
 *    zthr creation
 *       +
 *       |
 *       |      woke up
 *       |   +--------------+ sleep
 *       |   |                   ^
 *       |   |                   |
 *       |   |                   | FALSE
 *       |   |                   |
 *       v   v     FALSE         +
 *    cancelled? +---------> checkfunc?
 *       +   ^                   +
 *       |   |                   |
 *       |   |                   | TRUE
 *       |   |                   |
 *       |   |  func returned    v
 *       |   +---------------+ func
 *       |
 *       | TRUE
 *       |
 *       v
 *    zthr stopped running
 *
 * == Implementation of ZTHR requests
 *
 * ZTHR cancel and resume are requests on a zthr to change its
 * internal state. These requests are serialized using the
 * zthr_request_lock, while changes in its internal state are
 * protected by the zthr_state_lock. A request will first acquire
 * the zthr_request_lock and then immediately acquire the
 * zthr_state_lock. We do this so that incoming requests are
 * serialized using the request lock, while still allowing us
 * to use the state lock for thread communication via zthr_cv.
 *
 * ZTHR wakeup broadcasts to zthr_cv, causing sleeping threads
 * to wakeup. It acquires the zthr_state_lock but not the
 * zthr_request_lock, so that a wakeup on a zthr in the middle
 * of being cancelled will not block.
194 */ 195 196 #include <sys/zfs_context.h> 197 #include <sys/zthr.h> 198 199 struct zthr { 200 /* running thread doing the work */ 201 kthread_t *zthr_thread; 202 203 /* lock protecting internal data & invariants */ 204 kmutex_t zthr_state_lock; 205 206 /* mutex that serializes external requests */ 207 kmutex_t zthr_request_lock; 208 209 /* notification mechanism for requests */ 210 kcondvar_t zthr_cv; 211 212 /* flag set to true if we are canceling the zthr */ 213 boolean_t zthr_cancel; 214 215 /* flag set to true if we are waiting for the zthr to finish */ 216 boolean_t zthr_haswaiters; 217 kcondvar_t zthr_wait_cv; 218 /* 219 * maximum amount of time that the zthr is spent sleeping; 220 * if this is 0, the thread doesn't wake up until it gets 221 * signaled. 222 */ 223 hrtime_t zthr_sleep_timeout; 224 225 /* Thread priority */ 226 pri_t zthr_pri; 227 228 /* consumer-provided callbacks & data */ 229 zthr_checkfunc_t *zthr_checkfunc; 230 zthr_func_t *zthr_func; 231 void *zthr_arg; 232 const char *zthr_name; 233 }; 234 235 static __attribute__((noreturn)) void 236 zthr_procedure(void *arg) 237 { 238 zthr_t *t = arg; 239 240 mutex_enter(&t->zthr_state_lock); 241 ASSERT3P(t->zthr_thread, ==, curthread); 242 243 while (!t->zthr_cancel) { 244 if (t->zthr_checkfunc(t->zthr_arg, t)) { 245 mutex_exit(&t->zthr_state_lock); 246 t->zthr_func(t->zthr_arg, t); 247 mutex_enter(&t->zthr_state_lock); 248 } else { 249 if (t->zthr_sleep_timeout == 0) { 250 cv_wait_idle(&t->zthr_cv, &t->zthr_state_lock); 251 } else { 252 (void) cv_timedwait_idle_hires(&t->zthr_cv, 253 &t->zthr_state_lock, t->zthr_sleep_timeout, 254 MSEC2NSEC(1), 0); 255 } 256 } 257 if (t->zthr_haswaiters) { 258 t->zthr_haswaiters = B_FALSE; 259 cv_broadcast(&t->zthr_wait_cv); 260 } 261 } 262 263 /* 264 * Clear out the kernel thread metadata and notify the 265 * zthr_cancel() thread that we've stopped running. 
266 */ 267 t->zthr_thread = NULL; 268 t->zthr_cancel = B_FALSE; 269 cv_broadcast(&t->zthr_cv); 270 271 mutex_exit(&t->zthr_state_lock); 272 thread_exit(); 273 } 274 275 zthr_t * 276 zthr_create(const char *zthr_name, zthr_checkfunc_t *checkfunc, 277 zthr_func_t *func, void *arg, pri_t pri) 278 { 279 return (zthr_create_timer(zthr_name, checkfunc, 280 func, arg, (hrtime_t)0, pri)); 281 } 282 283 /* 284 * Create a zthr with specified maximum sleep time. If the time 285 * in sleeping state exceeds max_sleep, a wakeup(do the check and 286 * start working if required) will be triggered. 287 */ 288 zthr_t * 289 zthr_create_timer(const char *zthr_name, zthr_checkfunc_t *checkfunc, 290 zthr_func_t *func, void *arg, hrtime_t max_sleep, pri_t pri) 291 { 292 zthr_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP); 293 mutex_init(&t->zthr_state_lock, NULL, MUTEX_DEFAULT, NULL); 294 mutex_init(&t->zthr_request_lock, NULL, MUTEX_DEFAULT, NULL); 295 cv_init(&t->zthr_cv, NULL, CV_DEFAULT, NULL); 296 cv_init(&t->zthr_wait_cv, NULL, CV_DEFAULT, NULL); 297 298 mutex_enter(&t->zthr_state_lock); 299 t->zthr_checkfunc = checkfunc; 300 t->zthr_func = func; 301 t->zthr_arg = arg; 302 t->zthr_sleep_timeout = max_sleep; 303 t->zthr_name = zthr_name; 304 t->zthr_pri = pri; 305 306 t->zthr_thread = thread_create_named(zthr_name, NULL, 0, 307 zthr_procedure, t, 0, &p0, TS_RUN, pri); 308 309 mutex_exit(&t->zthr_state_lock); 310 311 return (t); 312 } 313 314 void 315 zthr_destroy(zthr_t *t) 316 { 317 ASSERT(!MUTEX_HELD(&t->zthr_state_lock)); 318 ASSERT(!MUTEX_HELD(&t->zthr_request_lock)); 319 VERIFY3P(t->zthr_thread, ==, NULL); 320 mutex_destroy(&t->zthr_request_lock); 321 mutex_destroy(&t->zthr_state_lock); 322 cv_destroy(&t->zthr_cv); 323 cv_destroy(&t->zthr_wait_cv); 324 kmem_free(t, sizeof (*t)); 325 } 326 327 /* 328 * Wake up the zthr if it is sleeping. If the thread has been cancelled 329 * or is in the process of being cancelled, this is a no-op. 
330 */ 331 void 332 zthr_wakeup(zthr_t *t) 333 { 334 mutex_enter(&t->zthr_state_lock); 335 336 /* 337 * There are 5 states that we can find the zthr when issuing 338 * this broadcast: 339 * 340 * [1] The common case of the thread being asleep, at which 341 * point the broadcast will wake it up. 342 * [2] The thread has been cancelled. Waking up a cancelled 343 * thread is a no-op. Any work that is still left to be 344 * done should be handled the next time the thread is 345 * resumed. 346 * [3] The thread is doing work and is already up, so this 347 * is basically a no-op. 348 * [4] The thread was just created/resumed, in which case the 349 * behavior is similar to [3]. 350 * [5] The thread is in the middle of being cancelled, which 351 * will be a no-op. 352 */ 353 cv_broadcast(&t->zthr_cv); 354 355 mutex_exit(&t->zthr_state_lock); 356 } 357 358 /* 359 * Sends a cancel request to the zthr and blocks until the zthr is 360 * cancelled. If the zthr is not running (e.g. has been cancelled 361 * already), this is a no-op. Note that this function should not be 362 * called from syncing context as it could deadlock with the zthr_func. 363 */ 364 void 365 zthr_cancel(zthr_t *t) 366 { 367 mutex_enter(&t->zthr_request_lock); 368 mutex_enter(&t->zthr_state_lock); 369 370 /* 371 * Since we are holding the zthr_state_lock at this point 372 * we can find the state in one of the following 4 states: 373 * 374 * [1] The thread has already been cancelled, therefore 375 * there is nothing for us to do. 376 * [2] The thread is sleeping so we set the flag, broadcast 377 * the CV and wait for it to exit. 378 * [3] The thread is doing work, in which case we just set 379 * the flag and wait for it to finish. 380 * [4] The thread was just created/resumed, in which case 381 * the behavior is similar to [3]. 382 * 383 * Since requests are serialized, by the time that we get 384 * control back we expect that the zthr is cancelled and 385 * not running anymore. 
386 */ 387 if (t->zthr_thread != NULL) { 388 t->zthr_cancel = B_TRUE; 389 390 /* broadcast in case the zthr is sleeping */ 391 cv_broadcast(&t->zthr_cv); 392 393 while (t->zthr_thread != NULL) 394 cv_wait(&t->zthr_cv, &t->zthr_state_lock); 395 396 ASSERT(!t->zthr_cancel); 397 } 398 399 mutex_exit(&t->zthr_state_lock); 400 mutex_exit(&t->zthr_request_lock); 401 } 402 403 /* 404 * Sends a resume request to the supplied zthr. If the zthr is already 405 * running this is a no-op. Note that this function should not be 406 * called from syncing context as it could deadlock with the zthr_func. 407 */ 408 void 409 zthr_resume(zthr_t *t) 410 { 411 mutex_enter(&t->zthr_request_lock); 412 mutex_enter(&t->zthr_state_lock); 413 414 ASSERT3P(&t->zthr_checkfunc, !=, NULL); 415 ASSERT3P(&t->zthr_func, !=, NULL); 416 ASSERT(!t->zthr_cancel); 417 ASSERT(!t->zthr_haswaiters); 418 419 /* 420 * There are 4 states that we find the zthr in at this point 421 * given the locks that we hold: 422 * 423 * [1] The zthr was cancelled, so we spawn a new thread for 424 * the zthr (common case). 425 * [2] The zthr is running at which point this is a no-op. 426 * [3] The zthr is sleeping at which point this is a no-op. 427 * [4] The zthr was just spawned at which point this is a 428 * no-op. 429 */ 430 if (t->zthr_thread == NULL) { 431 t->zthr_thread = thread_create_named(t->zthr_name, NULL, 0, 432 zthr_procedure, t, 0, &p0, TS_RUN, t->zthr_pri); 433 } 434 435 mutex_exit(&t->zthr_state_lock); 436 mutex_exit(&t->zthr_request_lock); 437 } 438 439 /* 440 * This function is intended to be used by the zthr itself 441 * (specifically the zthr_func callback provided) to check 442 * if another thread has signaled it to stop running before 443 * doing some expensive operation. 444 * 445 * returns TRUE if we are in the middle of trying to cancel 446 * this thread. 447 * 448 * returns FALSE otherwise. 
449 */ 450 boolean_t 451 zthr_iscancelled(zthr_t *t) 452 { 453 ASSERT3P(t->zthr_thread, ==, curthread); 454 455 /* 456 * The majority of the functions here grab zthr_request_lock 457 * first and then zthr_state_lock. This function only grabs 458 * the zthr_state_lock. That is because this function should 459 * only be called from the zthr_func to check if someone has 460 * issued a zthr_cancel() on the thread. If there is a zthr_cancel() 461 * happening concurrently, attempting to grab the request lock 462 * here would result in a deadlock. 463 * 464 * By grabbing only the zthr_state_lock this function is allowed 465 * to run concurrently with a zthr_cancel() request. 466 */ 467 mutex_enter(&t->zthr_state_lock); 468 boolean_t cancelled = t->zthr_cancel; 469 mutex_exit(&t->zthr_state_lock); 470 return (cancelled); 471 } 472 473 boolean_t 474 zthr_iscurthread(zthr_t *t) 475 { 476 return (t->zthr_thread == curthread); 477 } 478 479 /* 480 * Wait for the zthr to finish its current function. Similar to 481 * zthr_iscancelled, you can use zthr_has_waiters to have the zthr_func end 482 * early. Unlike zthr_cancel, the thread is not destroyed. If the zthr was 483 * sleeping or cancelled, return immediately. 484 */ 485 void 486 zthr_wait_cycle_done(zthr_t *t) 487 { 488 mutex_enter(&t->zthr_state_lock); 489 490 /* 491 * Since we are holding the zthr_state_lock at this point 492 * we can find the state in one of the following 5 states: 493 * 494 * [1] The thread has already cancelled, therefore 495 * there is nothing for us to do. 496 * [2] The thread is sleeping so we set the flag, broadcast 497 * the CV and wait for it to exit. 498 * [3] The thread is doing work, in which case we just set 499 * the flag and wait for it to finish. 500 * [4] The thread was just created/resumed, in which case 501 * the behavior is similar to [3]. 502 * [5] The thread is the middle of being cancelled, which is 503 * similar to [3]. 
We'll wait for the cancel, which is 504 * waiting for the zthr func. 505 * 506 * Since requests are serialized, by the time that we get 507 * control back we expect that the zthr has completed it's 508 * zthr_func. 509 */ 510 if (t->zthr_thread != NULL) { 511 t->zthr_haswaiters = B_TRUE; 512 513 /* broadcast in case the zthr is sleeping */ 514 cv_broadcast(&t->zthr_cv); 515 516 while ((t->zthr_haswaiters) && (t->zthr_thread != NULL)) 517 cv_wait(&t->zthr_wait_cv, &t->zthr_state_lock); 518 519 ASSERT(!t->zthr_haswaiters); 520 } 521 522 mutex_exit(&t->zthr_state_lock); 523 } 524 525 /* 526 * This function is intended to be used by the zthr itself 527 * to check if another thread is waiting on it to finish 528 * 529 * returns TRUE if we have been asked to finish. 530 * 531 * returns FALSE otherwise. 532 */ 533 boolean_t 534 zthr_has_waiters(zthr_t *t) 535 { 536 ASSERT3P(t->zthr_thread, ==, curthread); 537 538 mutex_enter(&t->zthr_state_lock); 539 540 /* 541 * Similarly to zthr_iscancelled(), we only grab the 542 * zthr_state_lock so that the zthr itself can use this 543 * to check for the request. 544 */ 545 boolean_t has_waiters = t->zthr_haswaiters; 546 mutex_exit(&t->zthr_state_lock); 547 return (has_waiters); 548 } 549