/*-
 * Copyright (c) 2017 Hans Petter Selasky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/compat.h>
#include <linux/spinlock.h>

#include <sys/kernel.h>

/*
 * Define all work struct states
 */
enum {
	WORK_ST_IDLE,			/* idle - not started */
	WORK_ST_TIMER,			/* timer is being started */
	WORK_ST_TASK,			/* taskqueue is being queued */
	WORK_ST_EXEC,			/* callback is being called */
	WORK_ST_CANCEL,			/* cancel is being requested */
	WORK_ST_MAX,
};

/*
 * Define global workqueues
 */
static struct workqueue_struct *linux_system_short_wq;
static struct workqueue_struct *linux_system_long_wq;

struct workqueue_struct *system_wq;
struct workqueue_struct *system_long_wq;
struct workqueue_struct *system_unbound_wq;
struct workqueue_struct *system_power_efficient_wq;

static int linux_default_wq_cpus = 4;

static void linux_delayed_work_timer_fn(void *);

/*
 * This function atomically updates the work state and returns the
 * previous state at the time of update.
 */
static uint8_t
linux_update_state(atomic_t *v, const uint8_t *pstate)
{
	int c, old;

	c = v->counter;

	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
		c = old;

	return (c);
}

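/*
 * Each function below drives the state machine above through a small
 * per-function transition table: the table is indexed by the current
 * state and linux_update_state() atomically installs the new state
 * while returning the previous one. For example, the table used by
 * linux_queue_work_on() maps WORK_ST_IDLE to WORK_ST_TASK, so a
 * caller that sees WORK_ST_IDLE returned knows it won the race and is
 * responsible for enqueueing the task.
 */
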
/*
 * A LinuxKPI task is allowed to free itself inside the callback
 * function and cannot safely be referred to after the callback
 * function has completed. This function gives the linux_work_fn()
 * function a hint that the task is not going away and can have its
 * state checked again. Without this extra hint LinuxKPI tasks cannot
 * be serialized across multiple worker threads.
 */
static bool
linux_work_exec_unblock(struct work_struct *work)
{
	struct workqueue_struct *wq;
	struct work_exec *exec;
	bool retval = 0;

	wq = work->work_queue;
	if (unlikely(wq == NULL))
		goto done;

	WQ_EXEC_LOCK(wq);
	TAILQ_FOREACH(exec, &wq->exec_head, entry) {
		if (exec->target == work) {
			exec->target = NULL;
			retval = 1;
			break;
		}
	}
	WQ_EXEC_UNLOCK(wq);
done:
	return (retval);
}

static void
linux_delayed_work_enqueue(struct delayed_work *dwork)
{
	struct taskqueue *tq;

	tq = dwork->work.work_queue->taskqueue;
	taskqueue_enqueue(tq, &dwork->work.work_task);
}

/*
 * This function queues the given work structure on the given
 * workqueue. It returns non-zero if the work was successfully
 * [re-]queued. Else the work is already pending for completion.
 */
bool
linux_queue_work_on(int cpu __unused, struct workqueue_struct *wq,
    struct work_struct *work)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_TASK,		/* start queuing task */
		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
		[WORK_ST_EXEC] = WORK_ST_TASK,		/* queue task another time */
		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* start queuing task again */
	};

	if (atomic_read(&wq->draining) != 0)
		return (!work_pending(work));

	switch (linux_update_state(&work->state, states)) {
	case WORK_ST_EXEC:
	case WORK_ST_CANCEL:
		if (linux_work_exec_unblock(work) != 0)
			return (1);
		/* FALLTHROUGH */
	case WORK_ST_IDLE:
		work->work_queue = wq;
		taskqueue_enqueue(wq->taskqueue, &work->work_task);
		return (1);
	default:
		return (0);		/* already on a queue */
	}
}

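/*
 * Illustrative caller-side sketch (hypothetical driver code): Linux
 * consumers normally reach this function through the queue_work()
 * wrapper in <linux/workqueue.h>; the CPU hint is ignored here:
 *
 *	INIT_WORK(&sc->reset_work, my_reset_fn);
 *	queue_work(system_wq, &sc->reset_work);
 */
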
/*
 * This function queues the given work structure on the given
 * workqueue after a given delay in ticks. It returns non-zero if the
 * work was successfully [re-]queued. Else the work is already pending
 * for completion.
 */
bool
linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
    struct delayed_work *dwork, unsigned delay)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_TIMER,		/* start timeout */
		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
		[WORK_ST_EXEC] = WORK_ST_TIMER,		/* start timeout */
		[WORK_ST_CANCEL] = WORK_ST_TIMER,	/* start timeout */
	};

	if (atomic_read(&wq->draining) != 0)
		return (!work_pending(&dwork->work));

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_EXEC:
	case WORK_ST_CANCEL:
		if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) {
			dwork->timer.expires = jiffies;
			return (1);
		}
		/* FALLTHROUGH */
	case WORK_ST_IDLE:
		dwork->work.work_queue = wq;
		dwork->timer.expires = jiffies + delay;

		if (delay == 0) {
			linux_delayed_work_enqueue(dwork);
		} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
			mtx_lock(&dwork->timer.mtx);
			callout_reset_on(&dwork->timer.callout, delay,
			    &linux_delayed_work_timer_fn, dwork, cpu);
			mtx_unlock(&dwork->timer.mtx);
		} else {
			mtx_lock(&dwork->timer.mtx);
			callout_reset(&dwork->timer.callout, delay,
			    &linux_delayed_work_timer_fn, dwork);
			mtx_unlock(&dwork->timer.mtx);
		}
		return (1);
	default:
		return (0);		/* already on a queue */
	}
}

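/*
 * This is the taskqueue callback that runs a work item. It registers
 * a "work_exec" entry on the workqueue's executor list so that
 * linux_queue_work_on() can signal, through linux_work_exec_unblock(),
 * that the work was re-queued while its callback was running. In that
 * case "exec.target" has been cleared, the work structure is known to
 * still be valid, and the loop below checks its state again, possibly
 * running the callback another time. Otherwise the function returns
 * without touching the work structure again, because the callback may
 * have freed it.
 */
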
void
linux_work_fn(void *context, int pending)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_EXEC,		/* delayed work w/o timeout */
		[WORK_ST_TASK] = WORK_ST_EXEC,		/* call callback */
		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* complete callback */
		[WORK_ST_CANCEL] = WORK_ST_EXEC,	/* failed to cancel */
	};
	struct work_struct *work;
	struct workqueue_struct *wq;
	struct work_exec exec;

	linux_set_current(curthread);

	/* setup local variables */
	work = context;
	wq = work->work_queue;

	/* store target pointer */
	exec.target = work;

	/* insert executor into list */
	WQ_EXEC_LOCK(wq);
	TAILQ_INSERT_TAIL(&wq->exec_head, &exec, entry);
	while (1) {
		switch (linux_update_state(&work->state, states)) {
		case WORK_ST_TIMER:
		case WORK_ST_TASK:
		case WORK_ST_CANCEL:
			WQ_EXEC_UNLOCK(wq);

			/* call work function */
			work->func(work);

			WQ_EXEC_LOCK(wq);
			/* check if unblocked */
			if (exec.target != work) {
				/* reapply block */
				exec.target = work;
				break;
			}
			/* FALLTHROUGH */
		default:
			goto done;
		}
	}
done:
	/* remove executor from list */
	TAILQ_REMOVE(&wq->exec_head, &exec, entry);
	WQ_EXEC_UNLOCK(wq);
}

void
linux_delayed_work_fn(void *context, int pending)
{
	struct delayed_work *dwork = context;

	/*
	 * Make sure the timer belonging to the delayed work gets
	 * drained before invoking the work function. Else the timer
	 * mutex may still be in use which can lead to use-after-free
	 * situations, because the work function might free the work
	 * structure before returning.
	 */
	callout_drain(&dwork->timer.callout);

	linux_work_fn(&dwork->work, pending);
}

static void
linux_delayed_work_timer_fn(void *arg)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_TASK,		/* start queueing task */
		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* failed to cancel */
	};
	struct delayed_work *dwork = arg;

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_TIMER:
	case WORK_ST_CANCEL:
		linux_delayed_work_enqueue(dwork);
		break;
	default:
		break;
	}
}

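/*
 * An undisturbed delayed work item therefore moves through the
 * following stages: linux_queue_delayed_work_on() arms the callout
 * (for a non-zero delay) and sets WORK_ST_TIMER; when the callout
 * fires, linux_delayed_work_timer_fn() sets WORK_ST_TASK and enqueues
 * the taskqueue task; the task runs linux_delayed_work_fn(), which
 * drains the callout and hands off to linux_work_fn(), taking the
 * work through WORK_ST_EXEC and back to WORK_ST_IDLE.
 */
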
/*
 * This function cancels the given work structure in a synchronous
 * fashion. It returns non-zero if the work was successfully
 * cancelled. Else the work was already cancelled.
 */
bool
linux_cancel_work_sync(struct work_struct *work)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* can't happen */
		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
	};
	struct taskqueue *tq;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_cancel_work_sync() might sleep");

	switch (linux_update_state(&work->state, states)) {
	case WORK_ST_IDLE:
	case WORK_ST_TIMER:
		return (0);
	case WORK_ST_EXEC:
		tq = work->work_queue->taskqueue;
		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
			taskqueue_drain(tq, &work->work_task);
		return (0);
	default:
		tq = work->work_queue->taskqueue;
		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
			taskqueue_drain(tq, &work->work_task);
		return (1);
	}
}

/*
 * This function atomically stops the timer and callback. The timer
 * callback will not be called after this function returns. This
 * function returns true when the timeout was cancelled. Else the
 * timeout was not started or has already been called.
 */
static inline bool
linux_cancel_timer(struct delayed_work *dwork, bool drain)
{
	bool cancelled;

	mtx_lock(&dwork->timer.mtx);
	cancelled = (callout_stop(&dwork->timer.callout) == 1);
	mtx_unlock(&dwork->timer.mtx);

	/* check if we should drain */
	if (drain)
		callout_drain(&dwork->timer.callout);
	return (cancelled);
}

/*
 * This function cancels the given delayed work structure in a
 * non-blocking fashion. It returns non-zero if the work was
 * successfully cancelled. Else the work may still be busy or already
 * cancelled.
 */
bool
linux_cancel_delayed_work(struct delayed_work *dwork)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_CANCEL,	/* try to cancel */
		[WORK_ST_TASK] = WORK_ST_CANCEL,	/* try to cancel */
		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
		[WORK_ST_CANCEL] = WORK_ST_CANCEL,	/* NOP */
	};
	struct taskqueue *tq;

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_TIMER:
	case WORK_ST_CANCEL:
		if (linux_cancel_timer(dwork, 0)) {
			atomic_cmpxchg(&dwork->work.state,
			    WORK_ST_CANCEL, WORK_ST_IDLE);
			return (1);
		}
		/* FALLTHROUGH */
	case WORK_ST_TASK:
		tq = dwork->work.work_queue->taskqueue;
		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) == 0) {
			atomic_cmpxchg(&dwork->work.state,
			    WORK_ST_CANCEL, WORK_ST_IDLE);
			return (1);
		}
		/* FALLTHROUGH */
	default:
		return (0);
	}
}

/*
 * This function cancels the given delayed work structure in a
 * synchronous fashion. It returns non-zero if the work was
 * successfully cancelled. Else the work was already cancelled.
 */
bool
linux_cancel_delayed_work_sync(struct delayed_work *dwork)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_IDLE,		/* cancel and drain */
		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
	};
	struct taskqueue *tq;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_cancel_delayed_work_sync() might sleep");

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_IDLE:
		return (0);
	case WORK_ST_EXEC:
		tq = dwork->work.work_queue->taskqueue;
		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
			taskqueue_drain(tq, &dwork->work.work_task);
		return (0);
	case WORK_ST_TIMER:
	case WORK_ST_CANCEL:
		if (linux_cancel_timer(dwork, 1)) {
			/*
			 * Make sure taskqueue is also drained before
			 * returning:
			 */
			tq = dwork->work.work_queue->taskqueue;
			taskqueue_drain(tq, &dwork->work.work_task);
			return (1);
		}
		/* FALLTHROUGH */
	default:
		tq = dwork->work.work_queue->taskqueue;
		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
			taskqueue_drain(tq, &dwork->work.work_task);
		return (1);
	}
}

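/*
 * Illustrative teardown sketch (hypothetical driver code): a typical
 * consumer pairs the delayed work API with a synchronous cancel on
 * detach, so that no callback can run after its softc has been freed:
 *
 *	INIT_DELAYED_WORK(&sc->poll_work, my_poll_fn);
 *	schedule_delayed_work(&sc->poll_work, msecs_to_jiffies(1000));
 *	...
 *	cancel_delayed_work_sync(&sc->poll_work);
 */
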
/*
 * This function waits until the given work structure is completed.
 * It returns non-zero if the work was successfully waited for. Else
 * the work was not waited for.
 */
bool
linux_flush_work(struct work_struct *work)
{
	struct taskqueue *tq;
	int retval;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_flush_work() might sleep");

	switch (atomic_read(&work->state)) {
	case WORK_ST_IDLE:
		return (0);
	default:
		tq = work->work_queue->taskqueue;
		retval = taskqueue_poll_is_busy(tq, &work->work_task);
		taskqueue_drain(tq, &work->work_task);
		return (retval);
	}
}

/*
 * This function waits until the given delayed work structure is
 * completed. It returns non-zero if the work was successfully waited
 * for. Else the work was not waited for.
 */
bool
linux_flush_delayed_work(struct delayed_work *dwork)
{
	struct taskqueue *tq;
	int retval;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_flush_delayed_work() might sleep");

	switch (atomic_read(&dwork->work.state)) {
	case WORK_ST_IDLE:
		return (0);
	case WORK_ST_TIMER:
		if (linux_cancel_timer(dwork, 1))
			linux_delayed_work_enqueue(dwork);
		/* FALLTHROUGH */
	default:
		tq = dwork->work.work_queue->taskqueue;
		retval = taskqueue_poll_is_busy(tq, &dwork->work.work_task);
		taskqueue_drain(tq, &dwork->work.work_task);
		return (retval);
	}
}

/*
 * This function returns true if the given work is pending, and not
 * yet executing:
 */
bool
linux_work_pending(struct work_struct *work)
{
	switch (atomic_read(&work->state)) {
	case WORK_ST_TIMER:
	case WORK_ST_TASK:
	case WORK_ST_CANCEL:
		return (1);
	default:
		return (0);
	}
}

/*
 * This function returns true if the given work is busy.
 */
bool
linux_work_busy(struct work_struct *work)
{
	struct taskqueue *tq;

	switch (atomic_read(&work->state)) {
	case WORK_ST_IDLE:
		return (0);
	case WORK_ST_EXEC:
		tq = work->work_queue->taskqueue;
		return (taskqueue_poll_is_busy(tq, &work->work_task));
	default:
		return (1);
	}
}

struct workqueue_struct *
linux_create_workqueue_common(const char *name, int cpus)
{
	struct workqueue_struct *wq;

	/*
	 * If zero CPUs are specified use the default number of CPUs:
	 */
	if (cpus == 0)
		cpus = linux_default_wq_cpus;

	wq = kmalloc(sizeof(*wq), M_WAITOK | M_ZERO);
	wq->taskqueue = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue, &wq->taskqueue);
	atomic_set(&wq->draining, 0);
	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
	TAILQ_INIT(&wq->exec_head);
	mtx_init(&wq->exec_mtx, "linux_wq_exec", NULL, MTX_DEF);

	return (wq);
}

void
linux_destroy_workqueue(struct workqueue_struct *wq)
{
	atomic_inc(&wq->draining);
	drain_workqueue(wq);
	taskqueue_free(wq->taskqueue);
	mtx_destroy(&wq->exec_mtx);
	kfree(wq);
}

void
linux_init_delayed_work(struct delayed_work *dwork, work_func_t func)
{
	memset(dwork, 0, sizeof(*dwork));
	dwork->work.func = func;
	TASK_INIT(&dwork->work.work_task, 0, linux_delayed_work_fn, dwork);
	mtx_init(&dwork->timer.mtx, spin_lock_name("lkpi-dwork"), NULL,
	    MTX_DEF | MTX_NOWITNESS);
	callout_init_mtx(&dwork->timer.callout, &dwork->timer.mtx, 0);
}

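/*
 * The two workqueues created below at SYSINIT time back all of the
 * exported Linux "system_*" workqueue pointers. Each is served by
 * mp_ncpus + 1 taskqueue threads, but never fewer than four, to avoid
 * deadlocks when only a small number of worker threads is available.
 */
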
static void
linux_work_init(void *arg)
{
	int max_wq_cpus = mp_ncpus + 1;

	/* avoid deadlock when there are too few threads */
	if (max_wq_cpus < 4)
		max_wq_cpus = 4;

	/* set default number of CPUs */
	linux_default_wq_cpus = max_wq_cpus;

	linux_system_short_wq = alloc_workqueue("linuxkpi_short_wq", 0, max_wq_cpus);
	linux_system_long_wq = alloc_workqueue("linuxkpi_long_wq", 0, max_wq_cpus);

	/* populate the workqueue pointers */
	system_long_wq = linux_system_long_wq;
	system_wq = linux_system_short_wq;
	system_power_efficient_wq = linux_system_short_wq;
	system_unbound_wq = linux_system_short_wq;
}
SYSINIT(linux_work_init, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_init, NULL);

static void
linux_work_uninit(void *arg)
{
	destroy_workqueue(linux_system_short_wq);
	destroy_workqueue(linux_system_long_wq);

	/* clear workqueue pointers */
	system_long_wq = NULL;
	system_wq = NULL;
	system_power_efficient_wq = NULL;
	system_unbound_wq = NULL;
}
SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, NULL);