1 /*- 2 * Copyright (c) 2000 Doug Rabson 3 * Copyright (c) 2014 Jeff Roberson 4 * Copyright (c) 2016 Matthew Macy 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/bus.h> 35 #include <sys/cpuset.h> 36 #include <sys/interrupt.h> 37 #include <sys/kernel.h> 38 #include <sys/kthread.h> 39 #include <sys/libkern.h> 40 #include <sys/limits.h> 41 #include <sys/lock.h> 42 #include <sys/malloc.h> 43 #include <sys/mutex.h> 44 #include <sys/proc.h> 45 #include <sys/sched.h> 46 #include <sys/smp.h> 47 #include <sys/gtaskqueue.h> 48 #include <sys/unistd.h> 49 #include <machine/stdarg.h> 50 51 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues"); 52 static void gtaskqueue_thread_enqueue(void *); 53 static void gtaskqueue_thread_loop(void *arg); 54 static int task_is_running(struct gtaskqueue *queue, struct gtask *gtask); 55 static void gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask); 56 57 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1); 58 TASKQGROUP_DEFINE(config, 1, 1); 59 60 struct gtaskqueue_busy { 61 struct gtask *tb_running; 62 TAILQ_ENTRY(gtaskqueue_busy) tb_link; 63 }; 64 65 static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; 66 67 struct gtaskqueue { 68 STAILQ_HEAD(, gtask) tq_queue; 69 gtaskqueue_enqueue_fn tq_enqueue; 70 void *tq_context; 71 char *tq_name; 72 TAILQ_HEAD(, gtaskqueue_busy) tq_active; 73 struct mtx tq_mutex; 74 struct thread **tq_threads; 75 int tq_tcount; 76 int tq_spin; 77 int tq_flags; 78 int tq_callouts; 79 taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS]; 80 void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; 81 }; 82 83 #define TQ_FLAGS_ACTIVE (1 << 0) 84 #define TQ_FLAGS_BLOCKED (1 << 1) 85 #define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) 86 87 #define DT_CALLOUT_ARMED (1 << 0) 88 89 #define TQ_LOCK(tq) \ 90 do { \ 91 if ((tq)->tq_spin) \ 92 mtx_lock_spin(&(tq)->tq_mutex); \ 93 else \ 94 mtx_lock(&(tq)->tq_mutex); \ 95 } while (0) 96 #define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED) 97 98 #define TQ_UNLOCK(tq) \ 99 do { \ 100 if ((tq)->tq_spin) \ 101 mtx_unlock_spin(&(tq)->tq_mutex); \ 102 else \ 103 mtx_unlock(&(tq)->tq_mutex); \ 104 } while (0) 105 #define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED) 106 107 #ifdef INVARIANTS 108 static void 109 gtask_dump(struct gtask *gtask) 110 { 111 printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n", 112 gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context); 113 } 114 #endif 115 116 static __inline int 117 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, 118 int t) 119 { 120 if (tq->tq_spin) 121 return (msleep_spin(p, m, wm, t)); 122 return (msleep(p, m, pri, wm, t)); 123 } 124 125 static struct gtaskqueue * 126 _gtaskqueue_create(const char *name, int mflags, 127 taskqueue_enqueue_fn enqueue, void *context, 128 int mtxflags, const char *mtxname __unused) 129 { 130 struct gtaskqueue *queue; 131 char *tq_name; 132 133 tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO); 134 if (!tq_name) 135 return (NULL); 136 137 snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue"); 138 139 queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO); 140 if (!queue) { 141 free(tq_name, M_GTASKQUEUE); 142 return (NULL); 143 } 144 145 STAILQ_INIT(&queue->tq_queue); 146 TAILQ_INIT(&queue->tq_active); 147 queue->tq_enqueue = enqueue; 148 queue->tq_context = context; 149 queue->tq_name = tq_name; 150 queue->tq_spin = (mtxflags & MTX_SPIN) != 0; 151 queue->tq_flags |= TQ_FLAGS_ACTIVE; 152 if (enqueue == gtaskqueue_thread_enqueue) 153 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE; 154 mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags); 155 156 return (queue); 157 } 158 159 160 /* 161 * Signal a taskqueue thread to terminate. 162 */ 163 static void 164 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq) 165 { 166 167 while (tq->tq_tcount > 0 || tq->tq_callouts > 0) { 168 wakeup(tq); 169 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); 170 } 171 } 172 173 static void 174 gtaskqueue_free(struct gtaskqueue *queue) 175 { 176 177 TQ_LOCK(queue); 178 queue->tq_flags &= ~TQ_FLAGS_ACTIVE; 179 gtaskqueue_terminate(queue->tq_threads, queue); 180 KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); 181 KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks")); 182 mtx_destroy(&queue->tq_mutex); 183 free(queue->tq_threads, M_GTASKQUEUE); 184 free(queue->tq_name, M_GTASKQUEUE); 185 free(queue, M_GTASKQUEUE); 186 } 187 188 /* 189 * Wait for all to complete, then prevent it from being enqueued 190 */ 191 void 192 grouptask_block(struct grouptask *grouptask) 193 { 194 struct gtaskqueue *queue = grouptask->gt_taskqueue; 195 struct gtask *gtask = &grouptask->gt_task; 196 197 #ifdef INVARIANTS 198 if (queue == NULL) { 199 gtask_dump(gtask); 200 panic("queue == NULL"); 201 } 202 #endif 203 TQ_LOCK(queue); 204 gtask->ta_flags |= TASK_NOENQUEUE; 205 gtaskqueue_drain_locked(queue, gtask); 206 TQ_UNLOCK(queue); 207 } 208 209 void 210 grouptask_unblock(struct grouptask *grouptask) 211 { 212 struct gtaskqueue *queue = grouptask->gt_taskqueue; 213 struct gtask *gtask = &grouptask->gt_task; 214 215 #ifdef INVARIANTS 216 if (queue == NULL) { 217 gtask_dump(gtask); 218 panic("queue == NULL"); 219 } 220 #endif 221 TQ_LOCK(queue); 222 gtask->ta_flags &= ~TASK_NOENQUEUE; 223 TQ_UNLOCK(queue); 224 } 225 226 int 227 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) 228 { 229 #ifdef INVARIANTS 230 if (queue == NULL) { 231 gtask_dump(gtask); 232 panic("queue == NULL"); 233 } 234 #endif 235 TQ_LOCK(queue); 236 if (gtask->ta_flags & TASK_ENQUEUED) { 237 TQ_UNLOCK(queue); 238 return (0); 239 } 240 if (gtask->ta_flags & TASK_NOENQUEUE) { 241 TQ_UNLOCK(queue); 242 return (EAGAIN); 243 } 244 STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); 245 gtask->ta_flags |= TASK_ENQUEUED; 246 TQ_UNLOCK(queue); 247 if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) 248 queue->tq_enqueue(queue->tq_context); 249 return (0); 250 } 251 252 static void 253 gtaskqueue_task_nop_fn(void *context) 254 { 255 } 256 257 /* 258 * Block until all currently queued tasks in this taskqueue 259 * have begun execution. Tasks queued during execution of 260 * this function are ignored. 261 */ 262 static void 263 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue) 264 { 265 struct gtask t_barrier; 266 267 if (STAILQ_EMPTY(&queue->tq_queue)) 268 return; 269 270 /* 271 * Enqueue our barrier after all current tasks, but with 272 * the highest priority so that newly queued tasks cannot 273 * pass it. Because of the high priority, we can not use 274 * taskqueue_enqueue_locked directly (which drops the lock 275 * anyway) so just insert it at tail while we have the 276 * queue lock. 277 */ 278 GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier); 279 STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link); 280 t_barrier.ta_flags |= TASK_ENQUEUED; 281 282 /* 283 * Once the barrier has executed, all previously queued tasks 284 * have completed or are currently executing. 285 */ 286 while (t_barrier.ta_flags & TASK_ENQUEUED) 287 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0); 288 } 289 290 /* 291 * Block until all currently executing tasks for this taskqueue 292 * complete. Tasks that begin execution during the execution 293 * of this function are ignored. 294 */ 295 static void 296 gtaskqueue_drain_tq_active(struct gtaskqueue *queue) 297 { 298 struct gtaskqueue_busy tb_marker, *tb_first; 299 300 if (TAILQ_EMPTY(&queue->tq_active)) 301 return; 302 303 /* Block taskq_terminate().*/ 304 queue->tq_callouts++; 305 306 /* 307 * Wait for all currently executing taskqueue threads 308 * to go idle. 309 */ 310 tb_marker.tb_running = TB_DRAIN_WAITER; 311 TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link); 312 while (TAILQ_FIRST(&queue->tq_active) != &tb_marker) 313 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0); 314 TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link); 315 316 /* 317 * Wakeup any other drain waiter that happened to queue up 318 * without any intervening active thread. 319 */ 320 tb_first = TAILQ_FIRST(&queue->tq_active); 321 if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER) 322 wakeup(tb_first); 323 324 /* Release taskqueue_terminate(). */ 325 queue->tq_callouts--; 326 if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0) 327 wakeup_one(queue->tq_threads); 328 } 329 330 void 331 gtaskqueue_block(struct gtaskqueue *queue) 332 { 333 334 TQ_LOCK(queue); 335 queue->tq_flags |= TQ_FLAGS_BLOCKED; 336 TQ_UNLOCK(queue); 337 } 338 339 void 340 gtaskqueue_unblock(struct gtaskqueue *queue) 341 { 342 343 TQ_LOCK(queue); 344 queue->tq_flags &= ~TQ_FLAGS_BLOCKED; 345 if (!STAILQ_EMPTY(&queue->tq_queue)) 346 queue->tq_enqueue(queue->tq_context); 347 TQ_UNLOCK(queue); 348 } 349 350 static void 351 gtaskqueue_run_locked(struct gtaskqueue *queue) 352 { 353 struct gtaskqueue_busy tb; 354 struct gtaskqueue_busy *tb_first; 355 struct gtask *gtask; 356 357 KASSERT(queue != NULL, ("tq is NULL")); 358 TQ_ASSERT_LOCKED(queue); 359 tb.tb_running = NULL; 360 361 while (STAILQ_FIRST(&queue->tq_queue)) { 362 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link); 363 364 /* 365 * Carefully remove the first task from the queue and 366 * clear its TASK_ENQUEUED flag 367 */ 368 gtask = STAILQ_FIRST(&queue->tq_queue); 369 KASSERT(gtask != NULL, ("task is NULL")); 370 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); 371 gtask->ta_flags &= ~TASK_ENQUEUED; 372 tb.tb_running = gtask; 373 TQ_UNLOCK(queue); 374 375 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL")); 376 gtask->ta_func(gtask->ta_context); 377 378 TQ_LOCK(queue); 379 tb.tb_running = NULL; 380 wakeup(gtask); 381 382 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); 383 tb_first = TAILQ_FIRST(&queue->tq_active); 384 if (tb_first != NULL && 385 tb_first->tb_running == TB_DRAIN_WAITER) 386 wakeup(tb_first); 387 } 388 } 389 390 static int 391 task_is_running(struct gtaskqueue *queue, struct gtask *gtask) 392 { 393 struct gtaskqueue_busy *tb; 394 395 TQ_ASSERT_LOCKED(queue); 396 TAILQ_FOREACH(tb, &queue->tq_active, tb_link) { 397 if (tb->tb_running == gtask) 398 return (1); 399 } 400 return (0); 401 } 402 403 static int 404 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask) 405 { 406 407 if (gtask->ta_flags & TASK_ENQUEUED) 408 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link); 409 gtask->ta_flags &= ~TASK_ENQUEUED; 410 return (task_is_running(queue, gtask) ? EBUSY : 0); 411 } 412 413 int 414 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask) 415 { 416 int error; 417 418 TQ_LOCK(queue); 419 error = gtaskqueue_cancel_locked(queue, gtask); 420 TQ_UNLOCK(queue); 421 422 return (error); 423 } 424 425 static void 426 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask) 427 { 428 while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) 429 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); 430 } 431 432 void 433 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) 434 { 435 436 if (!queue->tq_spin) 437 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 438 439 TQ_LOCK(queue); 440 gtaskqueue_drain_locked(queue, gtask); 441 TQ_UNLOCK(queue); 442 } 443 444 void 445 gtaskqueue_drain_all(struct gtaskqueue *queue) 446 { 447 448 if (!queue->tq_spin) 449 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 450 451 TQ_LOCK(queue); 452 gtaskqueue_drain_tq_queue(queue); 453 gtaskqueue_drain_tq_active(queue); 454 TQ_UNLOCK(queue); 455 } 456 457 static int 458 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, 459 cpuset_t *mask, const char *name, va_list ap) 460 { 461 char ktname[MAXCOMLEN + 1]; 462 struct thread *td; 463 struct gtaskqueue *tq; 464 int i, error; 465 466 if (count <= 0) 467 return (EINVAL); 468 469 vsnprintf(ktname, sizeof(ktname), name, ap); 470 tq = *tqp; 471 472 tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE, 473 M_NOWAIT | M_ZERO); 474 if (tq->tq_threads == NULL) { 475 printf("%s: no memory for %s threads\n", __func__, ktname); 476 return (ENOMEM); 477 } 478 479 for (i = 0; i < count; i++) { 480 if (count == 1) 481 error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, 482 &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname); 483 else 484 error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, 485 &tq->tq_threads[i], RFSTOPPED, 0, 486 "%s_%d", ktname, i); 487 if (error) { 488 /* should be ok to continue, taskqueue_free will dtrt */ 489 printf("%s: kthread_add(%s): error %d", __func__, 490 ktname, error); 491 tq->tq_threads[i] = NULL; /* paranoid */ 492 } else 493 tq->tq_tcount++; 494 } 495 for (i = 0; i < count; i++) { 496 if (tq->tq_threads[i] == NULL) 497 continue; 498 td = tq->tq_threads[i]; 499 if (mask) { 500 error = cpuset_setthread(td->td_tid, mask); 501 /* 502 * Failing to pin is rarely an actual fatal error; 503 * it'll just affect performance. 504 */ 505 if (error) 506 printf("%s: curthread=%llu: can't pin; " 507 "error=%d\n", 508 __func__, 509 (unsigned long long) td->td_tid, 510 error); 511 } 512 thread_lock(td); 513 sched_prio(td, pri); 514 sched_add(td, SRQ_BORING); 515 thread_unlock(td); 516 } 517 518 return (0); 519 } 520 521 static int 522 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, 523 const char *name, ...) 524 { 525 va_list ap; 526 int error; 527 528 va_start(ap, name); 529 error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); 530 va_end(ap); 531 return (error); 532 } 533 534 static inline void 535 gtaskqueue_run_callback(struct gtaskqueue *tq, 536 enum taskqueue_callback_type cb_type) 537 { 538 taskqueue_callback_fn tq_callback; 539 540 TQ_ASSERT_UNLOCKED(tq); 541 tq_callback = tq->tq_callbacks[cb_type]; 542 if (tq_callback != NULL) 543 tq_callback(tq->tq_cb_contexts[cb_type]); 544 } 545 546 static void 547 gtaskqueue_thread_loop(void *arg) 548 { 549 struct gtaskqueue **tqp, *tq; 550 551 tqp = arg; 552 tq = *tqp; 553 gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); 554 TQ_LOCK(tq); 555 while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { 556 /* XXX ? */ 557 gtaskqueue_run_locked(tq); 558 /* 559 * Because taskqueue_run() can drop tq_mutex, we need to 560 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the 561 * meantime, which means we missed a wakeup. 562 */ 563 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) 564 break; 565 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); 566 } 567 gtaskqueue_run_locked(tq); 568 /* 569 * This thread is on its way out, so just drop the lock temporarily 570 * in order to call the shutdown callback. This allows the callback 571 * to look at the taskqueue, even just before it dies. 572 */ 573 TQ_UNLOCK(tq); 574 gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN); 575 TQ_LOCK(tq); 576 577 /* rendezvous with thread that asked us to terminate */ 578 tq->tq_tcount--; 579 wakeup_one(tq->tq_threads); 580 TQ_UNLOCK(tq); 581 kthread_exit(); 582 } 583 584 static void 585 gtaskqueue_thread_enqueue(void *context) 586 { 587 struct gtaskqueue **tqp, *tq; 588 589 tqp = context; 590 tq = *tqp; 591 wakeup_one(tq); 592 } 593 594 595 static struct gtaskqueue * 596 gtaskqueue_create_fast(const char *name, int mflags, 597 taskqueue_enqueue_fn enqueue, void *context) 598 { 599 return _gtaskqueue_create(name, mflags, enqueue, context, 600 MTX_SPIN, "fast_taskqueue"); 601 } 602 603 604 struct taskqgroup_cpu { 605 LIST_HEAD(, grouptask) tgc_tasks; 606 struct gtaskqueue *tgc_taskq; 607 int tgc_cnt; 608 int tgc_cpu; 609 }; 610 611 struct taskqgroup { 612 struct taskqgroup_cpu tqg_queue[MAXCPU]; 613 struct mtx tqg_lock; 614 const char * tqg_name; 615 int tqg_adjusting; 616 int tqg_stride; 617 int tqg_cnt; 618 }; 619 620 struct taskq_bind_task { 621 struct gtask bt_task; 622 int bt_cpuid; 623 }; 624 625 static void 626 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu) 627 { 628 struct taskqgroup_cpu *qcpu; 629 630 qcpu = &qgroup->tqg_queue[idx]; 631 LIST_INIT(&qcpu->tgc_tasks); 632 qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, 633 taskqueue_thread_enqueue, &qcpu->tgc_taskq); 634 gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, 635 "%s_%d", qgroup->tqg_name, idx); 636 qcpu->tgc_cpu = cpu; 637 } 638 639 static void 640 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) 641 { 642 643 gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); 644 } 645 646 /* 647 * Find the taskq with least # of tasks that doesn't currently have any 648 * other queues from the uniq identifier. 649 */ 650 static int 651 taskqgroup_find(struct taskqgroup *qgroup, void *uniq) 652 { 653 struct grouptask *n; 654 int i, idx, mincnt; 655 int strict; 656 657 mtx_assert(&qgroup->tqg_lock, MA_OWNED); 658 if (qgroup->tqg_cnt == 0) 659 return (0); 660 idx = -1; 661 mincnt = INT_MAX; 662 /* 663 * Two passes; First scan for a queue with the least tasks that 664 * does not already service this uniq id. If that fails simply find 665 * the queue with the least total tasks; 666 */ 667 for (strict = 1; mincnt == INT_MAX; strict = 0) { 668 for (i = 0; i < qgroup->tqg_cnt; i++) { 669 if (qgroup->tqg_queue[i].tgc_cnt > mincnt) 670 continue; 671 if (strict) { 672 LIST_FOREACH(n, 673 &qgroup->tqg_queue[i].tgc_tasks, gt_list) 674 if (n->gt_uniq == uniq) 675 break; 676 if (n != NULL) 677 continue; 678 } 679 mincnt = qgroup->tqg_queue[i].tgc_cnt; 680 idx = i; 681 } 682 } 683 if (idx == -1) 684 panic("taskqgroup_find: Failed to pick a qid."); 685 686 return (idx); 687 } 688 689 /* 690 * smp_started is unusable since it is not set for UP kernels or even for 691 * SMP kernels when there is 1 CPU. This is usually handled by adding a 692 * (mp_ncpus == 1) test, but that would be broken here since we need to 693 * to synchronize with the SI_SUB_SMP ordering. Even in the pure SMP case 694 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP. 695 * 696 * So maintain our own flag. It must be set after all CPUs are started 697 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed 698 * adjustment is properly delayed. SI_ORDER_FOURTH is clearly before 699 * SI_ORDER_ANY and unclearly after the CPUs are started. It would be 700 * simpler for adjustment to pass a flag indicating if it is delayed. 701 */ 702 703 static int tqg_smp_started; 704 705 static void 706 tqg_record_smp_started(void *arg) 707 { 708 tqg_smp_started = 1; 709 } 710 711 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH, 712 tqg_record_smp_started, NULL); 713 714 void 715 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, 716 void *uniq, int irq, const char *name) 717 { 718 cpuset_t mask; 719 int qid, error; 720 721 gtask->gt_uniq = uniq; 722 snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask"); 723 gtask->gt_irq = irq; 724 gtask->gt_cpu = -1; 725 mtx_lock(&qgroup->tqg_lock); 726 qid = taskqgroup_find(qgroup, uniq); 727 qgroup->tqg_queue[qid].tgc_cnt++; 728 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); 729 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 730 if (irq != -1 && tqg_smp_started) { 731 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu; 732 CPU_ZERO(&mask); 733 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); 734 mtx_unlock(&qgroup->tqg_lock); 735 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); 736 if (error) 737 printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error); 738 } else 739 mtx_unlock(&qgroup->tqg_lock); 740 } 741 742 static void 743 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) 744 { 745 cpuset_t mask; 746 int qid, cpu, error; 747 748 mtx_lock(&qgroup->tqg_lock); 749 qid = taskqgroup_find(qgroup, gtask->gt_uniq); 750 cpu = qgroup->tqg_queue[qid].tgc_cpu; 751 if (gtask->gt_irq != -1) { 752 mtx_unlock(&qgroup->tqg_lock); 753 754 CPU_ZERO(&mask); 755 CPU_SET(cpu, &mask); 756 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask); 757 mtx_lock(&qgroup->tqg_lock); 758 if (error) 759 printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error); 760 761 } 762 qgroup->tqg_queue[qid].tgc_cnt++; 763 764 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, 765 gt_list); 766 MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); 767 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 768 mtx_unlock(&qgroup->tqg_lock); 769 } 770 771 int 772 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, 773 void *uniq, int cpu, int irq, const char *name) 774 { 775 cpuset_t mask; 776 int i, qid, error; 777 778 qid = -1; 779 gtask->gt_uniq = uniq; 780 snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask"); 781 gtask->gt_irq = irq; 782 gtask->gt_cpu = cpu; 783 mtx_lock(&qgroup->tqg_lock); 784 if (tqg_smp_started) { 785 for (i = 0; i < qgroup->tqg_cnt; i++) 786 if (qgroup->tqg_queue[i].tgc_cpu == cpu) { 787 qid = i; 788 break; 789 } 790 if (qid == -1) { 791 mtx_unlock(&qgroup->tqg_lock); 792 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu); 793 return (EINVAL); 794 } 795 } else 796 qid = 0; 797 qgroup->tqg_queue[qid].tgc_cnt++; 798 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); 799 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 800 cpu = qgroup->tqg_queue[qid].tgc_cpu; 801 mtx_unlock(&qgroup->tqg_lock); 802 803 CPU_ZERO(&mask); 804 CPU_SET(cpu, &mask); 805 if (irq != -1 && tqg_smp_started) { 806 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); 807 if (error) 808 printf("%s: setaffinity failed: %d\n", __func__, error); 809 } 810 return (0); 811 } 812 813 static int 814 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) 815 { 816 cpuset_t mask; 817 int i, qid, irq, cpu, error; 818 819 qid = -1; 820 irq = gtask->gt_irq; 821 cpu = gtask->gt_cpu; 822 MPASS(tqg_smp_started); 823 mtx_lock(&qgroup->tqg_lock); 824 for (i = 0; i < qgroup->tqg_cnt; i++) 825 if (qgroup->tqg_queue[i].tgc_cpu == cpu) { 826 qid = i; 827 break; 828 } 829 if (qid == -1) { 830 mtx_unlock(&qgroup->tqg_lock); 831 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu); 832 return (EINVAL); 833 } 834 qgroup->tqg_queue[qid].tgc_cnt++; 835 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); 836 MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); 837 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 838 mtx_unlock(&qgroup->tqg_lock); 839 840 CPU_ZERO(&mask); 841 CPU_SET(cpu, &mask); 842 843 if (irq != -1) { 844 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); 845 if (error) 846 printf("%s: setaffinity failed: %d\n", __func__, error); 847 } 848 return (0); 849 } 850 851 void 852 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) 853 { 854 int i; 855 856 grouptask_block(gtask); 857 mtx_lock(&qgroup->tqg_lock); 858 for (i = 0; i < qgroup->tqg_cnt; i++) 859 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) 860 break; 861 if (i == qgroup->tqg_cnt) 862 panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name); 863 qgroup->tqg_queue[i].tgc_cnt--; 864 LIST_REMOVE(gtask, gt_list); 865 mtx_unlock(&qgroup->tqg_lock); 866 gtask->gt_taskqueue = NULL; 867 gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE; 868 } 869 870 static void 871 taskqgroup_binder(void *ctx) 872 { 873 struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; 874 cpuset_t mask; 875 int error; 876 877 CPU_ZERO(&mask); 878 CPU_SET(gtask->bt_cpuid, &mask); 879 error = cpuset_setthread(curthread->td_tid, &mask); 880 thread_lock(curthread); 881 sched_bind(curthread, gtask->bt_cpuid); 882 thread_unlock(curthread); 883 884 if (error) 885 printf("%s: setaffinity failed: %d\n", __func__, 886 error); 887 free(gtask, M_DEVBUF); 888 } 889 890 static void 891 taskqgroup_bind(struct taskqgroup *qgroup) 892 { 893 struct taskq_bind_task *gtask; 894 int i; 895 896 /* 897 * Bind taskqueue threads to specific CPUs, if they have been assigned 898 * one. 899 */ 900 if (qgroup->tqg_cnt == 1) 901 return; 902 903 for (i = 0; i < qgroup->tqg_cnt; i++) { 904 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); 905 GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_binder, gtask); 906 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; 907 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, 908 >ask->bt_task); 909 } 910 } 911 912 static void 913 taskqgroup_config_init(void *arg) 914 { 915 struct taskqgroup *qgroup = qgroup_config; 916 LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); 917 918 LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, 919 grouptask, gt_list); 920 qgroup->tqg_queue[0].tgc_cnt = 0; 921 taskqgroup_cpu_create(qgroup, 0, 0); 922 923 qgroup->tqg_cnt = 1; 924 qgroup->tqg_stride = 1; 925 } 926 927 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND, 928 taskqgroup_config_init, NULL); 929 930 static int 931 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) 932 { 933 LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); 934 struct grouptask *gtask; 935 int i, k, old_cnt, old_cpu, cpu; 936 937 mtx_assert(&qgroup->tqg_lock, MA_OWNED); 938 939 if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) { 940 printf("%s: failed cnt: %d stride: %d " 941 "mp_ncpus: %d tqg_smp_started: %d\n", 942 __func__, cnt, stride, mp_ncpus, tqg_smp_started); 943 return (EINVAL); 944 } 945 if (qgroup->tqg_adjusting) { 946 printf("%s failed: adjusting\n", __func__); 947 return (EBUSY); 948 } 949 qgroup->tqg_adjusting = 1; 950 old_cnt = qgroup->tqg_cnt; 951 old_cpu = 0; 952 if (old_cnt < cnt) 953 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu; 954 mtx_unlock(&qgroup->tqg_lock); 955 /* 956 * Set up queue for tasks added before boot. 957 */ 958 if (old_cnt == 0) { 959 LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, 960 grouptask, gt_list); 961 qgroup->tqg_queue[0].tgc_cnt = 0; 962 } 963 964 /* 965 * If new taskq threads have been added. 966 */ 967 cpu = old_cpu; 968 for (i = old_cnt; i < cnt; i++) { 969 taskqgroup_cpu_create(qgroup, i, cpu); 970 971 for (k = 0; k < stride; k++) 972 cpu = CPU_NEXT(cpu); 973 } 974 mtx_lock(&qgroup->tqg_lock); 975 qgroup->tqg_cnt = cnt; 976 qgroup->tqg_stride = stride; 977 978 /* 979 * Adjust drivers to use new taskqs. 980 */ 981 for (i = 0; i < old_cnt; i++) { 982 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { 983 LIST_REMOVE(gtask, gt_list); 984 qgroup->tqg_queue[i].tgc_cnt--; 985 LIST_INSERT_HEAD(>ask_head, gtask, gt_list); 986 } 987 } 988 mtx_unlock(&qgroup->tqg_lock); 989 990 while ((gtask = LIST_FIRST(>ask_head))) { 991 LIST_REMOVE(gtask, gt_list); 992 if (gtask->gt_cpu == -1) 993 taskqgroup_attach_deferred(qgroup, gtask); 994 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask)) 995 taskqgroup_attach_deferred(qgroup, gtask); 996 } 997 998 #ifdef INVARIANTS 999 mtx_lock(&qgroup->tqg_lock); 1000 for (i = 0; i < qgroup->tqg_cnt; i++) { 1001 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL); 1002 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) 1003 MPASS(gtask->gt_taskqueue != NULL); 1004 } 1005 mtx_unlock(&qgroup->tqg_lock); 1006 #endif 1007 /* 1008 * If taskq thread count has been reduced. 1009 */ 1010 for (i = cnt; i < old_cnt; i++) 1011 taskqgroup_cpu_remove(qgroup, i); 1012 1013 taskqgroup_bind(qgroup); 1014 1015 mtx_lock(&qgroup->tqg_lock); 1016 qgroup->tqg_adjusting = 0; 1017 1018 return (0); 1019 } 1020 1021 int 1022 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) 1023 { 1024 int error; 1025 1026 mtx_lock(&qgroup->tqg_lock); 1027 error = _taskqgroup_adjust(qgroup, cnt, stride); 1028 mtx_unlock(&qgroup->tqg_lock); 1029 1030 return (error); 1031 } 1032 1033 struct taskqgroup * 1034 taskqgroup_create(const char *name) 1035 { 1036 struct taskqgroup *qgroup; 1037 1038 qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO); 1039 mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); 1040 qgroup->tqg_name = name; 1041 LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); 1042 1043 return (qgroup); 1044 } 1045 1046 void 1047 taskqgroup_destroy(struct taskqgroup *qgroup) 1048 { 1049 1050 } 1051 1052 void 1053 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, 1054 const char *name) 1055 { 1056 1057 GROUPTASK_INIT(gtask, 0, fn, ctx); 1058 taskqgroup_attach(qgroup_config, gtask, gtask, -1, name); 1059 } 1060 1061 void 1062 taskqgroup_config_gtask_deinit(struct grouptask *gtask) 1063 { 1064 taskqgroup_detach(qgroup_config, gtask); 1065 } 1066