1 /*- 2 * Copyright (c) 2000 Doug Rabson 3 * Copyright (c) 2014 Jeff Roberson 4 * Copyright (c) 2016 Matthew Macy 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/gtaskqueue.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>

/* malloc(9) type used for queues, queue names and per-queue thread arrays. */
static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void	gtaskqueue_thread_enqueue(void *);
static void	gtaskqueue_thread_loop(void *arg);
static int	_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri);
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, false, PI_SOFT);

/*
 * One entry on a queue's tq_active list.  A service thread links one in
 * while it runs a task (tb_running points at that task); drain waiters link
 * one in with tb_running set to the TB_DRAIN_WAITER sentinel.
 */
struct gtaskqueue_busy {
	struct gtask	*tb_running;
	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
};

/*
 * Per-thread state for queues serviced by an interrupt-class thread.
 * git_need is set by schedule_ithread() and consumed by intr_thread_loop().
 */
struct gt_intr_thread {
	int	git_flags;		/* (j) IT_* flags. */
	int	git_need;		/* Needs service. */
};

/* Interrupt thread flags kept in git_flags */
#define	IT_DEAD		0x000001	/* Thread is waiting to exit. */
#define	IT_WAIT		0x000002	/* Thread is waiting for completion.
*/ 70 71 static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; 72 73 struct gtaskqueue { 74 STAILQ_HEAD(, gtask) tq_queue; 75 gtaskqueue_enqueue_fn tq_enqueue; 76 void *tq_context; 77 char *tq_name; 78 TAILQ_HEAD(, gtaskqueue_busy) tq_active; 79 struct mtx tq_mutex; 80 struct thread **tq_threads; 81 struct gt_intr_thread *tq_gt_intrs; 82 int tq_tcount; 83 int tq_spin; 84 int tq_flags; 85 int tq_callouts; 86 taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS]; 87 void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; 88 }; 89 90 #define TQ_FLAGS_ACTIVE (1 << 0) 91 #define TQ_FLAGS_BLOCKED (1 << 1) 92 #define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) 93 #define TQ_FLAGS_INTR (1 << 3) 94 95 #define DT_CALLOUT_ARMED (1 << 0) 96 97 #define TQ_LOCK(tq) \ 98 do { \ 99 if ((tq)->tq_spin) \ 100 mtx_lock_spin(&(tq)->tq_mutex); \ 101 else \ 102 mtx_lock(&(tq)->tq_mutex); \ 103 } while (0) 104 #define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED) 105 106 #define TQ_UNLOCK(tq) \ 107 do { \ 108 if ((tq)->tq_spin) \ 109 mtx_unlock_spin(&(tq)->tq_mutex); \ 110 else \ 111 mtx_unlock(&(tq)->tq_mutex); \ 112 } while (0) 113 #define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED) 114 115 #ifdef INVARIANTS 116 static void 117 gtask_dump(struct gtask *gtask) 118 { 119 printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n", 120 gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context); 121 } 122 #endif 123 124 static __inline int 125 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, 126 int t) 127 { 128 if (tq->tq_spin) 129 return (msleep_spin(p, m, wm, t)); 130 return (msleep(p, m, pri, wm, t)); 131 } 132 133 static struct gtaskqueue * 134 _gtaskqueue_create(const char *name, int mflags, 135 taskqueue_enqueue_fn enqueue, void *context, 136 int mtxflags, const char *mtxname __unused) 137 { 138 struct gtaskqueue *queue; 139 char *tq_name; 140 141 tq_name = malloc(TASKQUEUE_NAMELEN, 
M_GTASKQUEUE, mflags | M_ZERO); 142 if (!tq_name) 143 return (NULL); 144 145 snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue"); 146 147 queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO); 148 if (!queue) 149 return (NULL); 150 151 STAILQ_INIT(&queue->tq_queue); 152 TAILQ_INIT(&queue->tq_active); 153 queue->tq_enqueue = enqueue; 154 queue->tq_context = context; 155 queue->tq_name = tq_name; 156 queue->tq_spin = (mtxflags & MTX_SPIN) != 0; 157 queue->tq_flags |= TQ_FLAGS_ACTIVE; 158 if (enqueue == gtaskqueue_thread_enqueue) 159 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE; 160 mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags); 161 162 return (queue); 163 } 164 165 166 /* 167 * Signal a taskqueue thread to terminate. 168 */ 169 static void 170 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq) 171 { 172 173 while (tq->tq_tcount > 0 || tq->tq_callouts > 0) { 174 wakeup(tq); 175 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); 176 } 177 } 178 179 static void 180 gtaskqueue_free(struct gtaskqueue *queue) 181 { 182 183 TQ_LOCK(queue); 184 queue->tq_flags &= ~TQ_FLAGS_ACTIVE; 185 gtaskqueue_terminate(queue->tq_threads, queue); 186 KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); 187 KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks")); 188 mtx_destroy(&queue->tq_mutex); 189 free(queue->tq_threads, M_GTASKQUEUE); 190 free(queue->tq_name, M_GTASKQUEUE); 191 free(queue, M_GTASKQUEUE); 192 } 193 194 static void 195 schedule_ithread(struct gtaskqueue *queue) 196 { 197 struct proc *p; 198 struct thread *td; 199 struct gt_intr_thread *git; 200 201 MPASS(queue->tq_tcount == 1); 202 td = queue->tq_threads[0]; 203 git = &queue->tq_gt_intrs[0]; 204 p = td->td_proc; 205 206 atomic_store_rel_int(&git->git_need, 1); 207 thread_lock(td); 208 if (TD_AWAITING_INTR(td)) { 209 CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, 210 td->td_name); 211 TD_CLR_IWAIT(td); 212 sched_add(td, 
SRQ_INTR); 213 } else { 214 CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", 215 __func__, p->p_pid, td->td_name, git->git_need, td->td_state); 216 } 217 thread_unlock(td); 218 } 219 220 int 221 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) 222 { 223 #ifdef INVARIANTS 224 if (queue == NULL) { 225 gtask_dump(gtask); 226 panic("queue == NULL"); 227 } 228 #endif 229 TQ_LOCK(queue); 230 if (gtask->ta_flags & TASK_ENQUEUED) { 231 TQ_UNLOCK(queue); 232 return (0); 233 } 234 STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); 235 gtask->ta_flags |= TASK_ENQUEUED; 236 TQ_UNLOCK(queue); 237 if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) { 238 if (queue->tq_flags & TQ_FLAGS_INTR) { 239 schedule_ithread(queue); 240 } else { 241 queue->tq_enqueue(queue->tq_context); 242 } 243 } 244 return (0); 245 } 246 247 static void 248 gtaskqueue_task_nop_fn(void *context) 249 { 250 } 251 252 /* 253 * Block until all currently queued tasks in this taskqueue 254 * have begun execution. Tasks queued during execution of 255 * this function are ignored. 256 */ 257 static void 258 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue) 259 { 260 struct gtask t_barrier; 261 262 if (STAILQ_EMPTY(&queue->tq_queue)) 263 return; 264 265 /* 266 * Enqueue our barrier after all current tasks, but with 267 * the highest priority so that newly queued tasks cannot 268 * pass it. Because of the high priority, we can not use 269 * taskqueue_enqueue_locked directly (which drops the lock 270 * anyway) so just insert it at tail while we have the 271 * queue lock. 272 */ 273 GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier); 274 STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link); 275 t_barrier.ta_flags |= TASK_ENQUEUED; 276 277 /* 278 * Once the barrier has executed, all previously queued tasks 279 * have completed or are currently executing. 
280 */ 281 while (t_barrier.ta_flags & TASK_ENQUEUED) 282 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0); 283 } 284 285 /* 286 * Block until all currently executing tasks for this taskqueue 287 * complete. Tasks that begin execution during the execution 288 * of this function are ignored. 289 */ 290 static void 291 gtaskqueue_drain_tq_active(struct gtaskqueue *queue) 292 { 293 struct gtaskqueue_busy tb_marker, *tb_first; 294 295 if (TAILQ_EMPTY(&queue->tq_active)) 296 return; 297 298 /* Block taskq_terminate().*/ 299 queue->tq_callouts++; 300 301 /* 302 * Wait for all currently executing taskqueue threads 303 * to go idle. 304 */ 305 tb_marker.tb_running = TB_DRAIN_WAITER; 306 TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link); 307 while (TAILQ_FIRST(&queue->tq_active) != &tb_marker) 308 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0); 309 TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link); 310 311 /* 312 * Wakeup any other drain waiter that happened to queue up 313 * without any intervening active thread. 314 */ 315 tb_first = TAILQ_FIRST(&queue->tq_active); 316 if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER) 317 wakeup(tb_first); 318 319 /* Release taskqueue_terminate(). 
*/ 320 queue->tq_callouts--; 321 if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0) 322 wakeup_one(queue->tq_threads); 323 } 324 325 void 326 gtaskqueue_block(struct gtaskqueue *queue) 327 { 328 329 TQ_LOCK(queue); 330 queue->tq_flags |= TQ_FLAGS_BLOCKED; 331 TQ_UNLOCK(queue); 332 } 333 334 void 335 gtaskqueue_unblock(struct gtaskqueue *queue) 336 { 337 338 TQ_LOCK(queue); 339 queue->tq_flags &= ~TQ_FLAGS_BLOCKED; 340 if (!STAILQ_EMPTY(&queue->tq_queue)) 341 queue->tq_enqueue(queue->tq_context); 342 TQ_UNLOCK(queue); 343 } 344 345 static void 346 gtaskqueue_run_locked(struct gtaskqueue *queue) 347 { 348 struct gtaskqueue_busy tb; 349 struct gtaskqueue_busy *tb_first; 350 struct gtask *gtask; 351 352 KASSERT(queue != NULL, ("tq is NULL")); 353 TQ_ASSERT_LOCKED(queue); 354 tb.tb_running = NULL; 355 356 while (STAILQ_FIRST(&queue->tq_queue)) { 357 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link); 358 359 /* 360 * Carefully remove the first task from the queue and 361 * clear its TASK_ENQUEUED flag 362 */ 363 gtask = STAILQ_FIRST(&queue->tq_queue); 364 KASSERT(gtask != NULL, ("task is NULL")); 365 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); 366 gtask->ta_flags &= ~TASK_ENQUEUED; 367 tb.tb_running = gtask; 368 TQ_UNLOCK(queue); 369 370 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL")); 371 gtask->ta_func(gtask->ta_context); 372 373 TQ_LOCK(queue); 374 tb.tb_running = NULL; 375 wakeup(gtask); 376 377 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); 378 tb_first = TAILQ_FIRST(&queue->tq_active); 379 if (tb_first != NULL && 380 tb_first->tb_running == TB_DRAIN_WAITER) 381 wakeup(tb_first); 382 } 383 } 384 385 static int 386 task_is_running(struct gtaskqueue *queue, struct gtask *gtask) 387 { 388 struct gtaskqueue_busy *tb; 389 390 TQ_ASSERT_LOCKED(queue); 391 TAILQ_FOREACH(tb, &queue->tq_active, tb_link) { 392 if (tb->tb_running == gtask) 393 return (1); 394 } 395 return (0); 396 } 397 398 static int 399 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct 
gtask *gtask) 400 { 401 402 if (gtask->ta_flags & TASK_ENQUEUED) 403 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link); 404 gtask->ta_flags &= ~TASK_ENQUEUED; 405 return (task_is_running(queue, gtask) ? EBUSY : 0); 406 } 407 408 int 409 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask) 410 { 411 int error; 412 413 TQ_LOCK(queue); 414 error = gtaskqueue_cancel_locked(queue, gtask); 415 TQ_UNLOCK(queue); 416 417 return (error); 418 } 419 420 void 421 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) 422 { 423 424 if (!queue->tq_spin) 425 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 426 427 TQ_LOCK(queue); 428 while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) 429 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); 430 TQ_UNLOCK(queue); 431 } 432 433 void 434 gtaskqueue_drain_all(struct gtaskqueue *queue) 435 { 436 437 if (!queue->tq_spin) 438 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); 439 440 TQ_LOCK(queue); 441 gtaskqueue_drain_tq_queue(queue); 442 gtaskqueue_drain_tq_active(queue); 443 TQ_UNLOCK(queue); 444 } 445 446 static int 447 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, 448 cpuset_t *mask, bool intr, const char *name, va_list ap) 449 { 450 char ktname[MAXCOMLEN + 1]; 451 struct thread *td; 452 struct gtaskqueue *tq; 453 int i, error; 454 455 if (count <= 0) 456 return (EINVAL); 457 458 vsnprintf(ktname, sizeof(ktname), name, ap); 459 tq = *tqp; 460 461 tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE, 462 M_NOWAIT | M_ZERO); 463 if (tq->tq_threads == NULL) { 464 printf("%s: no memory for %s threads\n", __func__, ktname); 465 return (ENOMEM); 466 } 467 tq->tq_gt_intrs = malloc(sizeof(struct gt_intr_thread) * count, M_GTASKQUEUE, 468 M_NOWAIT | M_ZERO); 469 if (tq->tq_gt_intrs == NULL) { 470 printf("%s: no memory for %s intr info\n", __func__, ktname); 471 return (ENOMEM); 472 } 473 474 for (i = 0; i < count; i++) { 475 if 
(count == 1) 476 error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, 477 &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname); 478 else 479 error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, 480 &tq->tq_threads[i], RFSTOPPED, 0, 481 "%s_%d", ktname, i); 482 if (error) { 483 /* should be ok to continue, taskqueue_free will dtrt */ 484 printf("%s: kthread_add(%s): error %d", __func__, 485 ktname, error); 486 tq->tq_threads[i] = NULL; /* paranoid */ 487 } else 488 tq->tq_tcount++; 489 } 490 if (intr) 491 tq->tq_flags |= TQ_FLAGS_INTR; 492 493 for (i = 0; i < count; i++) { 494 if (tq->tq_threads[i] == NULL) 495 continue; 496 td = tq->tq_threads[i]; 497 if (mask) { 498 error = cpuset_setthread(td->td_tid, mask); 499 /* 500 * Failing to pin is rarely an actual fatal error; 501 * it'll just affect performance. 502 */ 503 if (error) 504 printf("%s: curthread=%llu: can't pin; " 505 "error=%d\n", 506 __func__, 507 (unsigned long long) td->td_tid, 508 error); 509 } 510 thread_lock(td); 511 sched_prio(td, pri); 512 if (intr) { 513 /* we need to schedule the thread from the interrupt handler for this to work */ 514 TD_SET_IWAIT(td); 515 sched_class(td, PRI_ITHD); 516 td->td_pflags |= TDP_ITHREAD; 517 } else { 518 sched_add(td, SRQ_BORING); 519 } 520 thread_unlock(td); 521 } 522 523 return (0); 524 } 525 526 static int 527 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, 528 bool intr, const char *name, ...) 
529 { 530 va_list ap; 531 int error; 532 533 va_start(ap, name); 534 error = _gtaskqueue_start_threads(tqp, count, pri, NULL, intr, name, ap); 535 va_end(ap); 536 return (error); 537 } 538 539 static inline void 540 gtaskqueue_run_callback(struct gtaskqueue *tq, 541 enum taskqueue_callback_type cb_type) 542 { 543 taskqueue_callback_fn tq_callback; 544 545 TQ_ASSERT_UNLOCKED(tq); 546 tq_callback = tq->tq_callbacks[cb_type]; 547 if (tq_callback != NULL) 548 tq_callback(tq->tq_cb_contexts[cb_type]); 549 } 550 551 static void 552 intr_thread_loop(struct gtaskqueue *tq) 553 { 554 struct gt_intr_thread *git; 555 struct thread *td; 556 557 git = &tq->tq_gt_intrs[0]; 558 td = tq->tq_threads[0]; 559 MPASS(tq->tq_tcount == 1); 560 561 while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { 562 THREAD_NO_SLEEPING(); 563 while (atomic_cmpset_acq_int(&git->git_need, 1, 0) != 0) { 564 gtaskqueue_run_locked(tq); 565 } 566 THREAD_SLEEPING_OK(); 567 568 /* 569 * Because taskqueue_run() can drop tq_mutex, we need to 570 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the 571 * meantime, which means we missed a wakeup. 572 */ 573 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) 574 break; 575 576 TQ_UNLOCK(tq); 577 WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); 578 mtx_assert(&Giant, MA_NOTOWNED); 579 thread_lock(td); 580 if (atomic_load_acq_int(&git->git_need) == 0 && 581 (git->git_flags & (IT_DEAD | IT_WAIT)) == 0) { 582 TD_SET_IWAIT(td); 583 mi_switch(SW_VOL | SWT_IWAIT, NULL); 584 } 585 #if 0 586 /* XXX is this something we want? 
*/ 587 if (git->git_flags & IT_WAIT) { 588 wake = 1; 589 git->git_flags &= ~IT_WAIT; 590 } 591 #endif 592 thread_unlock(td); 593 TQ_LOCK(tq); 594 } 595 THREAD_NO_SLEEPING(); 596 gtaskqueue_run_locked(tq); 597 THREAD_SLEEPING_OK(); 598 } 599 600 static void 601 timeshare_thread_loop(struct gtaskqueue *tq) 602 { 603 while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { 604 gtaskqueue_run_locked(tq); 605 /* 606 * Because taskqueue_run() can drop tq_mutex, we need to 607 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the 608 * meantime, which means we missed a wakeup. 609 */ 610 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) 611 break; 612 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); 613 } 614 gtaskqueue_run_locked(tq); 615 } 616 617 static void 618 gtaskqueue_thread_loop(void *arg) 619 { 620 struct gtaskqueue **tqp, *tq; 621 622 tqp = arg; 623 tq = *tqp; 624 gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); 625 TQ_LOCK(tq); 626 if (curthread->td_pflags & TDP_ITHREAD) { 627 intr_thread_loop(tq); 628 } else { 629 timeshare_thread_loop(tq); 630 } 631 632 /* 633 * This thread is on its way out, so just drop the lock temporarily 634 * in order to call the shutdown callback. This allows the callback 635 * to look at the taskqueue, even just before it dies. 
636 */ 637 TQ_UNLOCK(tq); 638 gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN); 639 TQ_LOCK(tq); 640 641 /* rendezvous with thread that asked us to terminate */ 642 tq->tq_tcount--; 643 wakeup_one(tq->tq_threads); 644 TQ_UNLOCK(tq); 645 kthread_exit(); 646 } 647 648 static void 649 gtaskqueue_thread_enqueue(void *context) 650 { 651 struct gtaskqueue **tqp, *tq; 652 653 tqp = context; 654 tq = *tqp; 655 wakeup_one(tq); 656 } 657 658 659 static struct gtaskqueue * 660 gtaskqueue_create_fast(const char *name, int mflags, 661 taskqueue_enqueue_fn enqueue, void *context) 662 { 663 return _gtaskqueue_create(name, mflags, enqueue, context, 664 MTX_SPIN, "fast_taskqueue"); 665 } 666 667 668 struct taskqgroup_cpu { 669 LIST_HEAD(, grouptask) tgc_tasks; 670 struct gtaskqueue *tgc_taskq; 671 int tgc_cnt; 672 int tgc_cpu; 673 }; 674 675 struct taskqgroup { 676 struct taskqgroup_cpu tqg_queue[MAXCPU]; 677 struct mtx tqg_lock; 678 void (*adjust_func)(void*); 679 char * tqg_name; 680 int tqg_adjusting; 681 int tqg_stride; 682 int tqg_cnt; 683 int tqg_pri; 684 int tqg_flags; 685 bool tqg_intr; 686 }; 687 #define TQG_NEED_ADJUST 0x1 688 #define TQG_ADJUSTED 0x2 689 690 struct taskq_bind_task { 691 struct gtask bt_task; 692 int bt_cpuid; 693 }; 694 695 static void 696 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, bool intr, int pri) 697 { 698 struct taskqgroup_cpu *qcpu; 699 700 qcpu = &qgroup->tqg_queue[idx]; 701 LIST_INIT(&qcpu->tgc_tasks); 702 qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK | M_ZERO, 703 taskqueue_thread_enqueue, &qcpu->tgc_taskq); 704 gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri, 705 intr, "%s_%d", qgroup->tqg_name, idx); 706 qcpu->tgc_cpu = cpu; 707 } 708 709 static void 710 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) 711 { 712 713 gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); 714 } 715 716 /* 717 * Find the taskq with least # of tasks that doesn't currently have any 718 * other queues from the 
uniq identifier.
 *
 * Must be called with tqg_lock held.  Returns 0 when the group has no
 * queues yet, otherwise the index of the chosen queue.
 */
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
	struct taskqgroup_cpu *qcpu;
	struct grouptask *gt;
	int best, best_cnt, q;
	bool skip_uniq;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
	if (qgroup->tqg_cnt == 0)
		return (0);

	best = -1;
	best_cnt = INT_MAX;
	/*
	 * First pass: least loaded queue that does not already serve this
	 * uniq id.  If nothing was picked, repeat without that restriction.
	 */
	skip_uniq = true;
	while (best_cnt == INT_MAX) {
		for (q = 0; q < qgroup->tqg_cnt; q++) {
			qcpu = &qgroup->tqg_queue[q];
			if (qcpu->tgc_cnt > best_cnt)
				continue;
			if (skip_uniq) {
				LIST_FOREACH(gt, &qcpu->tgc_tasks, gt_list) {
					if (gt->gt_uniq == uniq)
						break;
				}
				/* Non-NULL means the walk stopped on a match. */
				if (gt != NULL)
					continue;
			}
			best_cnt = qcpu->tgc_cnt;
			best = q;
		}
		skip_uniq = false;
	}
	if (best == -1)
		panic("taskqgroup_find: Failed to pick a qid.");

	return (best);
}

/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
771 */ 772 773 static int tqg_smp_started; 774 775 static void 776 tqg_record_smp_started(void *arg) 777 { 778 tqg_smp_started = 1; 779 } 780 781 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH, 782 tqg_record_smp_started, NULL); 783 784 void 785 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, 786 void *uniq, int irq, char *name) 787 { 788 cpuset_t mask; 789 int qid, error; 790 791 gtask->gt_uniq = uniq; 792 gtask->gt_name = name; 793 gtask->gt_irq = irq; 794 gtask->gt_cpu = -1; 795 796 mtx_lock(&qgroup->tqg_lock); 797 qgroup->tqg_flags |= TQG_NEED_ADJUST; 798 mtx_unlock(&qgroup->tqg_lock); 799 800 if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) 801 qgroup->adjust_func(NULL); 802 803 mtx_lock(&qgroup->tqg_lock); 804 qid = taskqgroup_find(qgroup, uniq); 805 qgroup->tqg_queue[qid].tgc_cnt++; 806 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); 807 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 808 if (irq != -1 && tqg_smp_started) { 809 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu; 810 CPU_ZERO(&mask); 811 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); 812 mtx_unlock(&qgroup->tqg_lock); 813 error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); 814 if (error) 815 printf("taskqgroup_attach: setaffinity failed: %d\n", error); 816 } else 817 mtx_unlock(&qgroup->tqg_lock); 818 } 819 820 static void 821 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) 822 { 823 cpuset_t mask; 824 int qid, cpu, error; 825 826 mtx_lock(&qgroup->tqg_lock); 827 qid = taskqgroup_find(qgroup, gtask->gt_uniq); 828 cpu = qgroup->tqg_queue[qid].tgc_cpu; 829 if (gtask->gt_irq != -1) { 830 mtx_unlock(&qgroup->tqg_lock); 831 832 CPU_ZERO(&mask); 833 CPU_SET(cpu, &mask); 834 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_INTRHANDLER, &mask); 835 mtx_lock(&qgroup->tqg_lock); 836 if (error) 837 printf("taskqgroup_attach_deferred: setaffinity failed: %d\n", error); 838 } 839 
qgroup->tqg_queue[qid].tgc_cnt++; 840 841 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, 842 gt_list); 843 MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); 844 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 845 mtx_unlock(&qgroup->tqg_lock); 846 } 847 848 static int 849 taskqgroup_adjust_deferred(struct taskqgroup *qgroup, int cpu) 850 { 851 int i, error = 0, cpu_max = -1; 852 853 mtx_lock(&qgroup->tqg_lock); 854 for (i = 0; i < qgroup->tqg_cnt; i++) 855 if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) 856 cpu_max = qgroup->tqg_queue[i].tgc_cpu; 857 if (cpu_max >= cpu) { 858 mtx_unlock(&qgroup->tqg_lock); 859 return (0); 860 } 861 MPASS(cpu <= mp_maxid); 862 error = _taskqgroup_adjust(qgroup, cpu + 1, qgroup->tqg_stride, 863 qgroup->tqg_intr, qgroup->tqg_pri); 864 if (error) { 865 printf("%s: _taskqgroup_adjust(%p, %d, %d, %d, %d) => %d\n\n", 866 __func__, qgroup, cpu + 1, qgroup->tqg_stride, qgroup->tqg_intr, 867 qgroup->tqg_pri, error); 868 goto out; 869 } 870 for (i = 0; i < qgroup->tqg_cnt; i++) 871 if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) 872 cpu_max = qgroup->tqg_queue[i].tgc_cpu; 873 MPASS(cpu_max >= cpu); 874 out: 875 mtx_unlock(&qgroup->tqg_lock); 876 return (error); 877 } 878 879 int 880 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, 881 void *uniq, int cpu, int irq, char *name) 882 { 883 cpuset_t mask; 884 int i, error, qid; 885 886 qid = -1; 887 gtask->gt_uniq = uniq; 888 gtask->gt_name = name; 889 gtask->gt_irq = irq; 890 gtask->gt_cpu = cpu; 891 MPASS(cpu >= 0); 892 893 mtx_lock(&qgroup->tqg_lock); 894 qgroup->tqg_flags |= TQG_NEED_ADJUST; 895 mtx_unlock(&qgroup->tqg_lock); 896 897 if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) { 898 uintptr_t cpuid = cpu + 1; 899 qgroup->adjust_func((void *)cpuid); 900 } 901 if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) 902 return (error); 903 904 mtx_lock(&qgroup->tqg_lock); 905 if (tqg_smp_started) { 906 for (i = 0; i < qgroup->tqg_cnt; i++) { 907 
if (qgroup->tqg_queue[i].tgc_cpu == cpu) { 908 qid = i; 909 break; 910 } 911 #ifdef INVARIANTS 912 else 913 printf("qgroup->tqg_queue[%d].tgc_cpu=0x%x tgc_cnt=0x%x\n", 914 i, qgroup->tqg_queue[i].tgc_cpu, qgroup->tqg_queue[i].tgc_cnt); 915 916 #endif 917 } 918 if (qid == -1) { 919 mtx_unlock(&qgroup->tqg_lock); 920 printf("%s: qid not found for cpu=%d\n", __func__, cpu); 921 return (EINVAL); 922 } 923 } else 924 qid = 0; 925 qgroup->tqg_queue[qid].tgc_cnt++; 926 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); 927 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 928 cpu = qgroup->tqg_queue[qid].tgc_cpu; 929 mtx_unlock(&qgroup->tqg_lock); 930 931 CPU_ZERO(&mask); 932 CPU_SET(cpu, &mask); 933 if (irq != -1 && tqg_smp_started) { 934 error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); 935 if (error) 936 printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); 937 } 938 return (0); 939 } 940 941 static int 942 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) 943 { 944 cpuset_t mask; 945 int i, qid, irq, cpu, error; 946 947 qid = -1; 948 irq = gtask->gt_irq; 949 cpu = gtask->gt_cpu; 950 MPASS(tqg_smp_started); 951 952 if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) 953 return (error); 954 mtx_lock(&qgroup->tqg_lock); 955 /* adjust as needed */ 956 MPASS(cpu <= mp_maxid); 957 for (i = 0; i < qgroup->tqg_cnt; i++) 958 if (qgroup->tqg_queue[i].tgc_cpu == cpu) { 959 qid = i; 960 break; 961 } 962 if (qid == -1) { 963 mtx_unlock(&qgroup->tqg_lock); 964 printf("%s: qid not found for cpu=%d\n", __func__, cpu); 965 return (EINVAL); 966 } 967 qgroup->tqg_queue[qid].tgc_cnt++; 968 LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); 969 MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); 970 gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; 971 mtx_unlock(&qgroup->tqg_lock); 972 973 CPU_ZERO(&mask); 974 CPU_SET(cpu, &mask); 975 976 if (irq != -1) { 977 error = 
intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); 978 if (error) 979 printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); 980 } 981 return (0); 982 } 983 984 void 985 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) 986 { 987 int i; 988 989 mtx_lock(&qgroup->tqg_lock); 990 for (i = 0; i < qgroup->tqg_cnt; i++) 991 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) 992 break; 993 if (i == qgroup->tqg_cnt) 994 panic("taskqgroup_detach: task not in group\n"); 995 qgroup->tqg_queue[i].tgc_cnt--; 996 LIST_REMOVE(gtask, gt_list); 997 mtx_unlock(&qgroup->tqg_lock); 998 gtask->gt_taskqueue = NULL; 999 } 1000 1001 static void 1002 taskqgroup_binder(void *ctx) 1003 { 1004 struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; 1005 cpuset_t mask; 1006 int error; 1007 1008 CPU_ZERO(&mask); 1009 CPU_SET(gtask->bt_cpuid, &mask); 1010 error = cpuset_setthread(curthread->td_tid, &mask); 1011 thread_lock(curthread); 1012 sched_bind(curthread, gtask->bt_cpuid); 1013 thread_unlock(curthread); 1014 1015 if (error) 1016 printf("taskqgroup_binder: setaffinity failed: %d\n", 1017 error); 1018 free(gtask, M_DEVBUF); 1019 1020 } 1021 static void 1022 taskqgroup_ithread_binder(void *ctx) 1023 { 1024 struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; 1025 cpuset_t mask; 1026 int error; 1027 1028 CPU_ZERO(&mask); 1029 CPU_SET(gtask->bt_cpuid, &mask); 1030 error = cpuset_setthread(curthread->td_tid, &mask); 1031 1032 if (error) 1033 printf("taskqgroup_binder: setaffinity failed: %d\n", 1034 error); 1035 free(gtask, M_DEVBUF); 1036 1037 } 1038 static void 1039 taskqgroup_bind(struct taskqgroup *qgroup) 1040 { 1041 struct taskq_bind_task *gtask; 1042 int i; 1043 1044 /* 1045 * Bind taskqueue threads to specific CPUs, if they have been assigned 1046 * one. 
1047 */ 1048 if (qgroup->tqg_cnt == 1) 1049 return; 1050 1051 for (i = 0; i < qgroup->tqg_cnt; i++) { 1052 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); 1053 if (qgroup->tqg_intr) 1054 GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_ithread_binder, gtask); 1055 else 1056 GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_binder, gtask); 1057 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; 1058 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, 1059 >ask->bt_task); 1060 } 1061 } 1062 1063 static int 1064 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) 1065 { 1066 LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); 1067 struct grouptask *gtask; 1068 int i, k, old_cnt, old_cpu, cpu; 1069 1070 mtx_assert(&qgroup->tqg_lock, MA_OWNED); 1071 1072 if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) { 1073 printf("%s: failed cnt: %d stride: %d " 1074 "mp_ncpus: %d tqg_smp_started: %d\n", 1075 __func__, cnt, stride, mp_ncpus, tqg_smp_started); 1076 return (EINVAL); 1077 } 1078 if (qgroup->tqg_adjusting) { 1079 printf("%s: failed: adjusting\n", __func__); 1080 return (EBUSY); 1081 } 1082 /* No work to be done */ 1083 if (qgroup->tqg_cnt == cnt) 1084 return (0); 1085 qgroup->tqg_adjusting = 1; 1086 old_cnt = qgroup->tqg_cnt; 1087 old_cpu = 0; 1088 if (old_cnt < cnt) { 1089 int old_max_idx = max(0, old_cnt-1); 1090 old_cpu = qgroup->tqg_queue[old_max_idx].tgc_cpu; 1091 if (old_cnt > 0) 1092 for (k = 0; k < stride; k++) 1093 old_cpu = CPU_NEXT(old_cpu); 1094 } 1095 mtx_unlock(&qgroup->tqg_lock); 1096 /* 1097 * Set up queue for tasks added before boot. 1098 */ 1099 if (old_cnt == 0) { 1100 LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, 1101 grouptask, gt_list); 1102 qgroup->tqg_queue[0].tgc_cnt = 0; 1103 } 1104 1105 /* 1106 * If new taskq threads have been added. 
1107 */ 1108 cpu = old_cpu; 1109 for (i = old_cnt; i < cnt; i++) { 1110 taskqgroup_cpu_create(qgroup, i, cpu, ithread, pri); 1111 1112 for (k = 0; k < stride; k++) 1113 cpu = CPU_NEXT(cpu); 1114 } 1115 mtx_lock(&qgroup->tqg_lock); 1116 qgroup->tqg_cnt = cnt; 1117 qgroup->tqg_stride = stride; 1118 qgroup->tqg_intr = ithread; 1119 qgroup->tqg_pri = pri; 1120 1121 /* 1122 * Adjust drivers to use new taskqs. 1123 */ 1124 for (i = 0; i < old_cnt; i++) { 1125 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { 1126 LIST_REMOVE(gtask, gt_list); 1127 qgroup->tqg_queue[i].tgc_cnt--; 1128 LIST_INSERT_HEAD(>ask_head, gtask, gt_list); 1129 } 1130 } 1131 mtx_unlock(&qgroup->tqg_lock); 1132 1133 while ((gtask = LIST_FIRST(>ask_head))) { 1134 LIST_REMOVE(gtask, gt_list); 1135 if (gtask->gt_cpu == -1) 1136 taskqgroup_attach_deferred(qgroup, gtask); 1137 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask)) 1138 taskqgroup_attach_deferred(qgroup, gtask); 1139 } 1140 1141 #ifdef INVARIANTS 1142 mtx_lock(&qgroup->tqg_lock); 1143 for (i = 0; i < qgroup->tqg_cnt; i++) { 1144 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL); 1145 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) 1146 MPASS(gtask->gt_taskqueue != NULL); 1147 } 1148 mtx_unlock(&qgroup->tqg_lock); 1149 #endif 1150 /* 1151 * If taskq thread count has been reduced. 
1152 */ 1153 for (i = cnt; i < old_cnt; i++) 1154 taskqgroup_cpu_remove(qgroup, i); 1155 1156 taskqgroup_bind(qgroup); 1157 1158 mtx_lock(&qgroup->tqg_lock); 1159 qgroup->tqg_adjusting = 0; 1160 1161 return (0); 1162 } 1163 1164 int 1165 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) 1166 { 1167 int error; 1168 1169 mtx_lock(&qgroup->tqg_lock); 1170 error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); 1171 mtx_unlock(&qgroup->tqg_lock); 1172 1173 return (error); 1174 } 1175 1176 void 1177 taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)) 1178 { 1179 qgroup-> adjust_func = adjust_func; 1180 } 1181 1182 int 1183 taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) 1184 { 1185 int error = 0; 1186 1187 mtx_lock(&qgroup->tqg_lock); 1188 if ((qgroup->tqg_flags & (TQG_ADJUSTED|TQG_NEED_ADJUST)) == TQG_NEED_ADJUST) { 1189 qgroup->tqg_flags |= TQG_ADJUSTED; 1190 error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); 1191 MPASS(error == 0); 1192 } 1193 mtx_unlock(&qgroup->tqg_lock); 1194 1195 return (error); 1196 } 1197 1198 struct taskqgroup * 1199 taskqgroup_create(char *name) 1200 { 1201 struct taskqgroup *qgroup; 1202 1203 qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO); 1204 mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); 1205 qgroup->tqg_name = name; 1206 LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); 1207 MPASS(qgroup->tqg_queue[0].tgc_cnt == 0); 1208 MPASS(qgroup->tqg_queue[0].tgc_cpu == 0); 1209 MPASS(qgroup->tqg_queue[0].tgc_taskq == 0); 1210 return (qgroup); 1211 } 1212 1213 void 1214 taskqgroup_destroy(struct taskqgroup *qgroup) 1215 { 1216 1217 } 1218