/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

#define SPU_TIMESLICE	(HZ)

struct spu_prio_array {
	DECLARE_BITMAP(bitmap, MAX_PRIO);
	struct list_head runq[MAX_PRIO];
	spinlock_t runq_lock;
	struct list_head active_list[MAX_NUMNODES];
	struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;
static struct workqueue_struct *spu_sched_wq;

static inline int node_allowed(int node)
{
	cpumask_t mask;

	if (!nr_cpus_node(node))
		return 0;
	mask = node_to_cpumask(node);
	if (!cpus_intersects(mask, current->cpus_allowed))
		return 0;
	return 1;
}

void spu_start_tick(struct spu_context *ctx)
{
	if (ctx->policy == SCHED_RR)
		queue_delayed_work(spu_sched_wq, &ctx->sched_work,
				   SPU_TIMESLICE);
}

void spu_stop_tick(struct spu_context *ctx)
{
	if (ctx->policy == SCHED_RR)
		cancel_delayed_work(&ctx->sched_work);
}

void spu_sched_tick(struct work_struct *work)
{
	struct spu_context *ctx =
		container_of(work, struct spu_context, sched_work.work);
	struct spu *spu;
	int rearm = 1;

	mutex_lock(&ctx->state_mutex);
	spu = ctx->spu;
	if (spu) {
		int best = sched_find_first_bit(spu_prio->bitmap);
		if (best <= ctx->prio) {
			spu_deactivate(ctx);
			rearm = 0;
		}
	}
	mutex_unlock(&ctx->state_mutex);

	if (rearm)
		spu_start_tick(ctx);
}

/**
 * spu_add_to_active_list - add spu to active list
 * @spu:	spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
	mutex_lock(&spu_prio->active_mutex[spu->node]);
	list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
	mutex_unlock(&spu_prio->active_mutex[spu->node]);
}

/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu:	spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
	int node = spu->node;

	mutex_lock(&spu_prio->active_mutex[node]);
	list_del_init(&spu->list);
	mutex_unlock(&spu_prio->active_mutex[node]);
}

static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
	blocking_notifier_call_chain(&spu_switch_notifier,
				     ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block *n)
{
	return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block *n)
{
	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}
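/*
 * Illustrative sketch only, not part of this file: a client such as a
 * profiler could subscribe to SPU context-switch events roughly like
 * this.  The handler and notifier_block names are hypothetical; the
 * callback arguments follow the blocking notifier convention, with the
 * object_id passed as the action (0 when a context is unbound) and the
 * struct spu pointer as the data argument, matching spu_switch_notify()
 * above.
 *
 *	static int my_spu_switch_event(struct notifier_block *nb,
 *				       unsigned long object_id, void *data)
 *	{
 *		struct spu *spu = data;
 *
 *		pr_debug("SPU %d now runs object %lx\n",
 *			 spu->number, object_id);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_spu_switch_nb = {
 *		.notifier_call	= my_spu_switch_event,
 *	};
 *
 *	spu_switch_event_register(&my_spu_switch_nb);
 */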
/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:	physical spu to bind to
 * @ctx:	context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
		 spu->number, spu->node);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu_associate_mm(spu, ctx->owner);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	spu->dma_callback = spufs_dma_callback;
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	spu_switch_notify(spu, ctx);
	spu_add_to_active_list(spu);
	ctx->state = SPU_STATE_RUNNABLE;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);

	spu_remove_from_active_list(spu);
	spu_switch_notify(spu, NULL);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu_associate_mm(spu, NULL);
	spu->pid = 0;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->flags = 0;
	spu->ctx = NULL;
}

/**
 * spu_add_to_rq - add a context to the runqueue
 * @ctx:	context to add
 */
static void spu_add_to_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
	set_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_del_from_rq - remove a context from the runqueue
 * @ctx:	context to remove
 */
static void spu_del_from_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_del_init(&ctx->rq);
	if (list_empty(&spu_prio->runq[ctx->prio]))
		clear_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_grab_context - pick one context off the runqueue
 * @prio:	priority of the context to pick
 *
 * This function returns the first context queued on the runqueue for
 * priority @prio.  If there is more than one context with the given
 * priority, the one that has been waiting longest is taken.  The
 * context is not removed here; it removes itself via spu_del_from_rq()
 * once it has been woken up.
 *
 * Must be called with spu_prio->runq_lock held.
 */
static struct spu_context *spu_grab_context(int prio)
{
	struct list_head *rq = &spu_prio->runq[prio];

	if (list_empty(rq))
		return NULL;
	return list_entry(rq->next, struct spu_context, rq);
}
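/*
 * Runqueue layout, for reference: spu_prio->runq is an array of FIFO
 * lists, one per priority level, mirrored by a bitmap with one bit set
 * per non-empty list.  Finding the best waiting context is therefore a
 * find-first-bit over the bitmap followed by taking the head of that
 * list, e.g. (sketch of the pattern used in spu_reschedule() and
 * spu_yield() below):
 *
 *	int best = sched_find_first_bit(spu_prio->bitmap);
 *	if (best < MAX_PRIO)
 *		ctx = spu_grab_context(best);
 *
 * A bit at index MAX_PRIO is set once in spu_sched_init() so that the
 * lookup yields MAX_PRIO when no context is queued at all.
 */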
static void spu_prio_wait(struct spu_context *ctx)
{
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		mutex_unlock(&ctx->state_mutex);
		schedule();
		mutex_lock(&ctx->state_mutex);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->stop_wq, &wait);
}

/**
 * spu_reschedule - try to find a runnable context for a spu
 * @spu:	spu available
 *
 * This function is called whenever a spu becomes idle.  It looks for the
 * highest priority context on the runqueue and wakes it up so it can
 * claim the freed spu.
 */
static void spu_reschedule(struct spu *spu)
{
	int best;

	spu_free(spu);

	spin_lock(&spu_prio->runq_lock);
	best = sched_find_first_bit(spu_prio->bitmap);
	if (best < MAX_PRIO) {
		struct spu_context *ctx = spu_grab_context(best);
		if (ctx)
			wake_up(&ctx->stop_wq);
	}
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_get_idle - try to allocate an idle spu for a context
 * @ctx:	context that needs a spu
 *
 * Starts at the local NUMA node and walks every other node the calling
 * task is allowed to run on until an idle spu is found.  Returns the
 * allocated spu, or NULL if none is available.
 */
static struct spu *spu_get_idle(struct spu_context *ctx)
{
	struct spu *spu = NULL;
	int node = cpu_to_node(raw_smp_processor_id());
	int n;

	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}
	return spu;
}
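/*
 * When no idle spu can be found the scheduler may steal one: for
 * contexts with a realtime priority, spu_activate() falls back to
 * find_victim() below, which preempts the running context with the
 * lowest rt_priority, provided it is strictly lower than that of the
 * new context, and hands over its spu.  If that also fails, or for
 * non-realtime contexts, the context queues on the runqueue and waits
 * in spu_prio_wait() until spu_reschedule() wakes it.
 */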
/**
 * find_victim - find a lower priority context to preempt
 * @ctx:	candidate context for running
 *
 * Returns the freed physical spu to run the new context on.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
	struct spu_context *victim = NULL;
	struct spu *spu;
	int node, n;

	/*
	 * Look for a possible preemption candidate on the local node first.
	 * If there is no candidate look at the other nodes.  This isn't
	 * exactly fair, but so far the whole spu scheduler tries to keep
	 * a strong node affinity.  We might want to fine-tune this in
	 * the future.
	 */
 restart:
	node = cpu_to_node(raw_smp_processor_id());
	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;

		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
			struct spu_context *tmp = spu->ctx;

			if (tmp->rt_priority < ctx->rt_priority &&
			    (!victim || tmp->rt_priority < victim->rt_priority))
				victim = spu->ctx;
		}
		mutex_unlock(&spu_prio->active_mutex[node]);

		if (victim) {
			/*
			 * This nests ctx->state_mutex, but we always lock
			 * higher priority contexts before lower priority
			 * ones, so this is safe until we introduce
			 * priority inheritance schemes.
			 */
			if (!mutex_trylock(&victim->state_mutex)) {
				victim = NULL;
				goto restart;
			}

			spu = victim->spu;
			if (!spu) {
				/*
				 * This race can happen because we've dropped
				 * the active list mutex.  Not a problem, just
				 * restart the search.
				 */
				mutex_unlock(&victim->state_mutex);
				victim = NULL;
				goto restart;
			}
			spu_unbind_context(spu, victim);
			mutex_unlock(&victim->state_mutex);
			return spu;
		}
	}

	return NULL;
}

/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx:	spu context to schedule
 * @flags:	flags (currently ignored)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available,
 * add the context to the runqueue so it gets woken up once an spu
 * is available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
	if (ctx->spu)
		return 0;

	do {
		struct spu *spu;

		spu = spu_get_idle(ctx);
		/*
		 * If this is a realtime thread we try to get it running by
		 * preempting a lower priority thread.
		 */
		if (!spu && ctx->rt_priority)
			spu = find_victim(ctx);
		if (spu) {
			spu_bind_context(spu, ctx);
			return 0;
		}

		spu_add_to_rq(ctx);
		spu_prio_wait(ctx);
		spu_del_from_rq(ctx);
	} while (!signal_pending(current));

	return -ERESTARTSYS;
}

/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and schedule
 * the highest priority context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu = ctx->spu;

	if (spu) {
		spu_unbind_context(spu, ctx);
		spu_reschedule(spu);
	}
}
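/*
 * Note on the sleep/wakeup handshake: a context that cannot get an spu
 * parks itself on the runqueue (spu_add_to_rq()) and sleeps in
 * spu_prio_wait() with its state_mutex dropped.  Whenever an spu is
 * freed, spu_reschedule() picks the highest priority waiter off the
 * runqueue and wakes its stop_wq; the woken context then retries
 * spu_get_idle() from the top of the loop in spu_activate().  A pending
 * signal aborts the wait and makes spu_activate() return -ERESTARTSYS.
 */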
/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * Check if another context is waiting on the runqueue and, if so,
 * unbind @ctx from its physical spu so that the highest priority
 * waiting context can run on the freed spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;
	int need_yield = 0;

	if (mutex_trylock(&ctx->state_mutex)) {
		if ((spu = ctx->spu) != NULL) {
			int best = sched_find_first_bit(spu_prio->bitmap);
			if (best < MAX_PRIO) {
				pr_debug("%s: yielding SPU %d NODE %d\n",
					 __FUNCTION__, spu->number, spu->node);
				spu_deactivate(ctx);
				need_yield = 1;
			}
		}
		mutex_unlock(&ctx->state_mutex);
	}
	if (unlikely(need_yield))
		yield();
}

int __init spu_sched_init(void)
{
	int i;

	spu_sched_wq = create_singlethread_workqueue("spusched");
	if (!spu_sched_wq)
		return 1;

	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
	if (!spu_prio) {
		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
		       __FUNCTION__);
		destroy_workqueue(spu_sched_wq);
		return 1;
	}
	for (i = 0; i < MAX_PRIO; i++) {
		INIT_LIST_HEAD(&spu_prio->runq[i]);
		__clear_bit(i, spu_prio->bitmap);
	}
	__set_bit(MAX_PRIO, spu_prio->bitmap);
	for (i = 0; i < MAX_NUMNODES; i++) {
		mutex_init(&spu_prio->active_mutex[i]);
		INIT_LIST_HEAD(&spu_prio->active_list[i]);
	}
	spin_lock_init(&spu_prio->runq_lock);
	return 0;
}

void __exit spu_sched_exit(void)
{
	struct spu *spu, *tmp;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
					 list) {
			list_del_init(&spu->list);
			spu_free(spu);
		}
		mutex_unlock(&spu_prio->active_mutex[node]);
	}
	kfree(spu_prio);
	destroy_workqueue(spu_sched_wq);
}