/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * SPU scheduler, based on Linux thread priority. For now use
 * a simple "cooperative" yield model with no preemption. SPU
 * scheduling will eventually be preemptive: When a thread with
 * a higher static priority gets ready to run, then an active SPU
 * context will be preempted and returned to the waitq.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include "spufs.h"

#define SPU_MIN_TIMESLICE (100 * HZ / 1000)

#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
	atomic_t nr_blocked;
	unsigned long bitmap[SPU_BITMAP_SIZE];
	wait_queue_head_t waitq[MAX_PRIO];
};

/* spu_runqueue - This is the main runqueue data structure for SPUs. */
struct spu_runqueue {
	struct semaphore sem;
	unsigned long nr_active;
	unsigned long nr_idle;
	unsigned long nr_switches;
	struct list_head active_list;
	struct list_head idle_list;
	struct spu_prio_array prio;
};

static struct spu_runqueue *spu_runqueues = NULL;

static inline struct spu_runqueue *spu_rq(void)
{
	/* Future: make this a per-NODE array,
	 * and use cpu_to_node(smp_processor_id())
	 */
	return spu_runqueues;
}

static inline struct spu *del_idle(struct spu_runqueue *rq)
{
	struct spu *spu;

	BUG_ON(rq->nr_idle <= 0);
	BUG_ON(list_empty(&rq->idle_list));
	/* Future: Move SPU out of low-power SRI state. */
	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
	list_del_init(&spu->sched_list);
	rq->nr_idle--;
	return spu;
}

static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
{
	BUG_ON(rq->nr_active <= 0);
	BUG_ON(list_empty(&rq->active_list));
	list_del_init(&spu->sched_list);
	rq->nr_active--;
}

static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
{
	/* Future: Put SPU into low-power SRI state. */
	list_add_tail(&spu->sched_list, &rq->idle_list);
	rq->nr_idle++;
}

static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
{
	rq->nr_active++;
	rq->nr_switches++;
	list_add_tail(&spu->sched_list, &rq->active_list);
}
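
/*
 * Priority wait/wakeup helpers.
 *
 * Threads that cannot get an SPU sleep on the waitqueue matching their
 * static priority. A set bit in prio.bitmap means at least one thread
 * of that priority is (or is about to be) sleeping, and the delimiter
 * bit at MAX_PRIO (set in spu_sched_init) makes sched_find_first_bit()
 * return MAX_PRIO when nobody is queued. prio_wakeup() wakes a single
 * waiter from the best-priority (lowest-numbered) non-empty queue once
 * an SPU is idle, and is_best_prio() reports whether the current thread
 * outranks every queued waiter.
 */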
static void prio_wakeup(struct spu_runqueue *rq)
{
	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
		int best = sched_find_first_bit(rq->prio.bitmap);
		if (best < MAX_PRIO) {
			wait_queue_head_t *wq = &rq->prio.waitq[best];
			wake_up_interruptible_nr(wq, 1);
		}
	}
}

static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
		      u64 flags)
{
	int prio = current->prio;
	wait_queue_head_t *wq = &rq->prio.waitq[prio];
	DEFINE_WAIT(wait);

	__set_bit(prio, rq->prio.bitmap);
	atomic_inc(&rq->prio.nr_blocked);
	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		up(&rq->sem);
		up_write(&ctx->state_sema);
		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
			 current->pid, current->prio);
		schedule();
		down_write(&ctx->state_sema);
		down(&rq->sem);
	}
	finish_wait(wq, &wait);
	atomic_dec(&rq->prio.nr_blocked);
	if (!waitqueue_active(wq))
		__clear_bit(prio, rq->prio.bitmap);
}

static inline int is_best_prio(struct spu_runqueue *rq)
{
	int best_prio;

	best_prio = sched_find_first_bit(rq->prio.bitmap);
	return (current->prio < best_prio) ? 1 : 0;
}

static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	/* Global TLBIE broadcast required with SPEs. */
#if (NR_CPUS > 1)
	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
#else
	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
#endif
}
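
/*
 * bind_context()/unbind_context() attach and detach a context from a
 * physical SPU. Binding wires up the mm and the spufs callbacks and
 * restores the saved context state (CSA) onto the hardware; unbinding
 * saves the state back into the CSA and switches ctx->ops over to the
 * backing (saved-state) implementation so the context can still be
 * accessed while it is not loaded on an SPU.
 */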
static inline void bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
		 spu->number);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu->prio = current->prio;
	spu->mm = ctx->owner;
	mm_needs_global_tlbie(spu->mm);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
}

static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
		 spu->pid, spu->number);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->mm = NULL;
	spu->pid = 0;
	spu->prio = MAX_PRIO;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	ctx->flags = 0;
	spu->flags = 0;
	spu->ctx = NULL;
}

static void spu_reaper(void *data)
{
	struct spu_context *ctx = data;
	struct spu *spu;

	down_write(&ctx->state_sema);
	spu = ctx->spu;
	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
		if (atomic_read(&spu->rq->prio.nr_blocked)) {
			pr_debug("%s: spu=%d\n", __func__, spu->number);
			ctx->ops->runcntl_stop(ctx);
			spu_deactivate(ctx);
			wake_up_all(&ctx->stop_wq);
		} else {
			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
		}
	}
	up_write(&ctx->state_sema);
	put_spu_context(ctx);
}

static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
{
	struct spu_context *ctx = get_spu_context(spu->ctx);
	unsigned long now = jiffies;
	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;

	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
	if (time_after(now, expire))
		schedule_work(&ctx->reap_work);
	else
		schedule_delayed_work(&ctx->reap_work, expire - now);
}

static void check_preempt_active(struct spu_runqueue *rq)
{
	struct list_head *p;
	struct spu *worst = NULL;

	list_for_each(p, &rq->active_list) {
		struct spu *spu = list_entry(p, struct spu, sched_list);
		struct spu_context *ctx = spu->ctx;
		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
			if (!worst || (spu->prio > worst->prio)) {
				worst = spu;
			}
		}
	}
	if (worst && (current->prio < worst->prio))
		schedule_spu_reaper(rq, worst);
}
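
/*
 * get_idle_spu() loops until the caller can claim an idle SPU at its
 * priority. If SPUs are idle but an equal- or better-priority waiter
 * is queued, it wakes that waiter and yields the CPU; if no SPU is
 * idle, it may schedule preemption of the lowest-priority active SPU
 * (see check_preempt_active() above) and then sleeps on the waitqueue
 * for the current priority. Returns NULL when interrupted by a signal.
 */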
static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
{
	struct spu_runqueue *rq;
	struct spu *spu = NULL;

	rq = spu_rq();
	down(&rq->sem);
	for (;;) {
		if (rq->nr_idle > 0) {
			if (is_best_prio(rq)) {
				/* Fall through. */
				spu = del_idle(rq);
				break;
			} else {
				prio_wakeup(rq);
				up(&rq->sem);
				yield();
				if (signal_pending(current)) {
					return NULL;
				}
				rq = spu_rq();
				down(&rq->sem);
				continue;
			}
		} else {
			check_preempt_active(rq);
			prio_wait(rq, ctx, flags);
			if (signal_pending(current)) {
				prio_wakeup(rq);
				spu = NULL;
				break;
			}
			continue;
		}
	}
	up(&rq->sem);
	return spu;
}

static void put_idle_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;

	down(&rq->sem);
	add_idle(rq, spu);
	prio_wakeup(rq);
	up(&rq->sem);
}

static int get_active_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;
	struct list_head *p;
	struct spu *tmp;
	int rc = 0;

	down(&rq->sem);
	list_for_each(p, &rq->active_list) {
		tmp = list_entry(p, struct spu, sched_list);
		if (tmp == spu) {
			del_active(rq, spu);
			rc = 1;
			break;
		}
	}
	up(&rq->sem);
	return rc;
}

static void put_active_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;

	down(&rq->sem);
	add_active(rq, spu);
	up(&rq->sem);
}

/* Lock order:
 *	spu_activate() & spu_deactivate() require the
 *	caller to have down_write(&ctx->state_sema).
 *
 *	The rq->sem is briefly held (inside or outside a
 *	given ctx lock) for list management, but is never
 *	held during save/restore.
 */

int spu_activate(struct spu_context *ctx, u64 flags)
{
	struct spu *spu;

	if (ctx->spu)
		return 0;
	spu = get_idle_spu(ctx, flags);
	if (!spu)
		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
	bind_context(spu, ctx);
	/*
	 * We're likely to wait for interrupts on the same
	 * CPU that we are now on, so send them here.
	 */
	spu_irq_setaffinity(spu, raw_smp_processor_id());
	put_active_spu(spu);
	return 0;
}

void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu;
	int needs_idle;

	spu = ctx->spu;
	if (!spu)
		return;
	needs_idle = get_active_spu(spu);
	unbind_context(spu, ctx);
	if (needs_idle)
		put_idle_spu(spu);
}
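
/*
 * Illustrative only: a minimal sketch of a caller honouring the lock
 * order documented above. The function name sample_spu_run() is made
 * up for this example and does not exist in spufs.
 *
 *	static int sample_spu_run(struct spu_context *ctx)
 *	{
 *		int ret;
 *
 *		down_write(&ctx->state_sema);
 *		ret = spu_activate(ctx, 0);	// may sleep in prio_wait()
 *		if (!ret) {
 *			// ... drive the context through ctx->ops ...
 *			spu_deactivate(ctx);	// state_sema still held
 *		}
 *		up_write(&ctx->state_sema);
 *		return ret;
 *	}
 */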
365 */ 366 spu_irq_setaffinity(spu, raw_smp_processor_id()); 367 put_active_spu(spu); 368 return 0; 369 } 370 371 void spu_deactivate(struct spu_context *ctx) 372 { 373 struct spu *spu; 374 int needs_idle; 375 376 spu = ctx->spu; 377 if (!spu) 378 return; 379 needs_idle = get_active_spu(spu); 380 unbind_context(spu, ctx); 381 if (needs_idle) 382 put_idle_spu(spu); 383 } 384 385 void spu_yield(struct spu_context *ctx) 386 { 387 struct spu *spu; 388 int need_yield = 0; 389 390 down_write(&ctx->state_sema); 391 spu = ctx->spu; 392 if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) { 393 pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number); 394 spu_deactivate(ctx); 395 ctx->state = SPU_STATE_SAVED; 396 need_yield = 1; 397 } else if (spu) { 398 spu->prio = MAX_PRIO; 399 } 400 up_write(&ctx->state_sema); 401 if (unlikely(need_yield)) 402 yield(); 403 } 404 405 int __init spu_sched_init(void) 406 { 407 struct spu_runqueue *rq; 408 struct spu *spu; 409 int i; 410 411 rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL); 412 if (!rq) { 413 printk(KERN_WARNING "%s: Unable to allocate runqueues.\n", 414 __FUNCTION__); 415 return 1; 416 } 417 memset(rq, 0, sizeof(struct spu_runqueue)); 418 init_MUTEX(&rq->sem); 419 INIT_LIST_HEAD(&rq->active_list); 420 INIT_LIST_HEAD(&rq->idle_list); 421 rq->nr_active = 0; 422 rq->nr_idle = 0; 423 rq->nr_switches = 0; 424 atomic_set(&rq->prio.nr_blocked, 0); 425 for (i = 0; i < MAX_PRIO; i++) { 426 init_waitqueue_head(&rq->prio.waitq[i]); 427 __clear_bit(i, rq->prio.bitmap); 428 } 429 __set_bit(MAX_PRIO, rq->prio.bitmap); 430 for (;;) { 431 spu = spu_alloc(); 432 if (!spu) 433 break; 434 pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number); 435 add_idle(rq, spu); 436 spu->rq = rq; 437 spu->timestamp = jiffies; 438 } 439 if (!rq->nr_idle) { 440 printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__); 441 kfree(rq); 442 return 1; 443 } 444 return 0; 445 } 446 447 void __exit spu_sched_exit(void) 448 { 449 struct spu_runqueue *rq = spu_rq(); 450 struct spu *spu; 451 452 if (!rq) { 453 printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__); 454 return; 455 } 456 while (rq->nr_idle > 0) { 457 spu = del_idle(rq); 458 if (!spu) 459 break; 460 spu_free(spu); 461 } 462 kfree(rq); 463 } 464