/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * SPU scheduler, based on Linux thread priority. For now use
 * a simple "cooperative" yield model with no preemption. SPU
 * scheduling will eventually be preemptive: When a thread with
 * a higher static priority gets ready to run, then an active SPU
 * context will be preempted and returned to the waitq.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

#define SPU_MIN_TIMESLICE (100 * HZ / 1000)

#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
	atomic_t nr_blocked;
	unsigned long bitmap[SPU_BITMAP_SIZE];
	wait_queue_head_t waitq[MAX_PRIO];
};

/* spu_runqueue - This is the main runqueue data structure for SPUs. */
struct spu_runqueue {
	struct semaphore sem;
	unsigned long nr_active;
	unsigned long nr_idle;
	unsigned long nr_switches;
	struct list_head active_list;
	struct list_head idle_list;
	struct spu_prio_array prio;
};

static struct spu_runqueue *spu_runqueues = NULL;

static inline struct spu_runqueue *spu_rq(void)
{
	/* Future: make this a per-NODE array,
	 * and use cpu_to_node(smp_processor_id())
	 */
	return spu_runqueues;
}

static inline struct spu *del_idle(struct spu_runqueue *rq)
{
	struct spu *spu;

	BUG_ON(rq->nr_idle <= 0);
	BUG_ON(list_empty(&rq->idle_list));
	/* Future: Move SPU out of low-power SRI state. */
	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
	list_del_init(&spu->sched_list);
	rq->nr_idle--;
	return spu;
}

static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
{
	BUG_ON(rq->nr_active <= 0);
	BUG_ON(list_empty(&rq->active_list));
	list_del_init(&spu->sched_list);
	rq->nr_active--;
}

static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
{
	/* Future: Put SPU into low-power SRI state. */
	list_add_tail(&spu->sched_list, &rq->idle_list);
	rq->nr_idle++;
}

static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
{
	rq->nr_active++;
	rq->nr_switches++;
	list_add_tail(&spu->sched_list, &rq->active_list);
}
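
/*
 * Priority bookkeeping: a task that cannot get an SPU sleeps
 * exclusively on the waitqueue for its priority and sets the matching
 * bit in prio.bitmap (see prio_wait() below).  prio_wakeup() then uses
 * sched_find_first_bit() to wake one waiter from the best (numerically
 * lowest) priority that has sleepers.  Bit MAX_PRIO is kept set
 * permanently by spu_sched_init() so the bitmap search always
 * terminates.
 */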
static void prio_wakeup(struct spu_runqueue *rq)
{
	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
		int best = sched_find_first_bit(rq->prio.bitmap);
		if (best < MAX_PRIO) {
			wait_queue_head_t *wq = &rq->prio.waitq[best];
			wake_up_interruptible_nr(wq, 1);
		}
	}
}

static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
		      u64 flags)
{
	int prio = current->prio;
	wait_queue_head_t *wq = &rq->prio.waitq[prio];
	DEFINE_WAIT(wait);

	__set_bit(prio, rq->prio.bitmap);
	atomic_inc(&rq->prio.nr_blocked);
	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		up(&rq->sem);
		up_write(&ctx->state_sema);
		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
			 current->pid, current->prio);
		schedule();
		down_write(&ctx->state_sema);
		down(&rq->sem);
	}
	finish_wait(wq, &wait);
	atomic_dec(&rq->prio.nr_blocked);
	if (!waitqueue_active(wq))
		__clear_bit(prio, rq->prio.bitmap);
}

static inline int is_best_prio(struct spu_runqueue *rq)
{
	int best_prio;

	best_prio = sched_find_first_bit(rq->prio.bitmap);
	return (current->prio < best_prio) ? 1 : 0;
}

static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	/* Global TLBIE broadcast required with SPEs. */
#if (NR_CPUS > 1)
	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
#else
	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
#endif
}

static inline void bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
		 spu->number);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu->prio = current->prio;
	spu->mm = ctx->owner;
	mm_needs_global_tlbie(spu->mm);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
}

static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
		 spu->pid, spu->number);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->mm = NULL;
	spu->pid = 0;
	spu->prio = MAX_PRIO;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	ctx->flags = 0;
	spu->flags = 0;
	spu->ctx = NULL;
}
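
/*
 * Preemption path: when a higher-priority thread is waiting for an SPU,
 * check_preempt_active() picks the worst-priority running context and
 * schedule_spu_reaper() marks it with SPU_CONTEXT_PREEMPT.  The actual
 * preemption is deferred to the spu_reaper() work item, delayed if
 * necessary so that the victim keeps the SPU for at least
 * SPU_MIN_TIMESLICE.  If no waiters are left by the time the work runs,
 * the preempt flag is simply cleared again.
 */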
static void spu_reaper(void *data)
{
	struct spu_context *ctx = data;
	struct spu *spu;

	down_write(&ctx->state_sema);
	spu = ctx->spu;
	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
		if (atomic_read(&spu->rq->prio.nr_blocked)) {
			pr_debug("%s: spu=%d\n", __func__, spu->number);
			ctx->ops->runcntl_stop(ctx);
			spu_deactivate(ctx);
			wake_up_all(&ctx->stop_wq);
		} else {
			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
		}
	}
	up_write(&ctx->state_sema);
	put_spu_context(ctx);
}

static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
{
	struct spu_context *ctx = get_spu_context(spu->ctx);
	unsigned long now = jiffies;
	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;

	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
	if (time_after(now, expire))
		schedule_work(&ctx->reap_work);
	else
		schedule_delayed_work(&ctx->reap_work, expire - now);
}

static void check_preempt_active(struct spu_runqueue *rq)
{
	struct list_head *p;
	struct spu *worst = NULL;

	list_for_each(p, &rq->active_list) {
		struct spu *spu = list_entry(p, struct spu, sched_list);
		struct spu_context *ctx = spu->ctx;
		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
			if (!worst || (spu->prio > worst->prio)) {
				worst = spu;
			}
		}
	}
	if (worst && (current->prio < worst->prio))
		schedule_spu_reaper(rq, worst);
}
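
/*
 * get_idle_spu() is called from spu_activate() with the context's
 * state_sema held for writing.  It loops until it either claims an
 * idle SPU or is interrupted by a signal:
 *
 *   - if an SPU is idle but a higher-priority task is already waiting,
 *     wake that waiter and yield() the CPU before retrying;
 *   - if no SPU is idle, try to preempt the worst-priority active
 *     context and sleep on this task's priority waitqueue.
 *
 * rq->sem is dropped while sleeping or yielding and re-taken before
 * the runqueue lists are touched again.
 */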
static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
{
	struct spu_runqueue *rq;
	struct spu *spu = NULL;

	rq = spu_rq();
	down(&rq->sem);
	for (;;) {
		if (rq->nr_idle > 0) {
			if (is_best_prio(rq)) {
				/* Fall through. */
				spu = del_idle(rq);
				break;
			} else {
				prio_wakeup(rq);
				up(&rq->sem);
				yield();
				if (signal_pending(current)) {
					return NULL;
				}
				rq = spu_rq();
				down(&rq->sem);
				continue;
			}
		} else {
			check_preempt_active(rq);
			prio_wait(rq, ctx, flags);
			if (signal_pending(current)) {
				prio_wakeup(rq);
				spu = NULL;
				break;
			}
			continue;
		}
	}
	up(&rq->sem);
	return spu;
}

static void put_idle_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;

	down(&rq->sem);
	add_idle(rq, spu);
	prio_wakeup(rq);
	up(&rq->sem);
}

static int get_active_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;
	struct list_head *p;
	struct spu *tmp;
	int rc = 0;

	down(&rq->sem);
	list_for_each(p, &rq->active_list) {
		tmp = list_entry(p, struct spu, sched_list);
		if (tmp == spu) {
			del_active(rq, spu);
			rc = 1;
			break;
		}
	}
	up(&rq->sem);
	return rc;
}

static void put_active_spu(struct spu *spu)
{
	struct spu_runqueue *rq = spu->rq;

	down(&rq->sem);
	add_active(rq, spu);
	up(&rq->sem);
}

/* Lock order:
 * spu_activate() & spu_deactivate() require the
 * caller to have down_write(&ctx->state_sema).
 *
 * The rq->sem is briefly held (inside or outside a
 * given ctx lock) for list management, but is never
 * held during save/restore.
 */

int spu_activate(struct spu_context *ctx, u64 flags)
{
	struct spu *spu;

	if (ctx->spu)
		return 0;
	spu = get_idle_spu(ctx, flags);
	if (!spu)
		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
	bind_context(spu, ctx);
	/*
	 * We're likely to wait for interrupts on the same
	 * CPU that we are now on, so send them here.
	 */
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	put_active_spu(spu);
	return 0;
}

void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu;
	int needs_idle;

	spu = ctx->spu;
	if (!spu)
		return;
	needs_idle = get_active_spu(spu);
	unbind_context(spu, ctx);
	if (needs_idle)
		put_idle_spu(spu);
}

void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;
	int need_yield = 0;

	down_write(&ctx->state_sema);
	spu = ctx->spu;
	if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
		spu_deactivate(ctx);
		ctx->state = SPU_STATE_SAVED;
		need_yield = 1;
	} else if (spu) {
		spu->prio = MAX_PRIO;
	}
	up_write(&ctx->state_sema);
	if (unlikely(need_yield))
		yield();
}

int __init spu_sched_init(void)
{
	struct spu_runqueue *rq;
	struct spu *spu;
	int i;

	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
	if (!rq) {
		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
		       __FUNCTION__);
		return 1;
	}
	memset(rq, 0, sizeof(struct spu_runqueue));
	init_MUTEX(&rq->sem);
	INIT_LIST_HEAD(&rq->active_list);
	INIT_LIST_HEAD(&rq->idle_list);
	rq->nr_active = 0;
	rq->nr_idle = 0;
	rq->nr_switches = 0;
	atomic_set(&rq->prio.nr_blocked, 0);
	for (i = 0; i < MAX_PRIO; i++) {
		init_waitqueue_head(&rq->prio.waitq[i]);
		__clear_bit(i, rq->prio.bitmap);
	}
	__set_bit(MAX_PRIO, rq->prio.bitmap);
	for (;;) {
		spu = spu_alloc();
		if (!spu)
			break;
		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
		add_idle(rq, spu);
		spu->rq = rq;
		spu->timestamp = jiffies;
	}
	if (!rq->nr_idle) {
		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
		kfree(rq);
		return 1;
	}
	return 0;
}

void __exit spu_sched_exit(void)
{
	struct spu_runqueue *rq = spu_rq();
	struct spu *spu;

	if (!rq) {
		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
		return;
	}
	while (rq->nr_idle > 0) {
		spu = del_idle(rq);
		if (!spu)
			break;
		spu_free(spu);
	}
	kfree(rq);
}