1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/kthread.h> 25 #include <linux/slab.h> 26 #include <linux/completion.h> 27 28 #include <drm/drm_print.h> 29 #include <drm/gpu_scheduler.h> 30 31 #include "gpu_scheduler_trace.h" 32 33 #define to_drm_sched_job(sched_job) \ 34 container_of((sched_job), struct drm_sched_job, queue_node) 35 36 /** 37 * drm_sched_entity_init - Init a context entity used by scheduler when 38 * submit to HW ring. 39 * 40 * @entity: scheduler entity to init 41 * @rq_list: the list of run queue on which jobs from this 42 * entity can be submitted 43 * @num_rq_list: number of run queue in rq_list 44 * @guilty: atomic_t set to 1 when a job on this queue 45 * is found to be guilty causing a timeout 46 * 47 * Note: the rq_list should have atleast one element to schedule 48 * the entity 49 * 50 * Returns 0 on success or a negative error code on failure. 51 */ 52 int drm_sched_entity_init(struct drm_sched_entity *entity, 53 struct drm_sched_rq **rq_list, 54 unsigned int num_rq_list, 55 atomic_t *guilty) 56 { 57 int i; 58 59 if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0]))) 60 return -EINVAL; 61 62 memset(entity, 0, sizeof(struct drm_sched_entity)); 63 INIT_LIST_HEAD(&entity->list); 64 entity->rq = NULL; 65 entity->guilty = guilty; 66 entity->num_rq_list = num_rq_list; 67 entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *), 68 GFP_KERNEL); 69 if (!entity->rq_list) 70 return -ENOMEM; 71 72 init_completion(&entity->entity_idle); 73 74 for (i = 0; i < num_rq_list; ++i) 75 entity->rq_list[i] = rq_list[i]; 76 77 if (num_rq_list) 78 entity->rq = rq_list[0]; 79 80 entity->last_scheduled = NULL; 81 82 spin_lock_init(&entity->rq_lock); 83 spsc_queue_init(&entity->job_queue); 84 85 atomic_set(&entity->fence_seq, 0); 86 entity->fence_context = dma_fence_context_alloc(2); 87 88 return 0; 89 } 90 EXPORT_SYMBOL(drm_sched_entity_init); 91 92 /** 93 * drm_sched_entity_is_idle - Check if entity is idle 94 * 95 * @entity: scheduler entity 96 * 97 * Returns true if the entity does not have any unscheduled jobs. 98 */ 99 static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) 100 { 101 rmb(); /* for list_empty to work without lock */ 102 103 if (list_empty(&entity->list) || 104 spsc_queue_count(&entity->job_queue) == 0) 105 return true; 106 107 return false; 108 } 109 110 /** 111 * drm_sched_entity_is_ready - Check if entity is ready 112 * 113 * @entity: scheduler entity 114 * 115 * Return true if entity could provide a job. 116 */ 117 bool drm_sched_entity_is_ready(struct drm_sched_entity *entity) 118 { 119 if (spsc_queue_peek(&entity->job_queue) == NULL) 120 return false; 121 122 if (READ_ONCE(entity->dependency)) 123 return false; 124 125 return true; 126 } 127 128 /** 129 * drm_sched_entity_get_free_sched - Get the rq from rq_list with least load 130 * 131 * @entity: scheduler entity 132 * 133 * Return the pointer to the rq with least load. 134 */ 135 static struct drm_sched_rq * 136 drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) 137 { 138 struct drm_sched_rq *rq = NULL; 139 unsigned int min_jobs = UINT_MAX, num_jobs; 140 int i; 141 142 for (i = 0; i < entity->num_rq_list; ++i) { 143 struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched; 144 145 if (!entity->rq_list[i]->sched->ready) { 146 DRM_WARN("sched%s is not ready, skipping", sched->name); 147 continue; 148 } 149 150 num_jobs = atomic_read(&sched->num_jobs); 151 if (num_jobs < min_jobs) { 152 min_jobs = num_jobs; 153 rq = entity->rq_list[i]; 154 } 155 } 156 157 return rq; 158 } 159 160 /** 161 * drm_sched_entity_flush - Flush a context entity 162 * 163 * @entity: scheduler entity 164 * @timeout: time to wait in for Q to become empty in jiffies. 165 * 166 * Splitting drm_sched_entity_fini() into two functions, The first one does the 167 * waiting, removes the entity from the runqueue and returns an error when the 168 * process was killed. 169 * 170 * Returns the remaining time in jiffies left from the input timeout 171 */ 172 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) 173 { 174 struct drm_gpu_scheduler *sched; 175 struct task_struct *last_user; 176 long ret = timeout; 177 178 if (!entity->rq) 179 return 0; 180 181 sched = entity->rq->sched; 182 /** 183 * The client will not queue more IBs during this fini, consume existing 184 * queued IBs or discard them on SIGKILL 185 */ 186 if (current->flags & PF_EXITING) { 187 if (timeout) 188 ret = wait_event_timeout( 189 sched->job_scheduled, 190 drm_sched_entity_is_idle(entity), 191 timeout); 192 } else { 193 wait_event_killable(sched->job_scheduled, 194 drm_sched_entity_is_idle(entity)); 195 } 196 197 /* For killed process disable any more IBs enqueue right now */ 198 last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); 199 if ((!last_user || last_user == current->group_leader) && 200 (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) { 201 spin_lock(&entity->rq_lock); 202 entity->stopped = true; 203 drm_sched_rq_remove_entity(entity->rq, entity); 204 spin_unlock(&entity->rq_lock); 205 } 206 207 return ret; 208 } 209 EXPORT_SYMBOL(drm_sched_entity_flush); 210 211 /** 212 * drm_sched_entity_kill_jobs - helper for drm_sched_entity_kill_jobs 213 * 214 * @f: signaled fence 215 * @cb: our callback structure 216 * 217 * Signal the scheduler finished fence when the entity in question is killed. 218 */ 219 static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, 220 struct dma_fence_cb *cb) 221 { 222 struct drm_sched_job *job = container_of(cb, struct drm_sched_job, 223 finish_cb); 224 225 drm_sched_fence_finished(job->s_fence); 226 WARN_ON(job->s_fence->parent); 227 job->sched->ops->free_job(job); 228 } 229 230 /** 231 * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed 232 * 233 * @entity: entity which is cleaned up 234 * 235 * Makes sure that all remaining jobs in an entity are killed before it is 236 * destroyed. 237 */ 238 static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) 239 { 240 struct drm_sched_job *job; 241 int r; 242 243 while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) { 244 struct drm_sched_fence *s_fence = job->s_fence; 245 246 drm_sched_fence_scheduled(s_fence); 247 dma_fence_set_error(&s_fence->finished, -ESRCH); 248 249 /* 250 * When pipe is hanged by older entity, new entity might 251 * not even have chance to submit it's first job to HW 252 * and so entity->last_scheduled will remain NULL 253 */ 254 if (!entity->last_scheduled) { 255 drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); 256 continue; 257 } 258 259 r = dma_fence_add_callback(entity->last_scheduled, 260 &job->finish_cb, 261 drm_sched_entity_kill_jobs_cb); 262 if (r == -ENOENT) 263 drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); 264 else if (r) 265 DRM_ERROR("fence add callback failed (%d)\n", r); 266 } 267 } 268 269 /** 270 * drm_sched_entity_cleanup - Destroy a context entity 271 * 272 * @entity: scheduler entity 273 * 274 * This should be called after @drm_sched_entity_do_release. It goes over the 275 * entity and signals all jobs with an error code if the process was killed. 276 * 277 */ 278 void drm_sched_entity_fini(struct drm_sched_entity *entity) 279 { 280 struct drm_gpu_scheduler *sched = NULL; 281 282 if (entity->rq) { 283 sched = entity->rq->sched; 284 drm_sched_rq_remove_entity(entity->rq, entity); 285 } 286 287 /* Consumption of existing IBs wasn't completed. Forcefully 288 * remove them here. 289 */ 290 if (spsc_queue_count(&entity->job_queue)) { 291 if (sched) { 292 /* 293 * Wait for thread to idle to make sure it isn't processing 294 * this entity. 295 */ 296 wait_for_completion(&entity->entity_idle); 297 298 } 299 if (entity->dependency) { 300 dma_fence_remove_callback(entity->dependency, 301 &entity->cb); 302 dma_fence_put(entity->dependency); 303 entity->dependency = NULL; 304 } 305 306 drm_sched_entity_kill_jobs(entity); 307 } 308 309 dma_fence_put(entity->last_scheduled); 310 entity->last_scheduled = NULL; 311 kfree(entity->rq_list); 312 } 313 EXPORT_SYMBOL(drm_sched_entity_fini); 314 315 /** 316 * drm_sched_entity_fini - Destroy a context entity 317 * 318 * @entity: scheduler entity 319 * 320 * Calls drm_sched_entity_do_release() and drm_sched_entity_cleanup() 321 */ 322 void drm_sched_entity_destroy(struct drm_sched_entity *entity) 323 { 324 drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY); 325 drm_sched_entity_fini(entity); 326 } 327 EXPORT_SYMBOL(drm_sched_entity_destroy); 328 329 /** 330 * drm_sched_entity_clear_dep - callback to clear the entities dependency 331 */ 332 static void drm_sched_entity_clear_dep(struct dma_fence *f, 333 struct dma_fence_cb *cb) 334 { 335 struct drm_sched_entity *entity = 336 container_of(cb, struct drm_sched_entity, cb); 337 338 entity->dependency = NULL; 339 dma_fence_put(f); 340 } 341 342 /** 343 * drm_sched_entity_clear_dep - callback to clear the entities dependency and 344 * wake up scheduler 345 */ 346 static void drm_sched_entity_wakeup(struct dma_fence *f, 347 struct dma_fence_cb *cb) 348 { 349 struct drm_sched_entity *entity = 350 container_of(cb, struct drm_sched_entity, cb); 351 352 drm_sched_entity_clear_dep(f, cb); 353 drm_sched_wakeup(entity->rq->sched); 354 } 355 356 /** 357 * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority 358 */ 359 static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq, 360 enum drm_sched_priority priority) 361 { 362 *rq = &(*rq)->sched->sched_rq[priority]; 363 } 364 365 /** 366 * drm_sched_entity_set_priority - Sets priority of the entity 367 * 368 * @entity: scheduler entity 369 * @priority: scheduler priority 370 * 371 * Update the priority of runqueus used for the entity. 372 */ 373 void drm_sched_entity_set_priority(struct drm_sched_entity *entity, 374 enum drm_sched_priority priority) 375 { 376 unsigned int i; 377 378 spin_lock(&entity->rq_lock); 379 380 for (i = 0; i < entity->num_rq_list; ++i) 381 drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority); 382 383 if (entity->rq) { 384 drm_sched_rq_remove_entity(entity->rq, entity); 385 drm_sched_entity_set_rq_priority(&entity->rq, priority); 386 drm_sched_rq_add_entity(entity->rq, entity); 387 } 388 389 spin_unlock(&entity->rq_lock); 390 } 391 EXPORT_SYMBOL(drm_sched_entity_set_priority); 392 393 /** 394 * drm_sched_entity_add_dependency_cb - add callback for the entities dependency 395 * 396 * @entity: entity with dependency 397 * 398 * Add a callback to the current dependency of the entity to wake up the 399 * scheduler when the entity becomes available. 400 */ 401 static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) 402 { 403 struct drm_gpu_scheduler *sched = entity->rq->sched; 404 struct dma_fence *fence = entity->dependency; 405 struct drm_sched_fence *s_fence; 406 407 if (fence->context == entity->fence_context || 408 fence->context == entity->fence_context + 1) { 409 /* 410 * Fence is a scheduled/finished fence from a job 411 * which belongs to the same entity, we can ignore 412 * fences from ourself 413 */ 414 dma_fence_put(entity->dependency); 415 return false; 416 } 417 418 s_fence = to_drm_sched_fence(fence); 419 if (s_fence && s_fence->sched == sched) { 420 421 /* 422 * Fence is from the same scheduler, only need to wait for 423 * it to be scheduled 424 */ 425 fence = dma_fence_get(&s_fence->scheduled); 426 dma_fence_put(entity->dependency); 427 entity->dependency = fence; 428 if (!dma_fence_add_callback(fence, &entity->cb, 429 drm_sched_entity_clear_dep)) 430 return true; 431 432 /* Ignore it when it is already scheduled */ 433 dma_fence_put(fence); 434 return false; 435 } 436 437 if (!dma_fence_add_callback(entity->dependency, &entity->cb, 438 drm_sched_entity_wakeup)) 439 return true; 440 441 dma_fence_put(entity->dependency); 442 return false; 443 } 444 445 /** 446 * drm_sched_entity_pop_job - get a ready to be scheduled job from the entity 447 * 448 * @entity: entity to get the job from 449 * 450 * Process all dependencies and try to get one job from the entities queue. 451 */ 452 struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) 453 { 454 struct drm_gpu_scheduler *sched = entity->rq->sched; 455 struct drm_sched_job *sched_job; 456 457 sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); 458 if (!sched_job) 459 return NULL; 460 461 while ((entity->dependency = 462 sched->ops->dependency(sched_job, entity))) { 463 trace_drm_sched_job_wait_dep(sched_job, entity->dependency); 464 465 if (drm_sched_entity_add_dependency_cb(entity)) 466 return NULL; 467 } 468 469 /* skip jobs from entity that marked guilty */ 470 if (entity->guilty && atomic_read(entity->guilty)) 471 dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED); 472 473 dma_fence_put(entity->last_scheduled); 474 entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished); 475 476 spsc_queue_pop(&entity->job_queue); 477 return sched_job; 478 } 479 480 /** 481 * drm_sched_entity_select_rq - select a new rq for the entity 482 * 483 * @entity: scheduler entity 484 * 485 * Check all prerequisites and select a new rq for the entity for load 486 * balancing. 487 */ 488 void drm_sched_entity_select_rq(struct drm_sched_entity *entity) 489 { 490 struct dma_fence *fence; 491 struct drm_sched_rq *rq; 492 493 if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1) 494 return; 495 496 fence = READ_ONCE(entity->last_scheduled); 497 if (fence && !dma_fence_is_signaled(fence)) 498 return; 499 500 rq = drm_sched_entity_get_free_sched(entity); 501 if (rq == entity->rq) 502 return; 503 504 spin_lock(&entity->rq_lock); 505 drm_sched_rq_remove_entity(entity->rq, entity); 506 entity->rq = rq; 507 spin_unlock(&entity->rq_lock); 508 } 509 510 /** 511 * drm_sched_entity_push_job - Submit a job to the entity's job queue 512 * 513 * @sched_job: job to submit 514 * @entity: scheduler entity 515 * 516 * Note: To guarantee that the order of insertion to queue matches 517 * the job's fence sequence number this function should be 518 * called with drm_sched_job_init under common lock. 519 * 520 * Returns 0 for success, negative error code otherwise. 521 */ 522 void drm_sched_entity_push_job(struct drm_sched_job *sched_job, 523 struct drm_sched_entity *entity) 524 { 525 bool first; 526 527 trace_drm_sched_job(sched_job, entity); 528 atomic_inc(&entity->rq->sched->num_jobs); 529 WRITE_ONCE(entity->last_user, current->group_leader); 530 first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); 531 532 /* first job wakes up scheduler */ 533 if (first) { 534 /* Add the entity to the run queue */ 535 spin_lock(&entity->rq_lock); 536 if (entity->stopped) { 537 spin_unlock(&entity->rq_lock); 538 539 DRM_ERROR("Trying to push to a killed entity\n"); 540 return; 541 } 542 drm_sched_rq_add_entity(entity->rq, entity); 543 spin_unlock(&entity->rq_lock); 544 drm_sched_wakeup(entity->rq->sched); 545 } 546 } 547 EXPORT_SYMBOL(drm_sched_entity_push_job); 548