1 // SPDX-License-Identifier: GPL-2.0+ 2 /* Copyright (C) 2018 Broadcom */ 3 4 /** 5 * DOC: Broadcom V3D scheduling 6 * 7 * The shared DRM GPU scheduler is used to coordinate submitting jobs 8 * to the hardware. Each DRM fd (roughly a client process) gets its 9 * own scheduler entity, which will process jobs in order. The GPU 10 * scheduler will round-robin between clients to submit the next job. 11 * 12 * For simplicity, and in order to keep latency low for interactive 13 * jobs when bulk background jobs are queued up, we submit a new job 14 * to the HW only when it has completed the last one, instead of 15 * filling up the CT[01]Q FIFOs with jobs. Similarly, we use 16 * drm_sched_job_add_dependency() to manage the dependency between bin and 17 * render, instead of having the clients submit jobs using the HW's 18 * semaphores to interlock between them. 19 */ 20 21 #include <linux/sched/clock.h> 22 #include <linux/kthread.h> 23 24 #include <drm/drm_syncobj.h> 25 26 #include "v3d_drv.h" 27 #include "v3d_regs.h" 28 #include "v3d_trace.h" 29 30 #define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 31 32 static struct v3d_job * 33 to_v3d_job(struct drm_sched_job *sched_job) 34 { 35 return container_of(sched_job, struct v3d_job, base); 36 } 37 38 static struct v3d_bin_job * 39 to_bin_job(struct drm_sched_job *sched_job) 40 { 41 return container_of(sched_job, struct v3d_bin_job, base.base); 42 } 43 44 static struct v3d_render_job * 45 to_render_job(struct drm_sched_job *sched_job) 46 { 47 return container_of(sched_job, struct v3d_render_job, base.base); 48 } 49 50 static struct v3d_tfu_job * 51 to_tfu_job(struct drm_sched_job *sched_job) 52 { 53 return container_of(sched_job, struct v3d_tfu_job, base.base); 54 } 55 56 static struct v3d_csd_job * 57 to_csd_job(struct drm_sched_job *sched_job) 58 { 59 return container_of(sched_job, struct v3d_csd_job, base.base); 60 } 61 62 static struct v3d_cpu_job * 63 to_cpu_job(struct drm_sched_job *sched_job) 64 { 65 return container_of(sched_job, struct v3d_cpu_job, base.base); 66 } 67 68 static void 69 v3d_sched_job_free(struct drm_sched_job *sched_job) 70 { 71 struct v3d_job *job = to_v3d_job(sched_job); 72 73 v3d_job_cleanup(job); 74 } 75 76 static void 77 v3d_cpu_job_free(struct drm_sched_job *sched_job) 78 { 79 struct v3d_cpu_job *job = to_cpu_job(sched_job); 80 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; 81 struct v3d_performance_query_info *performance_query = &job->performance_query; 82 83 if (timestamp_query->queries) { 84 for (int i = 0; i < timestamp_query->count; i++) 85 drm_syncobj_put(timestamp_query->queries[i].syncobj); 86 kvfree(timestamp_query->queries); 87 } 88 89 if (performance_query->queries) { 90 for (int i = 0; i < performance_query->count; i++) 91 drm_syncobj_put(performance_query->queries[i].syncobj); 92 kvfree(performance_query->queries); 93 } 94 95 v3d_job_cleanup(&job->base); 96 } 97 98 static void 99 v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) 100 { 101 if (job->perfmon != v3d->active_perfmon) 102 v3d_perfmon_stop(v3d, v3d->active_perfmon, true); 103 104 if (job->perfmon && v3d->active_perfmon != job->perfmon) 105 v3d_perfmon_start(v3d, job->perfmon); 106 } 107 108 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) 109 { 110 struct v3d_bin_job *job = to_bin_job(sched_job); 111 struct v3d_dev *v3d = job->base.v3d; 112 struct v3d_file_priv *file = job->base.file->driver_priv; 113 struct drm_device *dev = &v3d->drm; 114 struct dma_fence *fence; 115 unsigned long irqflags; 116 117 if (unlikely(job->base.base.s_fence->finished.error)) 118 return NULL; 119 120 /* Lock required around bin_job update vs 121 * v3d_overflow_mem_work(). 122 */ 123 spin_lock_irqsave(&v3d->job_lock, irqflags); 124 v3d->bin_job = job; 125 /* Clear out the overflow allocation, so we don't 126 * reuse the overflow attached to a previous job. 127 */ 128 V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); 129 spin_unlock_irqrestore(&v3d->job_lock, irqflags); 130 131 v3d_invalidate_caches(v3d); 132 133 fence = v3d_fence_create(v3d, V3D_BIN); 134 if (IS_ERR(fence)) 135 return NULL; 136 137 if (job->base.irq_fence) 138 dma_fence_put(job->base.irq_fence); 139 job->base.irq_fence = dma_fence_get(fence); 140 141 trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, 142 job->start, job->end); 143 144 file->start_ns[V3D_BIN] = local_clock(); 145 v3d->queue[V3D_BIN].start_ns = file->start_ns[V3D_BIN]; 146 147 v3d_switch_perfmon(v3d, &job->base); 148 149 /* Set the current and end address of the control list. 150 * Writing the end register is what starts the job. 151 */ 152 if (job->qma) { 153 V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma); 154 V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms); 155 } 156 if (job->qts) { 157 V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, 158 V3D_CLE_CT0QTS_ENABLE | 159 job->qts); 160 } 161 V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start); 162 V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end); 163 164 return fence; 165 } 166 167 static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) 168 { 169 struct v3d_render_job *job = to_render_job(sched_job); 170 struct v3d_dev *v3d = job->base.v3d; 171 struct v3d_file_priv *file = job->base.file->driver_priv; 172 struct drm_device *dev = &v3d->drm; 173 struct dma_fence *fence; 174 175 if (unlikely(job->base.base.s_fence->finished.error)) 176 return NULL; 177 178 v3d->render_job = job; 179 180 /* Can we avoid this flush? We need to be careful of 181 * scheduling, though -- imagine job0 rendering to texture and 182 * job1 reading, and them being executed as bin0, bin1, 183 * render0, render1, so that render1's flush at bin time 184 * wasn't enough. 185 */ 186 v3d_invalidate_caches(v3d); 187 188 fence = v3d_fence_create(v3d, V3D_RENDER); 189 if (IS_ERR(fence)) 190 return NULL; 191 192 if (job->base.irq_fence) 193 dma_fence_put(job->base.irq_fence); 194 job->base.irq_fence = dma_fence_get(fence); 195 196 trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, 197 job->start, job->end); 198 199 file->start_ns[V3D_RENDER] = local_clock(); 200 v3d->queue[V3D_RENDER].start_ns = file->start_ns[V3D_RENDER]; 201 202 v3d_switch_perfmon(v3d, &job->base); 203 204 /* XXX: Set the QCFG */ 205 206 /* Set the current and end address of the control list. 207 * Writing the end register is what starts the job. 208 */ 209 V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start); 210 V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end); 211 212 return fence; 213 } 214 215 static struct dma_fence * 216 v3d_tfu_job_run(struct drm_sched_job *sched_job) 217 { 218 struct v3d_tfu_job *job = to_tfu_job(sched_job); 219 struct v3d_dev *v3d = job->base.v3d; 220 struct v3d_file_priv *file = job->base.file->driver_priv; 221 struct drm_device *dev = &v3d->drm; 222 struct dma_fence *fence; 223 224 fence = v3d_fence_create(v3d, V3D_TFU); 225 if (IS_ERR(fence)) 226 return NULL; 227 228 v3d->tfu_job = job; 229 if (job->base.irq_fence) 230 dma_fence_put(job->base.irq_fence); 231 job->base.irq_fence = dma_fence_get(fence); 232 233 trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); 234 235 file->start_ns[V3D_TFU] = local_clock(); 236 v3d->queue[V3D_TFU].start_ns = file->start_ns[V3D_TFU]; 237 238 V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia); 239 V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis); 240 V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica); 241 V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua); 242 V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa); 243 if (v3d->ver >= 71) 244 V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc); 245 V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios); 246 V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]); 247 if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { 248 V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]); 249 V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]); 250 V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]); 251 } 252 /* ICFG kicks off the job. */ 253 V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC); 254 255 return fence; 256 } 257 258 static struct dma_fence * 259 v3d_csd_job_run(struct drm_sched_job *sched_job) 260 { 261 struct v3d_csd_job *job = to_csd_job(sched_job); 262 struct v3d_dev *v3d = job->base.v3d; 263 struct v3d_file_priv *file = job->base.file->driver_priv; 264 struct drm_device *dev = &v3d->drm; 265 struct dma_fence *fence; 266 int i, csd_cfg0_reg, csd_cfg_reg_count; 267 268 v3d->csd_job = job; 269 270 v3d_invalidate_caches(v3d); 271 272 fence = v3d_fence_create(v3d, V3D_CSD); 273 if (IS_ERR(fence)) 274 return NULL; 275 276 if (job->base.irq_fence) 277 dma_fence_put(job->base.irq_fence); 278 job->base.irq_fence = dma_fence_get(fence); 279 280 trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); 281 282 file->start_ns[V3D_CSD] = local_clock(); 283 v3d->queue[V3D_CSD].start_ns = file->start_ns[V3D_CSD]; 284 285 v3d_switch_perfmon(v3d, &job->base); 286 287 csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); 288 csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; 289 for (i = 1; i <= csd_cfg_reg_count; i++) 290 V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); 291 /* CFG0 write kicks off the job. */ 292 V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); 293 294 return fence; 295 } 296 297 static void 298 v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) 299 { 300 struct v3d_indirect_csd_info *indirect_csd = &job->indirect_csd; 301 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 302 struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); 303 struct drm_v3d_submit_csd *args = &indirect_csd->job->args; 304 u32 *wg_counts; 305 306 v3d_get_bo_vaddr(bo); 307 v3d_get_bo_vaddr(indirect); 308 309 wg_counts = (uint32_t *)(bo->vaddr + indirect_csd->offset); 310 311 if (wg_counts[0] == 0 || wg_counts[1] == 0 || wg_counts[2] == 0) 312 return; 313 314 args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 315 args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 316 args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 317 args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * 318 (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 319 320 for (int i = 0; i < 3; i++) { 321 /* 0xffffffff indicates that the uniform rewrite is not needed */ 322 if (indirect_csd->wg_uniform_offsets[i] != 0xffffffff) { 323 u32 uniform_idx = indirect_csd->wg_uniform_offsets[i]; 324 ((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i]; 325 } 326 } 327 328 v3d_put_bo_vaddr(indirect); 329 v3d_put_bo_vaddr(bo); 330 } 331 332 static void 333 v3d_timestamp_query(struct v3d_cpu_job *job) 334 { 335 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; 336 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 337 u8 *value_addr; 338 339 v3d_get_bo_vaddr(bo); 340 341 for (int i = 0; i < timestamp_query->count; i++) { 342 value_addr = ((u8 *)bo->vaddr) + timestamp_query->queries[i].offset; 343 *((u64 *)value_addr) = i == 0 ? ktime_get_ns() : 0ull; 344 345 drm_syncobj_replace_fence(timestamp_query->queries[i].syncobj, 346 job->base.done_fence); 347 } 348 349 v3d_put_bo_vaddr(bo); 350 } 351 352 static void 353 v3d_reset_timestamp_queries(struct v3d_cpu_job *job) 354 { 355 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; 356 struct v3d_timestamp_query *queries = timestamp_query->queries; 357 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 358 u8 *value_addr; 359 360 v3d_get_bo_vaddr(bo); 361 362 for (int i = 0; i < timestamp_query->count; i++) { 363 value_addr = ((u8 *)bo->vaddr) + queries[i].offset; 364 *((u64 *)value_addr) = 0; 365 366 drm_syncobj_replace_fence(queries[i].syncobj, NULL); 367 } 368 369 v3d_put_bo_vaddr(bo); 370 } 371 372 static void 373 write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value) 374 { 375 if (do_64bit) { 376 u64 *dst64 = (u64 *)dst; 377 378 dst64[idx] = value; 379 } else { 380 u32 *dst32 = (u32 *)dst; 381 382 dst32[idx] = (u32)value; 383 } 384 } 385 386 static void 387 v3d_copy_query_results(struct v3d_cpu_job *job) 388 { 389 struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; 390 struct v3d_timestamp_query *queries = timestamp_query->queries; 391 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 392 struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]); 393 struct v3d_copy_query_results_info *copy = &job->copy; 394 struct dma_fence *fence; 395 u8 *query_addr; 396 bool available, write_result; 397 u8 *data; 398 int i; 399 400 v3d_get_bo_vaddr(bo); 401 v3d_get_bo_vaddr(timestamp); 402 403 data = ((u8 *)bo->vaddr) + copy->offset; 404 405 for (i = 0; i < timestamp_query->count; i++) { 406 fence = drm_syncobj_fence_get(queries[i].syncobj); 407 available = fence ? dma_fence_is_signaled(fence) : false; 408 409 write_result = available || copy->do_partial; 410 if (write_result) { 411 query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset; 412 write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr)); 413 } 414 415 if (copy->availability_bit) 416 write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u); 417 418 data += copy->stride; 419 420 dma_fence_put(fence); 421 } 422 423 v3d_put_bo_vaddr(timestamp); 424 v3d_put_bo_vaddr(bo); 425 } 426 427 static void 428 v3d_reset_performance_queries(struct v3d_cpu_job *job) 429 { 430 struct v3d_performance_query_info *performance_query = &job->performance_query; 431 struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; 432 struct v3d_dev *v3d = job->base.v3d; 433 struct v3d_perfmon *perfmon; 434 435 for (int i = 0; i < performance_query->count; i++) { 436 for (int j = 0; j < performance_query->nperfmons; j++) { 437 perfmon = v3d_perfmon_find(v3d_priv, 438 performance_query->queries[i].kperfmon_ids[j]); 439 if (!perfmon) { 440 DRM_DEBUG("Failed to find perfmon."); 441 continue; 442 } 443 444 v3d_perfmon_stop(v3d, perfmon, false); 445 446 memset(perfmon->values, 0, perfmon->ncounters * sizeof(u64)); 447 448 v3d_perfmon_put(perfmon); 449 } 450 451 drm_syncobj_replace_fence(performance_query->queries[i].syncobj, NULL); 452 } 453 } 454 455 static void 456 v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query) 457 { 458 struct v3d_performance_query_info *performance_query = &job->performance_query; 459 struct v3d_copy_query_results_info *copy = &job->copy; 460 struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; 461 struct v3d_dev *v3d = job->base.v3d; 462 struct v3d_perfmon *perfmon; 463 u64 counter_values[V3D_PERFCNT_NUM]; 464 465 for (int i = 0; i < performance_query->nperfmons; i++) { 466 perfmon = v3d_perfmon_find(v3d_priv, 467 performance_query->queries[query].kperfmon_ids[i]); 468 if (!perfmon) { 469 DRM_DEBUG("Failed to find perfmon."); 470 continue; 471 } 472 473 v3d_perfmon_stop(v3d, perfmon, true); 474 475 memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, 476 perfmon->ncounters * sizeof(u64)); 477 478 v3d_perfmon_put(perfmon); 479 } 480 481 for (int i = 0; i < performance_query->ncounters; i++) 482 write_to_buffer(data, i, copy->do_64bit, counter_values[i]); 483 } 484 485 static void 486 v3d_copy_performance_query(struct v3d_cpu_job *job) 487 { 488 struct v3d_performance_query_info *performance_query = &job->performance_query; 489 struct v3d_copy_query_results_info *copy = &job->copy; 490 struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); 491 struct dma_fence *fence; 492 bool available, write_result; 493 u8 *data; 494 495 v3d_get_bo_vaddr(bo); 496 497 data = ((u8 *)bo->vaddr) + copy->offset; 498 499 for (int i = 0; i < performance_query->count; i++) { 500 fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj); 501 available = fence ? dma_fence_is_signaled(fence) : false; 502 503 write_result = available || copy->do_partial; 504 if (write_result) 505 v3d_write_performance_query_result(job, data, i); 506 507 if (copy->availability_bit) 508 write_to_buffer(data, performance_query->ncounters, 509 copy->do_64bit, available ? 1u : 0u); 510 511 data += copy->stride; 512 513 dma_fence_put(fence); 514 } 515 516 v3d_put_bo_vaddr(bo); 517 } 518 519 static const v3d_cpu_job_fn cpu_job_function[] = { 520 [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect, 521 [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query, 522 [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries, 523 [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results, 524 [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries, 525 [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query, 526 }; 527 528 static struct dma_fence * 529 v3d_cpu_job_run(struct drm_sched_job *sched_job) 530 { 531 struct v3d_cpu_job *job = to_cpu_job(sched_job); 532 struct v3d_dev *v3d = job->base.v3d; 533 struct v3d_file_priv *file = job->base.file->driver_priv; 534 u64 runtime; 535 536 v3d->cpu_job = job; 537 538 if (job->job_type >= ARRAY_SIZE(cpu_job_function)) { 539 DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type); 540 return NULL; 541 } 542 543 file->start_ns[V3D_CPU] = local_clock(); 544 v3d->queue[V3D_CPU].start_ns = file->start_ns[V3D_CPU]; 545 546 trace_v3d_cpu_job_begin(&v3d->drm, job->job_type); 547 548 cpu_job_function[job->job_type](job); 549 550 trace_v3d_cpu_job_end(&v3d->drm, job->job_type); 551 552 runtime = local_clock() - file->start_ns[V3D_CPU]; 553 554 file->enabled_ns[V3D_CPU] += runtime; 555 v3d->queue[V3D_CPU].enabled_ns += runtime; 556 557 file->jobs_sent[V3D_CPU]++; 558 v3d->queue[V3D_CPU].jobs_sent++; 559 560 file->start_ns[V3D_CPU] = 0; 561 v3d->queue[V3D_CPU].start_ns = 0; 562 563 return NULL; 564 } 565 566 static struct dma_fence * 567 v3d_cache_clean_job_run(struct drm_sched_job *sched_job) 568 { 569 struct v3d_job *job = to_v3d_job(sched_job); 570 struct v3d_dev *v3d = job->v3d; 571 struct v3d_file_priv *file = job->file->driver_priv; 572 u64 runtime; 573 574 file->start_ns[V3D_CACHE_CLEAN] = local_clock(); 575 v3d->queue[V3D_CACHE_CLEAN].start_ns = file->start_ns[V3D_CACHE_CLEAN]; 576 577 v3d_clean_caches(v3d); 578 579 runtime = local_clock() - file->start_ns[V3D_CACHE_CLEAN]; 580 581 file->enabled_ns[V3D_CACHE_CLEAN] += runtime; 582 v3d->queue[V3D_CACHE_CLEAN].enabled_ns += runtime; 583 584 file->jobs_sent[V3D_CACHE_CLEAN]++; 585 v3d->queue[V3D_CACHE_CLEAN].jobs_sent++; 586 587 file->start_ns[V3D_CACHE_CLEAN] = 0; 588 v3d->queue[V3D_CACHE_CLEAN].start_ns = 0; 589 590 return NULL; 591 } 592 593 static enum drm_gpu_sched_stat 594 v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) 595 { 596 enum v3d_queue q; 597 598 mutex_lock(&v3d->reset_lock); 599 600 /* block scheduler */ 601 for (q = 0; q < V3D_MAX_QUEUES; q++) 602 drm_sched_stop(&v3d->queue[q].sched, sched_job); 603 604 if (sched_job) 605 drm_sched_increase_karma(sched_job); 606 607 /* get the GPU back into the init state */ 608 v3d_reset(v3d); 609 610 for (q = 0; q < V3D_MAX_QUEUES; q++) 611 drm_sched_resubmit_jobs(&v3d->queue[q].sched); 612 613 /* Unblock schedulers and restart their jobs. */ 614 for (q = 0; q < V3D_MAX_QUEUES; q++) { 615 drm_sched_start(&v3d->queue[q].sched, true); 616 } 617 618 mutex_unlock(&v3d->reset_lock); 619 620 return DRM_GPU_SCHED_STAT_NOMINAL; 621 } 622 623 /* If the current address or return address have changed, then the GPU 624 * has probably made progress and we should delay the reset. This 625 * could fail if the GPU got in an infinite loop in the CL, but that 626 * is pretty unlikely outside of an i-g-t testcase. 627 */ 628 static enum drm_gpu_sched_stat 629 v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, 630 u32 *timedout_ctca, u32 *timedout_ctra) 631 { 632 struct v3d_job *job = to_v3d_job(sched_job); 633 struct v3d_dev *v3d = job->v3d; 634 u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); 635 u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); 636 637 if (*timedout_ctca != ctca || *timedout_ctra != ctra) { 638 *timedout_ctca = ctca; 639 *timedout_ctra = ctra; 640 return DRM_GPU_SCHED_STAT_NOMINAL; 641 } 642 643 return v3d_gpu_reset_for_timeout(v3d, sched_job); 644 } 645 646 static enum drm_gpu_sched_stat 647 v3d_bin_job_timedout(struct drm_sched_job *sched_job) 648 { 649 struct v3d_bin_job *job = to_bin_job(sched_job); 650 651 return v3d_cl_job_timedout(sched_job, V3D_BIN, 652 &job->timedout_ctca, &job->timedout_ctra); 653 } 654 655 static enum drm_gpu_sched_stat 656 v3d_render_job_timedout(struct drm_sched_job *sched_job) 657 { 658 struct v3d_render_job *job = to_render_job(sched_job); 659 660 return v3d_cl_job_timedout(sched_job, V3D_RENDER, 661 &job->timedout_ctca, &job->timedout_ctra); 662 } 663 664 static enum drm_gpu_sched_stat 665 v3d_generic_job_timedout(struct drm_sched_job *sched_job) 666 { 667 struct v3d_job *job = to_v3d_job(sched_job); 668 669 return v3d_gpu_reset_for_timeout(job->v3d, sched_job); 670 } 671 672 static enum drm_gpu_sched_stat 673 v3d_csd_job_timedout(struct drm_sched_job *sched_job) 674 { 675 struct v3d_csd_job *job = to_csd_job(sched_job); 676 struct v3d_dev *v3d = job->base.v3d; 677 u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); 678 679 /* If we've made progress, skip reset and let the timer get 680 * rearmed. 681 */ 682 if (job->timedout_batches != batches) { 683 job->timedout_batches = batches; 684 return DRM_GPU_SCHED_STAT_NOMINAL; 685 } 686 687 return v3d_gpu_reset_for_timeout(v3d, sched_job); 688 } 689 690 static const struct drm_sched_backend_ops v3d_bin_sched_ops = { 691 .run_job = v3d_bin_job_run, 692 .timedout_job = v3d_bin_job_timedout, 693 .free_job = v3d_sched_job_free, 694 }; 695 696 static const struct drm_sched_backend_ops v3d_render_sched_ops = { 697 .run_job = v3d_render_job_run, 698 .timedout_job = v3d_render_job_timedout, 699 .free_job = v3d_sched_job_free, 700 }; 701 702 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { 703 .run_job = v3d_tfu_job_run, 704 .timedout_job = v3d_generic_job_timedout, 705 .free_job = v3d_sched_job_free, 706 }; 707 708 static const struct drm_sched_backend_ops v3d_csd_sched_ops = { 709 .run_job = v3d_csd_job_run, 710 .timedout_job = v3d_csd_job_timedout, 711 .free_job = v3d_sched_job_free 712 }; 713 714 static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { 715 .run_job = v3d_cache_clean_job_run, 716 .timedout_job = v3d_generic_job_timedout, 717 .free_job = v3d_sched_job_free 718 }; 719 720 static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { 721 .run_job = v3d_cpu_job_run, 722 .timedout_job = v3d_generic_job_timedout, 723 .free_job = v3d_cpu_job_free 724 }; 725 726 int 727 v3d_sched_init(struct v3d_dev *v3d) 728 { 729 int hw_jobs_limit = 1; 730 int job_hang_limit = 0; 731 int hang_limit_ms = 500; 732 int ret; 733 734 ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, 735 &v3d_bin_sched_ops, NULL, 736 DRM_SCHED_PRIORITY_COUNT, 737 hw_jobs_limit, job_hang_limit, 738 msecs_to_jiffies(hang_limit_ms), NULL, 739 NULL, "v3d_bin", v3d->drm.dev); 740 if (ret) 741 return ret; 742 743 ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, 744 &v3d_render_sched_ops, NULL, 745 DRM_SCHED_PRIORITY_COUNT, 746 hw_jobs_limit, job_hang_limit, 747 msecs_to_jiffies(hang_limit_ms), NULL, 748 NULL, "v3d_render", v3d->drm.dev); 749 if (ret) 750 goto fail; 751 752 ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, 753 &v3d_tfu_sched_ops, NULL, 754 DRM_SCHED_PRIORITY_COUNT, 755 hw_jobs_limit, job_hang_limit, 756 msecs_to_jiffies(hang_limit_ms), NULL, 757 NULL, "v3d_tfu", v3d->drm.dev); 758 if (ret) 759 goto fail; 760 761 if (v3d_has_csd(v3d)) { 762 ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, 763 &v3d_csd_sched_ops, NULL, 764 DRM_SCHED_PRIORITY_COUNT, 765 hw_jobs_limit, job_hang_limit, 766 msecs_to_jiffies(hang_limit_ms), NULL, 767 NULL, "v3d_csd", v3d->drm.dev); 768 if (ret) 769 goto fail; 770 771 ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, 772 &v3d_cache_clean_sched_ops, NULL, 773 DRM_SCHED_PRIORITY_COUNT, 774 hw_jobs_limit, job_hang_limit, 775 msecs_to_jiffies(hang_limit_ms), NULL, 776 NULL, "v3d_cache_clean", v3d->drm.dev); 777 if (ret) 778 goto fail; 779 } 780 781 ret = drm_sched_init(&v3d->queue[V3D_CPU].sched, 782 &v3d_cpu_sched_ops, NULL, 783 DRM_SCHED_PRIORITY_COUNT, 784 1, job_hang_limit, 785 msecs_to_jiffies(hang_limit_ms), NULL, 786 NULL, "v3d_cpu", v3d->drm.dev); 787 if (ret) 788 goto fail; 789 790 return 0; 791 792 fail: 793 v3d_sched_fini(v3d); 794 return ret; 795 } 796 797 void 798 v3d_sched_fini(struct v3d_dev *v3d) 799 { 800 enum v3d_queue q; 801 802 for (q = 0; q < V3D_MAX_QUEUES; q++) { 803 if (v3d->queue[q].sched.ready) 804 drm_sched_fini(&v3d->queue[q].sched); 805 } 806 } 807