1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/firmware.h> 27 #include <linux/pm_runtime.h> 28 29 #include "amdgpu.h" 30 #include "amdgpu_gfx.h" 31 #include "amdgpu_rlc.h" 32 #include "amdgpu_ras.h" 33 #include "amdgpu_reset.h" 34 #include "amdgpu_xcp.h" 35 #include "amdgpu_xgmi.h" 36 37 /* delay 0.1 second to enable gfx off feature */ 38 #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) 39 40 #define GFX_OFF_NO_DELAY 0 41 42 /* 43 * GPU GFX IP block helpers function. 44 */ 45 46 int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, 47 int pipe, int queue) 48 { 49 int bit = 0; 50 51 bit += mec * adev->gfx.mec.num_pipe_per_mec 52 * adev->gfx.mec.num_queue_per_pipe; 53 bit += pipe * adev->gfx.mec.num_queue_per_pipe; 54 bit += queue; 55 56 return bit; 57 } 58 59 void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, 60 int *mec, int *pipe, int *queue) 61 { 62 *queue = bit % adev->gfx.mec.num_queue_per_pipe; 63 *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) 64 % adev->gfx.mec.num_pipe_per_mec; 65 *mec = (bit / adev->gfx.mec.num_queue_per_pipe) 66 / adev->gfx.mec.num_pipe_per_mec; 67 68 } 69 70 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, 71 int xcc_id, int mec, int pipe, int queue) 72 { 73 return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), 74 adev->gfx.mec_bitmap[xcc_id].queue_bitmap); 75 } 76 77 static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, 78 int me, int pipe, int queue) 79 { 80 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 81 int bit = 0; 82 83 bit += me * adev->gfx.me.num_pipe_per_me 84 * num_queue_per_pipe; 85 bit += pipe * num_queue_per_pipe; 86 bit += queue; 87 88 return bit; 89 } 90 91 bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, 92 int me, int pipe, int queue) 93 { 94 return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), 95 adev->gfx.me.queue_bitmap); 96 } 97 98 /** 99 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter 100 * 101 * @mask: array in which the per-shader array disable masks will be stored 102 * @max_se: number of SEs 103 * @max_sh: number of SHs 104 * 105 * The bitmask of CUs to be disabled in the shader array determined by se and 106 * sh is stored in mask[se * max_sh + sh]. 
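 *
 * Illustrative example (parameter values are made up): booting with
 * amdgpu.disable_cu=0.0.3,1.0.7 makes the parser below read two "se.sh.cu"
 * triples, so mask[0 * max_sh + 0] |= 1u << 3 and mask[1 * max_sh + 0] |= 1u << 7,
 * i.e. CU 3 in SE0/SH0 and CU 7 in SE1/SH0 are disabled.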
107 */ 108 void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh) 109 { 110 unsigned int se, sh, cu; 111 const char *p; 112 113 memset(mask, 0, sizeof(*mask) * max_se * max_sh); 114 115 if (!amdgpu_disable_cu || !*amdgpu_disable_cu) 116 return; 117 118 p = amdgpu_disable_cu; 119 for (;;) { 120 char *next; 121 int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu); 122 123 if (ret < 3) { 124 DRM_ERROR("amdgpu: could not parse disable_cu\n"); 125 return; 126 } 127 128 if (se < max_se && sh < max_sh && cu < 16) { 129 DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu); 130 mask[se * max_sh + sh] |= 1u << cu; 131 } else { 132 DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n", 133 se, sh, cu); 134 } 135 136 next = strchr(p, ','); 137 if (!next) 138 break; 139 p = next + 1; 140 } 141 } 142 143 static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) 144 { 145 return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1; 146 } 147 148 static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) 149 { 150 if (amdgpu_compute_multipipe != -1) { 151 DRM_INFO("amdgpu: forcing compute pipe policy %d\n", 152 amdgpu_compute_multipipe); 153 return amdgpu_compute_multipipe == 1; 154 } 155 156 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) 157 return true; 158 159 /* FIXME: spreading the queues across pipes causes perf regressions 160 * on POLARIS11 compute workloads */ 161 if (adev->asic_type == CHIP_POLARIS11) 162 return false; 163 164 return adev->gfx.mec.num_mec > 1; 165 } 166 167 bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, 168 struct amdgpu_ring *ring) 169 { 170 int queue = ring->queue; 171 int pipe = ring->pipe; 172 173 /* Policy: use pipe1 queue0 as high priority graphics queue if we 174 * have more than one gfx pipe. 175 */ 176 if (amdgpu_gfx_is_graphics_multipipe_capable(adev) && 177 adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) { 178 int me = ring->me; 179 int bit; 180 181 bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue); 182 if (ring == &adev->gfx.gfx_ring[bit]) 183 return true; 184 } 185 186 return false; 187 } 188 189 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, 190 struct amdgpu_ring *ring) 191 { 192 /* Policy: use 1st queue as high priority compute queue if we 193 * have more than one compute queue. 194 */ 195 if (adev->gfx.num_compute_rings > 1 && 196 ring == &adev->gfx.compute_ring[0]) 197 return true; 198 199 return false; 200 } 201 202 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) 203 { 204 int i, j, queue, pipe; 205 bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev); 206 int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * 207 adev->gfx.mec.num_queue_per_pipe, 208 adev->gfx.num_compute_rings); 209 int num_xcc = adev->gfx.xcc_mask ? 
NUM_XCC(adev->gfx.xcc_mask) : 1;

	if (multipipe_policy) {
		/* policy: spread queues evenly across all pipes on MEC1 only
		 * for multiple xcc, just use the original policy for simplicity */
		for (j = 0; j < num_xcc; j++) {
			for (i = 0; i < max_queues_per_mec; i++) {
				pipe = i % adev->gfx.mec.num_pipe_per_mec;
				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
					adev->gfx.mec.num_queue_per_pipe;

				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
					adev->gfx.mec_bitmap[j].queue_bitmap);
			}
		}
	} else {
		/* policy: amdgpu owns all queues in the given pipe */
		for (j = 0; j < num_xcc; j++) {
			for (i = 0; i < max_queues_per_mec; ++i)
				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
		}
	}

	for (j = 0; j < num_xcc; j++) {
		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
	}
}

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
	int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
	int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;

	if (multipipe_policy) {
		/* policy: amdgpu owns the first queue per pipe at this stage
		 * will extend to multiple queues per pipe later */
		for (i = 0; i < max_queues_per_me; i++) {
			pipe = i % adev->gfx.me.num_pipe_per_me;
			queue = (i / adev->gfx.me.num_pipe_per_me) %
				num_queue_per_pipe;

			set_bit(pipe * num_queue_per_pipe + queue,
				adev->gfx.me.queue_bitmap);
		}
	} else {
		for (i = 0; i < max_queues_per_me; ++i)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring, int xcc_id)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		* adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;

	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 * can only be issued on queue 0.
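		 *
		 * Illustrative example (assumes the common 2 MEC x 4 pipe x
		 * 8 queue layout with amdgpu's KCQs acquired on MEC1 only):
		 * scanning queue_bit downward, the first slot that passes the
		 * checks below is MEC2, pipe 1, queue 0, so the KIQ typically
		 * ends up as ring->me = 2, ring->pipe = 1, ring->queue = 0.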
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_irq_src *irq = &kiq->irq;
	struct amdgpu_ring *ring = &kiq->ring;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->xcc_id = xcc_id;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
	ring->doorbell_index =
		(adev->doorbell_index.kiq +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)ring->me,
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned int hpd_size, int xcc_id)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned int mqd_size, int xcc_id)
{
	int r, i, j;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;
	u32 domain = AMDGPU_GEM_DOMAIN_GTT;

#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
		domain |= AMDGPU_GEM_DOMAIN_VRAM;
#endif

	/* create MQD for KIQ */
	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SR-IOV the VRAM domain is a must; otherwise the hypervisor's
		 * SAVE_VF request fails after driver unload, since by then the
		 * MQD has been deallocated and unbound from GART. To avoid
		 * diverging code paths, use the VRAM domain for the KIQ MQD on
		 * both SR-IOV and bare metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM |
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ring->mqd_obj,
					    &ring->mqd_gpu_addr,
					    &ring->mqd_ptr);
		if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); 405 return r; 406 } 407 408 /* prepare MQD backup */ 409 kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL); 410 if (!kiq->mqd_backup) { 411 dev_warn(adev->dev, 412 "no memory to create MQD backup for ring %s\n", ring->name); 413 return -ENOMEM; 414 } 415 } 416 417 if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { 418 /* create MQD for each KGQ */ 419 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 420 ring = &adev->gfx.gfx_ring[i]; 421 if (!ring->mqd_obj) { 422 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 423 domain, &ring->mqd_obj, 424 &ring->mqd_gpu_addr, &ring->mqd_ptr); 425 if (r) { 426 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 427 return r; 428 } 429 430 ring->mqd_size = mqd_size; 431 /* prepare MQD backup */ 432 adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL); 433 if (!adev->gfx.me.mqd_backup[i]) { 434 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 435 return -ENOMEM; 436 } 437 } 438 } 439 } 440 441 /* create MQD for each KCQ */ 442 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 443 j = i + xcc_id * adev->gfx.num_compute_rings; 444 ring = &adev->gfx.compute_ring[j]; 445 if (!ring->mqd_obj) { 446 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 447 domain, &ring->mqd_obj, 448 &ring->mqd_gpu_addr, &ring->mqd_ptr); 449 if (r) { 450 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 451 return r; 452 } 453 454 ring->mqd_size = mqd_size; 455 /* prepare MQD backup */ 456 adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL); 457 if (!adev->gfx.mec.mqd_backup[j]) { 458 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 459 return -ENOMEM; 460 } 461 } 462 } 463 464 return 0; 465 } 466 467 void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id) 468 { 469 struct amdgpu_ring *ring = NULL; 470 int i, j; 471 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 472 473 if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { 474 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 475 ring = &adev->gfx.gfx_ring[i]; 476 kfree(adev->gfx.me.mqd_backup[i]); 477 amdgpu_bo_free_kernel(&ring->mqd_obj, 478 &ring->mqd_gpu_addr, 479 &ring->mqd_ptr); 480 } 481 } 482 483 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 484 j = i + xcc_id * adev->gfx.num_compute_rings; 485 ring = &adev->gfx.compute_ring[j]; 486 kfree(adev->gfx.mec.mqd_backup[j]); 487 amdgpu_bo_free_kernel(&ring->mqd_obj, 488 &ring->mqd_gpu_addr, 489 &ring->mqd_ptr); 490 } 491 492 ring = &kiq->ring; 493 kfree(kiq->mqd_backup); 494 amdgpu_bo_free_kernel(&ring->mqd_obj, 495 &ring->mqd_gpu_addr, 496 &ring->mqd_ptr); 497 } 498 499 int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) 500 { 501 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 502 struct amdgpu_ring *kiq_ring = &kiq->ring; 503 int i, r = 0; 504 int j; 505 506 if (adev->enable_mes) { 507 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 508 j = i + xcc_id * adev->gfx.num_compute_rings; 509 amdgpu_mes_unmap_legacy_queue(adev, 510 &adev->gfx.compute_ring[j], 511 RESET_QUEUES, 0, 0); 512 } 513 return 0; 514 } 515 516 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 517 return -EINVAL; 518 519 if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) 520 return 0; 521 522 spin_lock(&kiq->ring_lock); 523 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * 524 adev->gfx.num_compute_rings)) { 525 spin_unlock(&kiq->ring_lock); 526 return -ENOMEM; 527 } 528 529 for 
(i = 0; i < adev->gfx.num_compute_rings; i++) {
		j = i + xcc_id * adev->gfx.num_compute_rings;
		kiq->pmf->kiq_unmap_queues(kiq_ring,
					   &adev->gfx.compute_ring[j],
					   RESET_QUEUES, 0, 0);
	}
	/* Submit unmap queue packet */
	amdgpu_ring_commit(kiq_ring);
	/*
	 * Ring test will do a basic scratch register change check. Just run
	 * it to ensure that the unmap queue packets submitted above have been
	 * processed successfully before returning.
	 */
	r = amdgpu_ring_test_helper(kiq_ring);

	spin_unlock(&kiq->ring_lock);

	return r;
}

int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i, r = 0;
	int j;

	if (adev->enable_mes) {
		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
				j = i + xcc_id * adev->gfx.num_gfx_rings;
				amdgpu_mes_unmap_legacy_queue(adev,
							      &adev->gfx.gfx_ring[j],
							      PREEMPT_QUEUES, 0, 0);
			}
		}
		return 0;
	}

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
		return 0;

	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
		spin_lock(&kiq->ring_lock);
		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
				      adev->gfx.num_gfx_rings)) {
			spin_unlock(&kiq->ring_lock);
			return -ENOMEM;
		}

		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			j = i + xcc_id * adev->gfx.num_gfx_rings;
			kiq->pmf->kiq_unmap_queues(kiq_ring,
						   &adev->gfx.gfx_ring[j],
						   PREEMPT_QUEUES, 0, 0);
		}
		/* Submit unmap queue packet */
		amdgpu_ring_commit(kiq_ring);

		/*
		 * Ring test will do a basic scratch register change check.
		 * Just run it to ensure that the unmap queue packets submitted
		 * above have been processed successfully before returning.
595 */ 596 r = amdgpu_ring_test_helper(kiq_ring); 597 spin_unlock(&kiq->ring_lock); 598 } 599 600 return r; 601 } 602 603 int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, 604 int queue_bit) 605 { 606 int mec, pipe, queue; 607 int set_resource_bit = 0; 608 609 amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); 610 611 set_resource_bit = mec * 4 * 8 + pipe * 8 + queue; 612 613 return set_resource_bit; 614 } 615 616 static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id) 617 { 618 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 619 struct amdgpu_ring *kiq_ring = &kiq->ring; 620 uint64_t queue_mask = ~0ULL; 621 int r, i, j; 622 623 amdgpu_device_flush_hdp(adev, NULL); 624 625 if (!adev->enable_uni_mes) { 626 spin_lock(&kiq->ring_lock); 627 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size); 628 if (r) { 629 dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r); 630 spin_unlock(&kiq->ring_lock); 631 return r; 632 } 633 634 kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); 635 r = amdgpu_ring_test_helper(kiq_ring); 636 spin_unlock(&kiq->ring_lock); 637 if (r) 638 dev_err(adev->dev, "KIQ failed to set resources\n"); 639 } 640 641 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 642 j = i + xcc_id * adev->gfx.num_compute_rings; 643 r = amdgpu_mes_map_legacy_queue(adev, 644 &adev->gfx.compute_ring[j]); 645 if (r) { 646 dev_err(adev->dev, "failed to map compute queue\n"); 647 return r; 648 } 649 } 650 651 return 0; 652 } 653 654 int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) 655 { 656 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 657 struct amdgpu_ring *kiq_ring = &kiq->ring; 658 uint64_t queue_mask = 0; 659 int r, i, j; 660 661 if (adev->mes.enable_legacy_queue_map) 662 return amdgpu_gfx_mes_enable_kcq(adev, xcc_id); 663 664 if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) 665 return -EINVAL; 666 667 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 668 if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap)) 669 continue; 670 671 /* This situation may be hit in the future if a new HW 672 * generation exposes more than 64 queues. If so, the 673 * definition of queue_mask needs updating */ 674 if (WARN_ON(i > (sizeof(queue_mask)*8))) { 675 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 676 break; 677 } 678 679 queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i)); 680 } 681 682 amdgpu_device_flush_hdp(adev, NULL); 683 684 DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, 685 kiq_ring->queue); 686 687 spin_lock(&kiq->ring_lock); 688 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 689 adev->gfx.num_compute_rings + 690 kiq->pmf->set_resources_size); 691 if (r) { 692 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 693 spin_unlock(&kiq->ring_lock); 694 return r; 695 } 696 697 kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); 698 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 699 j = i + xcc_id * adev->gfx.num_compute_rings; 700 kiq->pmf->kiq_map_queues(kiq_ring, 701 &adev->gfx.compute_ring[j]); 702 } 703 /* Submit map queue packet */ 704 amdgpu_ring_commit(kiq_ring); 705 /* 706 * Ring test will do a basic scratch register change check. Just run 707 * this to ensure that map queues that is submitted before got 708 * processed successfully before returning. 
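	 *
	 * Worked example of the queue_mask set above (illustrative; assumes
	 * 4 pipes x 8 queues on MEC1 and the default 8 KCQs spread across
	 * pipes): the enabled KCQs sit at (pipe, queue) = (0..3, 0) and
	 * (0..3, 1), which amdgpu_queue_mask_bit_to_set_resource_bit() maps
	 * to bits 0, 1, 8, 9, 16, 17, 24, 25, i.e. queue_mask == 0x03030303.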
709 */ 710 r = amdgpu_ring_test_helper(kiq_ring); 711 spin_unlock(&kiq->ring_lock); 712 if (r) 713 DRM_ERROR("KCQ enable failed\n"); 714 715 return r; 716 } 717 718 int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) 719 { 720 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 721 struct amdgpu_ring *kiq_ring = &kiq->ring; 722 int r, i, j; 723 724 if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 725 return -EINVAL; 726 727 amdgpu_device_flush_hdp(adev, NULL); 728 729 if (adev->mes.enable_legacy_queue_map) { 730 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 731 j = i + xcc_id * adev->gfx.num_gfx_rings; 732 r = amdgpu_mes_map_legacy_queue(adev, 733 &adev->gfx.gfx_ring[j]); 734 if (r) { 735 DRM_ERROR("failed to map gfx queue\n"); 736 return r; 737 } 738 } 739 740 return 0; 741 } 742 743 spin_lock(&kiq->ring_lock); 744 /* No need to map kcq on the slave */ 745 if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { 746 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 747 adev->gfx.num_gfx_rings); 748 if (r) { 749 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 750 spin_unlock(&kiq->ring_lock); 751 return r; 752 } 753 754 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 755 j = i + xcc_id * adev->gfx.num_gfx_rings; 756 kiq->pmf->kiq_map_queues(kiq_ring, 757 &adev->gfx.gfx_ring[j]); 758 } 759 } 760 /* Submit map queue packet */ 761 amdgpu_ring_commit(kiq_ring); 762 /* 763 * Ring test will do a basic scratch register change check. Just run 764 * this to ensure that map queues that is submitted before got 765 * processed successfully before returning. 766 */ 767 r = amdgpu_ring_test_helper(kiq_ring); 768 spin_unlock(&kiq->ring_lock); 769 if (r) 770 DRM_ERROR("KGQ enable failed\n"); 771 772 return r; 773 } 774 775 static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable, 776 bool no_delay) 777 { 778 unsigned long delay = GFX_OFF_DELAY_ENABLE; 779 780 if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) 781 return; 782 783 mutex_lock(&adev->gfx.gfx_off_mutex); 784 785 if (enable) { 786 /* If the count is already 0, it means there's an imbalance bug somewhere. 787 * Note that the bug may be in a different caller than the one which triggers the 788 * WARN_ON_ONCE. 789 */ 790 if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)) 791 goto unlock; 792 793 adev->gfx.gfx_off_req_count--; 794 795 if (adev->gfx.gfx_off_req_count == 0 && 796 !adev->gfx.gfx_off_state) { 797 /* If going to s2idle, no need to wait */ 798 if (no_delay) { 799 if (!amdgpu_dpm_set_powergating_by_smu(adev, 800 AMD_IP_BLOCK_TYPE_GFX, true, 0)) 801 adev->gfx.gfx_off_state = true; 802 } else { 803 schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 804 delay); 805 } 806 } 807 } else { 808 if (adev->gfx.gfx_off_req_count == 0) { 809 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 810 811 if (adev->gfx.gfx_off_state && 812 !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) { 813 adev->gfx.gfx_off_state = false; 814 815 if (adev->gfx.funcs->init_spm_golden) { 816 dev_dbg(adev->dev, 817 "GFXOFF is disabled, re-init SPM golden settings\n"); 818 amdgpu_gfx_init_spm_golden(adev); 819 } 820 } 821 } 822 823 adev->gfx.gfx_off_req_count++; 824 } 825 826 unlock: 827 mutex_unlock(&adev->gfx.gfx_off_mutex); 828 } 829 830 /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable 831 * 832 * @adev: amdgpu_device pointer 833 * @bool enable true: enable gfx off feature, false: disable gfx off feature 834 * 835 * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled. 836 * 2. 
other client can send request to disable gfx off feature, the request should be honored. 837 * 3. other client can cancel their request of disable gfx off feature 838 * 4. other client should not send request to enable gfx off feature before disable gfx off feature. 839 * 840 * gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms. 841 */ 842 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) 843 { 844 /* If going to s2idle, no need to wait */ 845 bool no_delay = adev->in_s0ix ? true : false; 846 847 amdgpu_gfx_do_off_ctrl(adev, enable, no_delay); 848 } 849 850 /* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable 851 * 852 * @adev: amdgpu_device pointer 853 * @bool enable true: enable gfx off feature, false: disable gfx off feature 854 * 855 * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled. 856 * 2. other client can send request to disable gfx off feature, the request should be honored. 857 * 3. other client can cancel their request of disable gfx off feature 858 * 4. other client should not send request to enable gfx off feature before disable gfx off feature. 859 * 860 * gfx off allow will be issued immediately. 861 */ 862 void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable) 863 { 864 amdgpu_gfx_do_off_ctrl(adev, enable, true); 865 } 866 867 int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) 868 { 869 int r = 0; 870 871 mutex_lock(&adev->gfx.gfx_off_mutex); 872 873 r = amdgpu_dpm_set_residency_gfxoff(adev, value); 874 875 mutex_unlock(&adev->gfx.gfx_off_mutex); 876 877 return r; 878 } 879 880 int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) 881 { 882 int r = 0; 883 884 mutex_lock(&adev->gfx.gfx_off_mutex); 885 886 r = amdgpu_dpm_get_residency_gfxoff(adev, value); 887 888 mutex_unlock(&adev->gfx.gfx_off_mutex); 889 890 return r; 891 } 892 893 int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) 894 { 895 int r = 0; 896 897 mutex_lock(&adev->gfx.gfx_off_mutex); 898 899 r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); 900 901 mutex_unlock(&adev->gfx.gfx_off_mutex); 902 903 return r; 904 } 905 906 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) 907 { 908 909 int r = 0; 910 911 mutex_lock(&adev->gfx.gfx_off_mutex); 912 913 r = amdgpu_dpm_get_status_gfxoff(adev, value); 914 915 mutex_unlock(&adev->gfx.gfx_off_mutex); 916 917 return r; 918 } 919 920 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) 921 { 922 int r; 923 924 if (amdgpu_ras_is_supported(adev, ras_block->block)) { 925 if (!amdgpu_persistent_edc_harvesting_supported(adev)) { 926 r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); 927 if (r) 928 return r; 929 } 930 931 r = amdgpu_ras_block_late_init(adev, ras_block); 932 if (r) 933 return r; 934 935 if (amdgpu_sriov_vf(adev)) 936 return r; 937 938 if (adev->gfx.cp_ecc_error_irq.funcs) { 939 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 940 if (r) 941 goto late_fini; 942 } 943 } else { 944 amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); 945 } 946 947 return 0; 948 late_fini: 949 amdgpu_ras_block_late_fini(adev, ras_block); 950 return r; 951 } 952 953 int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev) 954 { 955 int err = 0; 956 struct amdgpu_gfx_ras *ras = NULL; 957 958 /* adev->gfx.ras is NULL, which means gfx does not 959 * support ras function, then do nothing here. 
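	 *
	 * (Illustrative note: gfx IP code that does support RAS points
	 * adev->gfx.ras at its own amdgpu_gfx_ras instance during early init,
	 * which is what makes the registration below run.)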
960 */ 961 if (!adev->gfx.ras) 962 return 0; 963 964 ras = adev->gfx.ras; 965 966 err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 967 if (err) { 968 dev_err(adev->dev, "Failed to register gfx ras block!\n"); 969 return err; 970 } 971 972 strcpy(ras->ras_block.ras_comm.name, "gfx"); 973 ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; 974 ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 975 adev->gfx.ras_if = &ras->ras_block.ras_comm; 976 977 /* If not define special ras_late_init function, use gfx default ras_late_init */ 978 if (!ras->ras_block.ras_late_init) 979 ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init; 980 981 /* If not defined special ras_cb function, use default ras_cb */ 982 if (!ras->ras_block.ras_cb) 983 ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; 984 985 return 0; 986 } 987 988 int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, 989 struct amdgpu_iv_entry *entry) 990 { 991 if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler) 992 return adev->gfx.ras->poison_consumption_handler(adev, entry); 993 994 return 0; 995 } 996 997 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, 998 void *err_data, 999 struct amdgpu_iv_entry *entry) 1000 { 1001 /* TODO ue will trigger an interrupt. 1002 * 1003 * When “Full RAS” is enabled, the per-IP interrupt sources should 1004 * be disabled and the driver should only look for the aggregated 1005 * interrupt via sync flood 1006 */ 1007 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 1008 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 1009 if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops && 1010 adev->gfx.ras->ras_block.hw_ops->query_ras_error_count) 1011 adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); 1012 amdgpu_ras_reset_gpu(adev); 1013 } 1014 return AMDGPU_RAS_SUCCESS; 1015 } 1016 1017 int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, 1018 struct amdgpu_irq_src *source, 1019 struct amdgpu_iv_entry *entry) 1020 { 1021 struct ras_common_if *ras_if = adev->gfx.ras_if; 1022 struct ras_dispatch_if ih_data = { 1023 .entry = entry, 1024 }; 1025 1026 if (!ras_if) 1027 return 0; 1028 1029 ih_data.head = *ras_if; 1030 1031 DRM_ERROR("CP ECC ERROR IRQ\n"); 1032 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 1033 return 0; 1034 } 1035 1036 void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, 1037 void *ras_error_status, 1038 void (*func)(struct amdgpu_device *adev, void *ras_error_status, 1039 int xcc_id)) 1040 { 1041 int i; 1042 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 1043 uint32_t xcc_mask = GENMASK(num_xcc - 1, 0); 1044 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 1045 1046 if (err_data) { 1047 err_data->ue_count = 0; 1048 err_data->ce_count = 0; 1049 } 1050 1051 for_each_inst(i, xcc_mask) 1052 func(adev, ras_error_status, i); 1053 } 1054 1055 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) 1056 { 1057 signed long r, cnt = 0; 1058 unsigned long flags; 1059 uint32_t seq, reg_val_offs = 0, value = 0; 1060 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1061 struct amdgpu_ring *ring = &kiq->ring; 1062 1063 if (amdgpu_device_skip_hw_access(adev)) 1064 return 0; 1065 1066 if (adev->mes.ring[0].sched.ready) 1067 return amdgpu_mes_rreg(adev, reg); 1068 1069 BUG_ON(!ring->funcs->emit_rreg); 1070 1071 spin_lock_irqsave(&kiq->ring_lock, flags); 1072 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 1073 pr_err("critical bug! 
too many kiq readers\n"); 1074 goto failed_unlock; 1075 } 1076 r = amdgpu_ring_alloc(ring, 32); 1077 if (r) 1078 goto failed_unlock; 1079 1080 amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); 1081 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 1082 if (r) 1083 goto failed_undo; 1084 1085 amdgpu_ring_commit(ring); 1086 spin_unlock_irqrestore(&kiq->ring_lock, flags); 1087 1088 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1089 1090 /* don't wait anymore for gpu reset case because this way may 1091 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 1092 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 1093 * never return if we keep waiting in virt_kiq_rreg, which cause 1094 * gpu_recover() hang there. 1095 * 1096 * also don't wait anymore for IRQ context 1097 * */ 1098 if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) 1099 goto failed_kiq_read; 1100 1101 might_sleep(); 1102 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 1103 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 1104 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1105 } 1106 1107 if (cnt > MAX_KIQ_REG_TRY) 1108 goto failed_kiq_read; 1109 1110 mb(); 1111 value = adev->wb.wb[reg_val_offs]; 1112 amdgpu_device_wb_free(adev, reg_val_offs); 1113 return value; 1114 1115 failed_undo: 1116 amdgpu_ring_undo(ring); 1117 failed_unlock: 1118 spin_unlock_irqrestore(&kiq->ring_lock, flags); 1119 failed_kiq_read: 1120 if (reg_val_offs) 1121 amdgpu_device_wb_free(adev, reg_val_offs); 1122 dev_err(adev->dev, "failed to read reg:%x\n", reg); 1123 return ~0; 1124 } 1125 1126 void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) 1127 { 1128 signed long r, cnt = 0; 1129 unsigned long flags; 1130 uint32_t seq; 1131 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 1132 struct amdgpu_ring *ring = &kiq->ring; 1133 1134 BUG_ON(!ring->funcs->emit_wreg); 1135 1136 if (amdgpu_device_skip_hw_access(adev)) 1137 return; 1138 1139 if (adev->mes.ring[0].sched.ready) { 1140 amdgpu_mes_wreg(adev, reg, v); 1141 return; 1142 } 1143 1144 spin_lock_irqsave(&kiq->ring_lock, flags); 1145 r = amdgpu_ring_alloc(ring, 32); 1146 if (r) 1147 goto failed_unlock; 1148 1149 amdgpu_ring_emit_wreg(ring, reg, v); 1150 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 1151 if (r) 1152 goto failed_undo; 1153 1154 amdgpu_ring_commit(ring); 1155 spin_unlock_irqrestore(&kiq->ring_lock, flags); 1156 1157 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1158 1159 /* don't wait anymore for gpu reset case because this way may 1160 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 1161 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 1162 * never return if we keep waiting in virt_kiq_rreg, which cause 1163 * gpu_recover() hang there. 
1164 * 1165 * also don't wait anymore for IRQ context 1166 * */ 1167 if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) 1168 goto failed_kiq_write; 1169 1170 might_sleep(); 1171 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 1172 1173 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 1174 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1175 } 1176 1177 if (cnt > MAX_KIQ_REG_TRY) 1178 goto failed_kiq_write; 1179 1180 return; 1181 1182 failed_undo: 1183 amdgpu_ring_undo(ring); 1184 failed_unlock: 1185 spin_unlock_irqrestore(&kiq->ring_lock, flags); 1186 failed_kiq_write: 1187 dev_err(adev->dev, "failed to write reg:%x\n", reg); 1188 } 1189 1190 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) 1191 { 1192 if (amdgpu_num_kcq == -1) { 1193 return 8; 1194 } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { 1195 dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); 1196 return 8; 1197 } 1198 return amdgpu_num_kcq; 1199 } 1200 1201 void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, 1202 uint32_t ucode_id) 1203 { 1204 const struct gfx_firmware_header_v1_0 *cp_hdr; 1205 const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; 1206 struct amdgpu_firmware_info *info = NULL; 1207 const struct firmware *ucode_fw; 1208 unsigned int fw_size; 1209 1210 switch (ucode_id) { 1211 case AMDGPU_UCODE_ID_CP_PFP: 1212 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1213 adev->gfx.pfp_fw->data; 1214 adev->gfx.pfp_fw_version = 1215 le32_to_cpu(cp_hdr->header.ucode_version); 1216 adev->gfx.pfp_feature_version = 1217 le32_to_cpu(cp_hdr->ucode_feature_version); 1218 ucode_fw = adev->gfx.pfp_fw; 1219 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1220 break; 1221 case AMDGPU_UCODE_ID_CP_RS64_PFP: 1222 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 1223 adev->gfx.pfp_fw->data; 1224 adev->gfx.pfp_fw_version = 1225 le32_to_cpu(cp_hdr_v2_0->header.ucode_version); 1226 adev->gfx.pfp_feature_version = 1227 le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); 1228 ucode_fw = adev->gfx.pfp_fw; 1229 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); 1230 break; 1231 case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK: 1232 case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK: 1233 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 1234 adev->gfx.pfp_fw->data; 1235 ucode_fw = adev->gfx.pfp_fw; 1236 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); 1237 break; 1238 case AMDGPU_UCODE_ID_CP_ME: 1239 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1240 adev->gfx.me_fw->data; 1241 adev->gfx.me_fw_version = 1242 le32_to_cpu(cp_hdr->header.ucode_version); 1243 adev->gfx.me_feature_version = 1244 le32_to_cpu(cp_hdr->ucode_feature_version); 1245 ucode_fw = adev->gfx.me_fw; 1246 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1247 break; 1248 case AMDGPU_UCODE_ID_CP_RS64_ME: 1249 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 1250 adev->gfx.me_fw->data; 1251 adev->gfx.me_fw_version = 1252 le32_to_cpu(cp_hdr_v2_0->header.ucode_version); 1253 adev->gfx.me_feature_version = 1254 le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); 1255 ucode_fw = adev->gfx.me_fw; 1256 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); 1257 break; 1258 case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK: 1259 case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK: 1260 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 1261 adev->gfx.me_fw->data; 1262 ucode_fw = adev->gfx.me_fw; 1263 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); 1264 break; 1265 case AMDGPU_UCODE_ID_CP_CE: 1266 
cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1267 adev->gfx.ce_fw->data; 1268 adev->gfx.ce_fw_version = 1269 le32_to_cpu(cp_hdr->header.ucode_version); 1270 adev->gfx.ce_feature_version = 1271 le32_to_cpu(cp_hdr->ucode_feature_version); 1272 ucode_fw = adev->gfx.ce_fw; 1273 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1274 break; 1275 case AMDGPU_UCODE_ID_CP_MEC1: 1276 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1277 adev->gfx.mec_fw->data; 1278 adev->gfx.mec_fw_version = 1279 le32_to_cpu(cp_hdr->header.ucode_version); 1280 adev->gfx.mec_feature_version = 1281 le32_to_cpu(cp_hdr->ucode_feature_version); 1282 ucode_fw = adev->gfx.mec_fw; 1283 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1284 le32_to_cpu(cp_hdr->jt_size) * 4; 1285 break; 1286 case AMDGPU_UCODE_ID_CP_MEC1_JT: 1287 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1288 adev->gfx.mec_fw->data; 1289 ucode_fw = adev->gfx.mec_fw; 1290 fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; 1291 break; 1292 case AMDGPU_UCODE_ID_CP_MEC2: 1293 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1294 adev->gfx.mec2_fw->data; 1295 adev->gfx.mec2_fw_version = 1296 le32_to_cpu(cp_hdr->header.ucode_version); 1297 adev->gfx.mec2_feature_version = 1298 le32_to_cpu(cp_hdr->ucode_feature_version); 1299 ucode_fw = adev->gfx.mec2_fw; 1300 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1301 le32_to_cpu(cp_hdr->jt_size) * 4; 1302 break; 1303 case AMDGPU_UCODE_ID_CP_MEC2_JT: 1304 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1305 adev->gfx.mec2_fw->data; 1306 ucode_fw = adev->gfx.mec2_fw; 1307 fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; 1308 break; 1309 case AMDGPU_UCODE_ID_CP_RS64_MEC: 1310 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 1311 adev->gfx.mec_fw->data; 1312 adev->gfx.mec_fw_version = 1313 le32_to_cpu(cp_hdr_v2_0->header.ucode_version); 1314 adev->gfx.mec_feature_version = 1315 le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); 1316 ucode_fw = adev->gfx.mec_fw; 1317 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); 1318 break; 1319 case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: 1320 case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: 1321 case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: 1322 case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: 1323 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 1324 adev->gfx.mec_fw->data; 1325 ucode_fw = adev->gfx.mec_fw; 1326 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); 1327 break; 1328 default: 1329 dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id); 1330 return; 1331 } 1332 1333 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1334 info = &adev->firmware.ucode[ucode_id]; 1335 info->ucode_id = ucode_id; 1336 info->fw = ucode_fw; 1337 adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); 1338 } 1339 } 1340 1341 bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id) 1342 { 1343 return !(xcc_id % (adev->gfx.num_xcc_per_xcp ? 
1344 adev->gfx.num_xcc_per_xcp : 1)); 1345 } 1346 1347 static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, 1348 struct device_attribute *addr, 1349 char *buf) 1350 { 1351 struct drm_device *ddev = dev_get_drvdata(dev); 1352 struct amdgpu_device *adev = drm_to_adev(ddev); 1353 int mode; 1354 1355 mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, 1356 AMDGPU_XCP_FL_NONE); 1357 1358 return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode)); 1359 } 1360 1361 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, 1362 struct device_attribute *addr, 1363 const char *buf, size_t count) 1364 { 1365 struct drm_device *ddev = dev_get_drvdata(dev); 1366 struct amdgpu_device *adev = drm_to_adev(ddev); 1367 enum amdgpu_gfx_partition mode; 1368 int ret = 0, num_xcc; 1369 1370 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1371 if (num_xcc % 2 != 0) 1372 return -EINVAL; 1373 1374 if (!strncasecmp("SPX", buf, strlen("SPX"))) { 1375 mode = AMDGPU_SPX_PARTITION_MODE; 1376 } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { 1377 /* 1378 * DPX mode needs AIDs to be in multiple of 2. 1379 * Each AID connects 2 XCCs. 1380 */ 1381 if (num_xcc%4) 1382 return -EINVAL; 1383 mode = AMDGPU_DPX_PARTITION_MODE; 1384 } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { 1385 if (num_xcc != 6) 1386 return -EINVAL; 1387 mode = AMDGPU_TPX_PARTITION_MODE; 1388 } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { 1389 if (num_xcc != 8) 1390 return -EINVAL; 1391 mode = AMDGPU_QPX_PARTITION_MODE; 1392 } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { 1393 mode = AMDGPU_CPX_PARTITION_MODE; 1394 } else { 1395 return -EINVAL; 1396 } 1397 1398 ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); 1399 1400 if (ret) 1401 return ret; 1402 1403 return count; 1404 } 1405 1406 static const char *xcp_desc[] = { 1407 [AMDGPU_SPX_PARTITION_MODE] = "SPX", 1408 [AMDGPU_DPX_PARTITION_MODE] = "DPX", 1409 [AMDGPU_TPX_PARTITION_MODE] = "TPX", 1410 [AMDGPU_QPX_PARTITION_MODE] = "QPX", 1411 [AMDGPU_CPX_PARTITION_MODE] = "CPX", 1412 }; 1413 1414 static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, 1415 struct device_attribute *addr, 1416 char *buf) 1417 { 1418 struct drm_device *ddev = dev_get_drvdata(dev); 1419 struct amdgpu_device *adev = drm_to_adev(ddev); 1420 struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; 1421 int size = 0, mode; 1422 char *sep = ""; 1423 1424 if (!xcp_mgr || !xcp_mgr->avail_xcp_modes) 1425 return sysfs_emit(buf, "Not supported\n"); 1426 1427 for_each_inst(mode, xcp_mgr->avail_xcp_modes) { 1428 size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]); 1429 sep = ", "; 1430 } 1431 1432 size += sysfs_emit_at(buf, size, "\n"); 1433 1434 return size; 1435 } 1436 1437 static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) 1438 { 1439 struct amdgpu_device *adev = ring->adev; 1440 struct drm_gpu_scheduler *sched = &ring->sched; 1441 struct drm_sched_entity entity; 1442 struct dma_fence *f; 1443 struct amdgpu_job *job; 1444 struct amdgpu_ib *ib; 1445 int i, r; 1446 1447 /* Initialize the scheduler entity */ 1448 r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL, 1449 &sched, 1, NULL); 1450 if (r) { 1451 dev_err(adev->dev, "Failed setting up GFX kernel entity.\n"); 1452 goto err; 1453 } 1454 1455 r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, 1456 64, 0, 1457 &job); 1458 if (r) 1459 goto err; 1460 1461 job->enforce_isolation = true; 1462 1463 ib = &job->ibs[0]; 1464 for (i = 0; i <= ring->funcs->align_mask; ++i) 
1465 ib->ptr[i] = ring->funcs->nop; 1466 ib->length_dw = ring->funcs->align_mask + 1; 1467 1468 f = amdgpu_job_submit(job); 1469 1470 r = dma_fence_wait(f, false); 1471 if (r) 1472 goto err; 1473 1474 dma_fence_put(f); 1475 1476 /* Clean up the scheduler entity */ 1477 drm_sched_entity_destroy(&entity); 1478 return 0; 1479 1480 err: 1481 return r; 1482 } 1483 1484 static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) 1485 { 1486 int num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1487 struct amdgpu_ring *ring; 1488 int num_xcc_to_clear; 1489 int i, r, xcc_id; 1490 1491 if (adev->gfx.num_xcc_per_xcp) 1492 num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; 1493 else 1494 num_xcc_to_clear = 1; 1495 1496 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1497 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1498 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; 1499 if ((ring->xcp_id == xcp_id) && ring->sched.ready) { 1500 r = amdgpu_gfx_run_cleaner_shader_job(ring); 1501 if (r) 1502 return r; 1503 num_xcc_to_clear--; 1504 break; 1505 } 1506 } 1507 } 1508 1509 if (num_xcc_to_clear) 1510 return -ENOENT; 1511 1512 return 0; 1513 } 1514 1515 /** 1516 * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader 1517 * @dev: The device structure 1518 * @attr: The device attribute structure 1519 * @buf: The buffer containing the input data 1520 * @count: The size of the input data 1521 * 1522 * Provides the sysfs interface to manually run a cleaner shader, which is 1523 * used to clear the GPU state between different tasks. Writing a value to the 1524 * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution. 1525 * The value written corresponds to the partition index on multi-partition 1526 * devices. On single-partition devices, the value should be '0'. 1527 * 1528 * The cleaner shader clears the Local Data Store (LDS) and General Purpose 1529 * Registers (GPRs) to ensure data isolation between GPU workloads. 1530 * 1531 * Return: The number of bytes written to the sysfs file. 1532 */ 1533 static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, 1534 struct device_attribute *attr, 1535 const char *buf, 1536 size_t count) 1537 { 1538 struct drm_device *ddev = dev_get_drvdata(dev); 1539 struct amdgpu_device *adev = drm_to_adev(ddev); 1540 int ret; 1541 long value; 1542 1543 if (amdgpu_in_reset(adev)) 1544 return -EPERM; 1545 if (adev->in_suspend && !adev->in_runpm) 1546 return -EPERM; 1547 1548 ret = kstrtol(buf, 0, &value); 1549 1550 if (ret) 1551 return -EINVAL; 1552 1553 if (value < 0) 1554 return -EINVAL; 1555 1556 if (adev->xcp_mgr) { 1557 if (value >= adev->xcp_mgr->num_xcps) 1558 return -EINVAL; 1559 } else { 1560 if (value > 1) 1561 return -EINVAL; 1562 } 1563 1564 ret = pm_runtime_get_sync(ddev->dev); 1565 if (ret < 0) { 1566 pm_runtime_put_autosuspend(ddev->dev); 1567 return ret; 1568 } 1569 1570 ret = amdgpu_gfx_run_cleaner_shader(adev, value); 1571 1572 pm_runtime_mark_last_busy(ddev->dev); 1573 pm_runtime_put_autosuspend(ddev->dev); 1574 1575 if (ret) 1576 return ret; 1577 1578 return count; 1579 } 1580 1581 /** 1582 * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings 1583 * @dev: The device structure 1584 * @attr: The device attribute structure 1585 * @buf: The buffer to store the output data 1586 * 1587 * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' 1588 * feature for each GPU partition. 
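 *
 * Illustrative example (partition count and values assumed): on a device
 * exposing four partitions, `cat enforce_isolation` may print "0 0 0 0",
 * while `echo "1 0 0 1" > enforce_isolation` (handled by the store function
 * further below) enables isolation on partitions 0 and 3 only.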
Reading from the 'enforce_isolation' 1589 * sysfs file returns the isolation settings for all partitions, where '0' 1590 * indicates disabled and '1' indicates enabled. 1591 * 1592 * Return: The number of bytes read from the sysfs file. 1593 */ 1594 static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, 1595 struct device_attribute *attr, 1596 char *buf) 1597 { 1598 struct drm_device *ddev = dev_get_drvdata(dev); 1599 struct amdgpu_device *adev = drm_to_adev(ddev); 1600 int i; 1601 ssize_t size = 0; 1602 1603 if (adev->xcp_mgr) { 1604 for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { 1605 size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); 1606 if (i < (adev->xcp_mgr->num_xcps - 1)) 1607 size += sysfs_emit_at(buf, size, " "); 1608 } 1609 buf[size++] = '\n'; 1610 } else { 1611 size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); 1612 } 1613 1614 return size; 1615 } 1616 1617 /** 1618 * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation 1619 * @dev: The device structure 1620 * @attr: The device attribute structure 1621 * @buf: The buffer containing the input data 1622 * @count: The size of the input data 1623 * 1624 * This function allows control over the 'enforce_isolation' feature, which 1625 * serializes access to the graphics engine. Writing '1' or '0' to the 1626 * 'enforce_isolation' sysfs file enables or disables process isolation for 1627 * each partition. The input should specify the setting for all partitions. 1628 * 1629 * Return: The number of bytes written to the sysfs file. 1630 */ 1631 static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, 1632 struct device_attribute *attr, 1633 const char *buf, size_t count) 1634 { 1635 struct drm_device *ddev = dev_get_drvdata(dev); 1636 struct amdgpu_device *adev = drm_to_adev(ddev); 1637 long partition_values[MAX_XCP] = {0}; 1638 int ret, i, num_partitions; 1639 const char *input_buf = buf; 1640 1641 for (i = 0; i < (adev->xcp_mgr ? 
adev->xcp_mgr->num_xcps : 1); i++) { 1642 ret = sscanf(input_buf, "%ld", &partition_values[i]); 1643 if (ret <= 0) 1644 break; 1645 1646 /* Move the pointer to the next value in the string */ 1647 input_buf = strchr(input_buf, ' '); 1648 if (input_buf) { 1649 input_buf++; 1650 } else { 1651 i++; 1652 break; 1653 } 1654 } 1655 num_partitions = i; 1656 1657 if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) 1658 return -EINVAL; 1659 1660 if (!adev->xcp_mgr && num_partitions != 1) 1661 return -EINVAL; 1662 1663 for (i = 0; i < num_partitions; i++) { 1664 if (partition_values[i] != 0 && partition_values[i] != 1) 1665 return -EINVAL; 1666 } 1667 1668 mutex_lock(&adev->enforce_isolation_mutex); 1669 for (i = 0; i < num_partitions; i++) 1670 adev->enforce_isolation[i] = partition_values[i]; 1671 mutex_unlock(&adev->enforce_isolation_mutex); 1672 1673 amdgpu_mes_update_enforce_isolation(adev); 1674 1675 return count; 1676 } 1677 1678 static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev, 1679 struct device_attribute *attr, 1680 char *buf) 1681 { 1682 struct drm_device *ddev = dev_get_drvdata(dev); 1683 struct amdgpu_device *adev = drm_to_adev(ddev); 1684 1685 if (!adev) 1686 return -ENODEV; 1687 1688 return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset); 1689 } 1690 1691 static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev, 1692 struct device_attribute *attr, 1693 char *buf) 1694 { 1695 struct drm_device *ddev = dev_get_drvdata(dev); 1696 struct amdgpu_device *adev = drm_to_adev(ddev); 1697 1698 if (!adev) 1699 return -ENODEV; 1700 1701 return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset); 1702 } 1703 1704 static DEVICE_ATTR(run_cleaner_shader, 0200, 1705 NULL, amdgpu_gfx_set_run_cleaner_shader); 1706 1707 static DEVICE_ATTR(enforce_isolation, 0644, 1708 amdgpu_gfx_get_enforce_isolation, 1709 amdgpu_gfx_set_enforce_isolation); 1710 1711 static DEVICE_ATTR(current_compute_partition, 0644, 1712 amdgpu_gfx_get_current_compute_partition, 1713 amdgpu_gfx_set_compute_partition); 1714 1715 static DEVICE_ATTR(available_compute_partition, 0444, 1716 amdgpu_gfx_get_available_compute_partition, NULL); 1717 static DEVICE_ATTR(gfx_reset_mask, 0444, 1718 amdgpu_gfx_get_gfx_reset_mask, NULL); 1719 1720 static DEVICE_ATTR(compute_reset_mask, 0444, 1721 amdgpu_gfx_get_compute_reset_mask, NULL); 1722 1723 static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) 1724 { 1725 struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; 1726 bool xcp_switch_supported; 1727 int r; 1728 1729 if (!xcp_mgr) 1730 return 0; 1731 1732 xcp_switch_supported = 1733 (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); 1734 1735 if (!xcp_switch_supported) 1736 dev_attr_current_compute_partition.attr.mode &= 1737 ~(S_IWUSR | S_IWGRP | S_IWOTH); 1738 1739 r = device_create_file(adev->dev, &dev_attr_current_compute_partition); 1740 if (r) 1741 return r; 1742 1743 if (xcp_switch_supported) 1744 r = device_create_file(adev->dev, 1745 &dev_attr_available_compute_partition); 1746 1747 return r; 1748 } 1749 1750 static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev) 1751 { 1752 struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; 1753 bool xcp_switch_supported; 1754 1755 if (!xcp_mgr) 1756 return; 1757 1758 xcp_switch_supported = 1759 (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); 1760 device_remove_file(adev->dev, &dev_attr_current_compute_partition); 1761 1762 if (xcp_switch_supported) 1763 device_remove_file(adev->dev, 1764 
&dev_attr_available_compute_partition); 1765 } 1766 1767 static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) 1768 { 1769 int r; 1770 1771 r = device_create_file(adev->dev, &dev_attr_enforce_isolation); 1772 if (r) 1773 return r; 1774 if (adev->gfx.enable_cleaner_shader) 1775 r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); 1776 1777 return r; 1778 } 1779 1780 static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) 1781 { 1782 device_remove_file(adev->dev, &dev_attr_enforce_isolation); 1783 if (adev->gfx.enable_cleaner_shader) 1784 device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); 1785 } 1786 1787 static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev) 1788 { 1789 int r = 0; 1790 1791 if (!amdgpu_gpu_recovery) 1792 return r; 1793 1794 if (adev->gfx.num_gfx_rings) { 1795 r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask); 1796 if (r) 1797 return r; 1798 } 1799 1800 if (adev->gfx.num_compute_rings) { 1801 r = device_create_file(adev->dev, &dev_attr_compute_reset_mask); 1802 if (r) 1803 return r; 1804 } 1805 1806 return r; 1807 } 1808 1809 static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev) 1810 { 1811 if (!amdgpu_gpu_recovery) 1812 return; 1813 1814 if (adev->gfx.num_gfx_rings) 1815 device_remove_file(adev->dev, &dev_attr_gfx_reset_mask); 1816 1817 if (adev->gfx.num_compute_rings) 1818 device_remove_file(adev->dev, &dev_attr_compute_reset_mask); 1819 } 1820 1821 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) 1822 { 1823 int r; 1824 1825 r = amdgpu_gfx_sysfs_xcp_init(adev); 1826 if (r) { 1827 dev_err(adev->dev, "failed to create xcp sysfs files"); 1828 return r; 1829 } 1830 1831 r = amdgpu_gfx_sysfs_isolation_shader_init(adev); 1832 if (r) 1833 dev_err(adev->dev, "failed to create isolation sysfs files"); 1834 1835 r = amdgpu_gfx_sysfs_reset_mask_init(adev); 1836 if (r) 1837 dev_err(adev->dev, "failed to create reset mask sysfs files"); 1838 1839 return r; 1840 } 1841 1842 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) 1843 { 1844 if (adev->dev->kobj.sd) { 1845 amdgpu_gfx_sysfs_xcp_fini(adev); 1846 amdgpu_gfx_sysfs_isolation_shader_fini(adev); 1847 amdgpu_gfx_sysfs_reset_mask_fini(adev); 1848 } 1849 } 1850 1851 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, 1852 unsigned int cleaner_shader_size) 1853 { 1854 if (!adev->gfx.enable_cleaner_shader) 1855 return -EOPNOTSUPP; 1856 1857 return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, 1858 AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, 1859 &adev->gfx.cleaner_shader_obj, 1860 &adev->gfx.cleaner_shader_gpu_addr, 1861 (void **)&adev->gfx.cleaner_shader_cpu_ptr); 1862 } 1863 1864 void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) 1865 { 1866 if (!adev->gfx.enable_cleaner_shader) 1867 return; 1868 1869 amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, 1870 &adev->gfx.cleaner_shader_gpu_addr, 1871 (void **)&adev->gfx.cleaner_shader_cpu_ptr); 1872 } 1873 1874 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, 1875 unsigned int cleaner_shader_size, 1876 const void *cleaner_shader_ptr) 1877 { 1878 if (!adev->gfx.enable_cleaner_shader) 1879 return; 1880 1881 if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) 1882 memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, 1883 cleaner_shader_size); 1884 } 1885 1886 /** 1887 * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver) 1888 * @adev: amdgpu_device 
/**
 * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
 * @adev: amdgpu_device pointer
 * @idx: Index of the scheduler to control
 * @enable: Whether to enable or disable the KFD scheduler
 *
 * This function is used to control the KFD (Kernel Fusion Driver) scheduler
 * from the KGD. It is part of the cleaner shader feature. This function plays
 * a key role in enforcing process isolation on the GPU.
 *
 * The function uses a reference count mechanism (kfd_sch_req_count) to keep
 * track of the number of requests to enable the KFD scheduler. When a request
 * to enable the KFD scheduler is made, the reference count is decremented.
 * When the reference count reaches zero, a delayed work is scheduled to
 * enforce isolation after the remaining time slice
 * (adev->gfx.enforce_isolation_time, initially GFX_SLICE_PERIOD_MS).
 *
 * When a request to disable the KFD scheduler is made, the function first
 * checks if the reference count is zero. If it is, it cancels the delayed work
 * for enforcing isolation and checks if the KFD scheduler is active. If the
 * KFD scheduler is active, it sends a request to stop the KFD scheduler and
 * sets the KFD scheduler state to inactive. Then, it increments the reference
 * count.
 *
 * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
 * scheduler state and reference count are updated atomically.
 *
 * Note: If the reference count is already zero when a request to enable the
 * KFD scheduler is made, it means there's an imbalance bug somewhere. The
 * function triggers a warning in this case.
 */
static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
				    bool enable)
{
	mutex_lock(&adev->gfx.kfd_sch_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which
		 * triggers the WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
			goto unlock;
		}

		adev->gfx.kfd_sch_req_count[idx]--;

		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
		    adev->gfx.kfd_sch_inactive[idx]) {
			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
					      msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
		}
	} else {
		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
			if (!adev->gfx.kfd_sch_inactive[idx]) {
				amdgpu_amdkfd_stop_sched(adev, idx);
				adev->gfx.kfd_sch_inactive[idx] = true;
			}
		}

		adev->gfx.kfd_sch_req_count[idx]++;
	}

unlock:
	mutex_unlock(&adev->gfx.kfd_sch_mutex);
}
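/*
 * Sketch of the pairing amdgpu_gfx_kfd_sch_ctrl() expects (illustrative
 * only): every disable request must eventually be matched by an enable
 * request for the same partition index, e.g.
 *
 *	amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);	// req_count 0 -> 1, KFD scheduler stopped
 *	// ... kernel queue work for this partition runs ...
 *	amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);	// req_count 1 -> 0
 *
 * Only the final enable (the count returning to zero while the KFD
 * scheduler is inactive) schedules the delayed work that lets KFD resume.
 */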
/**
 * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
 *
 * @work: work_struct.
 *
 * This function is the work handler for enforcing shader isolation on AMD GPUs.
 * It counts the number of emitted fences for each GFX and compute ring. If there
 * are any fences, it reschedules the enforce_isolation work after a short (1 ms)
 * delay. If there are no fences, it signals the Kernel Fusion Driver (KFD) to
 * resume the runqueue. The function is synchronized using the
 * enforce_isolation_mutex.
 */
void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
{
	struct amdgpu_isolation_work *isolation_work =
		container_of(work, struct amdgpu_isolation_work, work.work);
	struct amdgpu_device *adev = isolation_work->adev;
	u32 i, idx, fences = 0;

	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
		idx = 0;
	else
		idx = isolation_work->xcp_id;

	if (idx >= MAX_XCP)
		return;

	mutex_lock(&adev->enforce_isolation_mutex);
	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
		if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
			fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
	}
	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
		if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
			fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
	}
	if (fences) {
		/* we've already had our timeslice, so let's wrap this up */
		schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
				      msecs_to_jiffies(1));
	} else {
		/* Tell KFD to resume the runqueue */
		if (adev->kfd.init_complete) {
			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
			amdgpu_amdkfd_start_sched(adev, idx);
			adev->gfx.kfd_sch_inactive[idx] = false;
		}
	}
	mutex_unlock(&adev->enforce_isolation_mutex);
}
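/*
 * Worked example of the time slicing handled below (illustrative numbers
 * only): with a slice period of 10 ms, a kernel submission that arrives
 * 4 ms into the KGD slice while KFD queues are active shortens the next
 * enforce_isolation delay to 10 - 4 = 6 ms; one that arrives after the
 * full slice has elapsed waits a whole period so the pending KGD work can
 * drain and KFD gets its turn.
 */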
/**
 * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
 * @adev: amdgpu_device pointer
 * @idx: Index of the GPU partition
 *
 * When kernel submissions come in, the jobs are given a time slice and once
 * that time slice is up, if there are KFD user queues active, kernel
 * submissions are blocked until KFD has had its time slice. Once the KFD time
 * slice is up, KFD user queues are preempted and kernel submissions are
 * unblocked and allowed to run again.
 */
static void
amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
					  u32 idx)
{
	unsigned long cjiffies;
	bool wait = false;

	mutex_lock(&adev->enforce_isolation_mutex);
	if (adev->enforce_isolation[idx]) {
		/* set the initial values if nothing is set */
		if (!adev->gfx.enforce_isolation_jiffies[idx]) {
			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
			adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
		}
		/* Make sure KFD gets a chance to run */
		if (amdgpu_amdkfd_compute_active(adev, idx)) {
			cjiffies = jiffies;
			if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
				cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
				if (jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS) {
					/* if our time is up, let KGD work drain before scheduling more */
					wait = true;
					/* reset the timer period */
					adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
				} else {
					/* set the timer period to what's left in our time slice */
					adev->gfx.enforce_isolation_time[idx] =
						GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
				}
			} else {
				/* if jiffies wrap around we will just wait a little longer */
				adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
			}
		} else {
			/* if there is no KFD work, then set the full slice period */
			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
			adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
		}
	}
	mutex_unlock(&adev->enforce_isolation_mutex);

	if (wait)
		msleep(GFX_SLICE_PERIOD_MS);
}

/**
 * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
 * @ring: Pointer to the amdgpu_ring structure
 *
 * Ring begin_use helper implementation for gfx which serializes access to the
 * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
 * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
 * each get a time slice when both are active.
 */
void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 idx;
	bool sched_work = false;

	if (!adev->gfx.enable_cleaner_shader)
		return;

	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
		idx = 0;
	else
		idx = ring->xcp_id;

	if (idx >= MAX_XCP)
		return;

	/* Don't submit more work until KFD has had some time */
	amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);

	mutex_lock(&adev->enforce_isolation_mutex);
	if (adev->enforce_isolation[idx]) {
		if (adev->kfd.init_complete)
			sched_work = true;
	}
	mutex_unlock(&adev->enforce_isolation_mutex);

	if (sched_work)
		amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
}
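/*
 * Illustrative hookup (a sketch, not copied from a specific IP block):
 * GFX IP implementations that support enforced isolation point their ring
 * callbacks at this helper and at its end_use counterpart below, e.g. in
 * their struct amdgpu_ring_funcs:
 *
 *	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
 *	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
 */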
/**
 * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
 * @ring: Pointer to the amdgpu_ring structure
 *
 * Ring end_use helper implementation for gfx which serializes access to the
 * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
 * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
 * each get a time slice when both are active.
 */
void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 idx;
	bool sched_work = false;

	if (!adev->gfx.enable_cleaner_shader)
		return;

	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
		idx = 0;
	else
		idx = ring->xcp_id;

	if (idx >= MAX_XCP)
		return;

	mutex_lock(&adev->enforce_isolation_mutex);
	if (adev->enforce_isolation[idx]) {
		if (adev->kfd.init_complete)
			sched_work = true;
	}
	mutex_unlock(&adev->enforce_isolation_mutex);

	if (sched_work)
		amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
}

void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.idle_work.work);
	enum PP_SMC_POWER_PROFILE profile;
	u32 i, fences = 0;
	int r;

	if (adev->gfx.num_gfx_rings)
		profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
	else
		profile = PP_SMC_POWER_PROFILE_COMPUTE;

	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
		fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
		fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
	if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
		mutex_lock(&adev->gfx.workload_profile_mutex);
		if (adev->gfx.workload_profile_active) {
			r = amdgpu_dpm_switch_power_profile(adev, profile, false);
			if (r)
				dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
					 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
					 "fullscreen 3D" : "compute");
			adev->gfx.workload_profile_active = false;
		}
		mutex_unlock(&adev->gfx.workload_profile_mutex);
	} else {
		schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
	}
}

void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	enum PP_SMC_POWER_PROFILE profile;
	int r;

	if (adev->gfx.num_gfx_rings)
		profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
	else
		profile = PP_SMC_POWER_PROFILE_COMPUTE;

	atomic_inc(&adev->gfx.total_submission_cnt);

	cancel_delayed_work_sync(&adev->gfx.idle_work);

	/* We can safely return early here because we've cancelled the
	 * delayed work so there is no one else to set it to false
	 * and we don't care if someone else sets it to true.
	 */
	if (adev->gfx.workload_profile_active)
		return;

	mutex_lock(&adev->gfx.workload_profile_mutex);
	if (!adev->gfx.workload_profile_active) {
		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r,
				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
				 "fullscreen 3D" : "compute");
		adev->gfx.workload_profile_active = true;
	}
	mutex_unlock(&adev->gfx.workload_profile_mutex);
}

void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
{
	atomic_dec(&ring->adev->gfx.total_submission_cnt);

	schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
}
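/*
 * Illustrative pairing for the workload profile helpers above (sketch):
 * submissions are bracketed with begin_use/end_use so the power profile is
 * only dropped once the rings have been idle for GFX_PROFILE_IDLE_TIMEOUT:
 *
 *	amdgpu_gfx_profile_ring_begin_use(ring);	// switch profile on first use
 *	// ... emit and submit the job ...
 *	amdgpu_gfx_profile_ring_end_use(ring);		// re-arm the idle work
 */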
/*
 * debugfs to enable/disable gfx job submission to specific cores.
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	u32 i;
	u64 mask = 0;
	struct amdgpu_ring *ring;

	if (!adev)
		return -ENODEV;

	mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
	if ((val & mask) == 0)
		return -EINVAL;

	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
		ring = &adev->gfx.gfx_ring[i];
		if (val & (1ULL << i))
			ring->sched.ready = true;
		else
			ring->sched.ready = false;
	}
	/* publish sched.ready flag update effective immediately across smp */
	smp_rmb();
	return 0;
}

static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	u32 i;
	u64 mask = 0;
	struct amdgpu_ring *ring;

	if (!adev)
		return -ENODEV;
	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
		ring = &adev->gfx.gfx_ring[i];
		if (ring->sched.ready)
			mask |= 1ULL << i;
	}

	*val = mask;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
			 amdgpu_debugfs_gfx_sched_mask_get,
			 amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");

#endif

void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	if (!(adev->gfx.num_gfx_rings > 1))
		return;
	sprintf(name, "amdgpu_gfx_sched_mask");
	debugfs_create_file(name, 0600, root, adev,
			    &amdgpu_debugfs_gfx_sched_mask_fops);
#endif
}
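/*
 * Illustrative usage of the gfx scheduler mask (sketch; the dri minor
 * number depends on the system and CONFIG_DEBUG_FS must be enabled):
 *
 *	cat /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask	# read current mask
 *	echo 0x1 > /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask	# ring 0 only
 */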
/*
 * debugfs to enable/disable compute job submission to specific cores.
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	u32 i;
	u64 mask = 0;
	struct amdgpu_ring *ring;

	if (!adev)
		return -ENODEV;

	mask = (1ULL << adev->gfx.num_compute_rings) - 1;
	if ((val & mask) == 0)
		return -EINVAL;

	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
		ring = &adev->gfx.compute_ring[i];
		if (val & (1ULL << i))
			ring->sched.ready = true;
		else
			ring->sched.ready = false;
	}

	/* publish sched.ready flag update effective immediately across smp */
	smp_rmb();
	return 0;
}

static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	u32 i;
	u64 mask = 0;
	struct amdgpu_ring *ring;

	if (!adev)
		return -ENODEV;
	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
		ring = &adev->gfx.compute_ring[i];
		if (ring->sched.ready)
			mask |= 1ULL << i;
	}

	*val = mask;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
			 amdgpu_debugfs_compute_sched_mask_get,
			 amdgpu_debugfs_compute_sched_mask_set, "%llx\n");

#endif

void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	if (!(adev->gfx.num_compute_rings > 1))
		return;
	sprintf(name, "amdgpu_compute_sched_mask");
	debugfs_create_file(name, 0600, root, adev,
			    &amdgpu_debugfs_compute_sched_mask_fops);
#endif
}
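/*
 * The compute mask works the same way (sketch; path depends on the dri
 * minor): e.g. "echo 0x3 > /sys/kernel/debug/dri/0/amdgpu_compute_sched_mask"
 * enables submission only to compute rings 0 and 1. A value with no bits
 * inside the valid ring range is rejected with -EINVAL.
 */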