/*
 * Copyright 2016-2024 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN			"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO		"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2			"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS		"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR			"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE		"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10			"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14			"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12			"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID		"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER		"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH		"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN		"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY		"amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP		"amdgpu/yellow_carp_vcn.bin"
#define FIRMWARE_VCN_3_1_2		"amdgpu/vcn_3_1_2.bin"
#define FIRMWARE_VCN4_0_0		"amdgpu/vcn_4_0_0.bin"
#define FIRMWARE_VCN4_0_2		"amdgpu/vcn_4_0_2.bin"
#define FIRMWARE_VCN4_0_3		"amdgpu/vcn_4_0_3.bin"
#define FIRMWARE_VCN4_0_4		"amdgpu/vcn_4_0_4.bin"
#define FIRMWARE_VCN4_0_5		"amdgpu/vcn_4_0_5.bin"
#define FIRMWARE_VCN4_0_6		"amdgpu/vcn_4_0_6.bin"
#define FIRMWARE_VCN4_0_6_1		"amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0		"amdgpu/vcn_5_0_0.bin"
#define FIRMWARE_VCN5_0_1		"amdgpu/vcn_5_0_1.bin"
#define FIRMWARE_VCN5_0_2		"amdgpu/vcn_5_0_2.bin"
#define FIRMWARE_VCN5_3_0		"amdgpu/vcn_5_3_0.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN5_3_0);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev);

int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
{
	char ucode_prefix[25];
	int r;

	adev->vcn.inst[i].adev = adev;
	adev->vcn.inst[i].inst = i;
	amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));

	if (i != 0 && adev->vcn.per_inst_fw) {
		r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
					 AMDGPU_UCODE_REQUIRED,
					 "amdgpu/%s_%d.bin", ucode_prefix, i);
		if (r)
			amdgpu_ucode_release(&adev->vcn.inst[i].fw);
	} else {
		if (!adev->vcn.inst[0].fw) {
			r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
						 AMDGPU_UCODE_REQUIRED,
						 "amdgpu/%s.bin", ucode_prefix);
			if (r)
				amdgpu_ucode_release(&adev->vcn.inst[0].fw);
		} else {
			r = 0;
		}
		adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
	}

	return r;
}

int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
	unsigned long bo_size;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	unsigned int fw_shared_size, log_offset;
	int r;

	mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
	mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
	mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
	atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
	INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
	atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
	    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
		adev->vcn.inst[i].indirect_sram = true;

	/*
	 * Some Steam Deck BIOS versions are incompatible with the
	 * indirect SRAM mode, leading to amdgpu being unable to get
	 * properly probed (and even potentially crashing the kernel).
	 * Hence, check for these versions here - notice this is
	 * restricted to Vangogh (Deck's APU).
	 */
	if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) {
		const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);

		if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
				 !strncmp("F7A0114", bios_ver, 7))) {
			adev->vcn.inst[i].indirect_sram = false;
			dev_info(adev->dev,
				 "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
		}
	}

	/* from vcn4 and above, only unified queue is used */
	adev->vcn.inst[i].using_unified_queue =
		amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);

	hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
	adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 are the encode major version and are non-zero in the new
	 * naming convention. This field is part of version minor and
	 * DRM_DISABLED_FLAG in the old naming convention. Since the latest
	 * version minor is 0x5B and DRM_DISABLED_FLAG is zero in the old
	 * naming convention, this field is always zero so far. These four
	 * bits are used to tell which naming convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		dev_info(adev->dev,
			 "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			 i, enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		dev_info(adev->dev, "[VCN instance %d] Found VCN firmware Version: %u.%u Family ID: %u\n",
			 i, version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
		log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
	} else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
		log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
	} else {
		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
	}

	bo_size += fw_shared_size;

	if (amdgpu_vcnfw_log)
		bo_size += AMDGPU_VCNFW_LOG_SIZE;

	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->vcn.inst[i].vcpu_bo,
				    &adev->vcn.inst[i].gpu_addr,
				    &adev->vcn.inst[i].cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
		bo_size - fw_shared_size;
	adev->vcn.inst[i].fw_shared.gpu_addr =
adev->vcn.inst[i].gpu_addr + 238 bo_size - fw_shared_size; 239 240 adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; 241 242 if (amdgpu_vcnfw_log) { 243 adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; 244 adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; 245 adev->vcn.inst[i].fw_shared.log_offset = log_offset; 246 } 247 248 if (adev->vcn.inst[i].indirect_sram) { 249 r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, 250 AMDGPU_GEM_DOMAIN_VRAM | 251 AMDGPU_GEM_DOMAIN_GTT, 252 &adev->vcn.inst[i].dpg_sram_bo, 253 &adev->vcn.inst[i].dpg_sram_gpu_addr, 254 &adev->vcn.inst[i].dpg_sram_cpu_addr); 255 if (r) { 256 dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); 257 return r; 258 } 259 } 260 261 return 0; 262 } 263 264 void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) 265 { 266 int j; 267 268 if (adev->vcn.harvest_config & (1 << i)) 269 return; 270 271 amdgpu_bo_free_kernel( 272 &adev->vcn.inst[i].dpg_sram_bo, 273 &adev->vcn.inst[i].dpg_sram_gpu_addr, 274 (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr); 275 276 kvfree(adev->vcn.inst[i].saved_bo); 277 278 amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo, 279 &adev->vcn.inst[i].gpu_addr, 280 (void **)&adev->vcn.inst[i].cpu_addr); 281 282 amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec); 283 284 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) 285 amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]); 286 287 if (adev->vcn.per_inst_fw) { 288 amdgpu_ucode_release(&adev->vcn.inst[i].fw); 289 } else { 290 amdgpu_ucode_release(&adev->vcn.inst[0].fw); 291 adev->vcn.inst[i].fw = NULL; 292 } 293 294 if (adev->vcn.reg_list) 295 amdgpu_vcn_reg_dump_fini(adev); 296 297 mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); 298 mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); 299 } 300 301 bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) 302 { 303 bool ret = false; 304 int vcn_config = adev->vcn.inst[vcn_instance].vcn_config; 305 306 if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) 307 ret = true; 308 else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) 309 ret = true; 310 else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) 311 ret = true; 312 313 return ret; 314 } 315 316 static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i) 317 { 318 unsigned int size; 319 void *ptr; 320 int idx; 321 322 if (adev->vcn.harvest_config & (1 << i)) 323 return 0; 324 if (adev->vcn.inst[i].vcpu_bo == NULL) 325 return 0; 326 327 size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); 328 ptr = adev->vcn.inst[i].cpu_addr; 329 330 adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); 331 if (!adev->vcn.inst[i].saved_bo) 332 return -ENOMEM; 333 334 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 335 memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); 336 drm_dev_exit(idx); 337 } 338 339 return 0; 340 } 341 342 int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev) 343 { 344 int ret, i; 345 346 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 347 ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i); 348 if (ret) 349 return ret; 350 } 351 352 return 0; 353 } 354 355 int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) 356 { 357 bool in_ras_intr = amdgpu_ras_intr_triggered(); 358 359 if (adev->vcn.harvest_config & (1 << i)) 360 return 0; 361 362 /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to 363 * restore fw data and clear buffer in 
amdgpu_vcn_resume() */ 364 if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) 365 return 0; 366 367 return amdgpu_vcn_save_vcpu_bo_inst(adev, i); 368 } 369 370 int amdgpu_vcn_resume(struct amdgpu_device *adev, int i) 371 { 372 unsigned int size; 373 void *ptr; 374 int idx; 375 376 if (adev->vcn.harvest_config & (1 << i)) 377 return 0; 378 if (adev->vcn.inst[i].vcpu_bo == NULL) 379 return -EINVAL; 380 381 size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); 382 ptr = adev->vcn.inst[i].cpu_addr; 383 384 if (adev->vcn.inst[i].saved_bo != NULL) { 385 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 386 memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); 387 drm_dev_exit(idx); 388 } 389 kvfree(adev->vcn.inst[i].saved_bo); 390 adev->vcn.inst[i].saved_bo = NULL; 391 } else { 392 const struct common_firmware_header *hdr; 393 unsigned int offset; 394 395 hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; 396 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 397 offset = le32_to_cpu(hdr->ucode_array_offset_bytes); 398 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 399 memcpy_toio(adev->vcn.inst[i].cpu_addr, 400 adev->vcn.inst[i].fw->data + offset, 401 le32_to_cpu(hdr->ucode_size_bytes)); 402 drm_dev_exit(idx); 403 } 404 size -= le32_to_cpu(hdr->ucode_size_bytes); 405 ptr += le32_to_cpu(hdr->ucode_size_bytes); 406 } 407 memset_io(ptr, 0, size); 408 } 409 410 return 0; 411 } 412 413 void amdgpu_vcn_get_profile(struct amdgpu_device *adev) 414 { 415 int r; 416 417 mutex_lock(&adev->vcn.workload_profile_mutex); 418 419 if (adev->vcn.workload_profile_active) { 420 mutex_unlock(&adev->vcn.workload_profile_mutex); 421 return; 422 } 423 r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, 424 true); 425 if (r) 426 dev_warn(adev->dev, 427 "(%d) failed to enable video power profile mode\n", r); 428 else 429 adev->vcn.workload_profile_active = true; 430 mutex_unlock(&adev->vcn.workload_profile_mutex); 431 } 432 433 void amdgpu_vcn_put_profile(struct amdgpu_device *adev) 434 { 435 bool pg = true; 436 int r, i; 437 438 mutex_lock(&adev->vcn.workload_profile_mutex); 439 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 440 if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) { 441 pg = false; 442 break; 443 } 444 } 445 446 if (pg) { 447 r = amdgpu_dpm_switch_power_profile( 448 adev, PP_SMC_POWER_PROFILE_VIDEO, false); 449 if (r) 450 dev_warn( 451 adev->dev, 452 "(%d) failed to disable video power profile mode\n", 453 r); 454 else 455 adev->vcn.workload_profile_active = false; 456 } 457 458 mutex_unlock(&adev->vcn.workload_profile_mutex); 459 } 460 461 static void amdgpu_vcn_idle_work_handler(struct work_struct *work) 462 { 463 struct amdgpu_vcn_inst *vcn_inst = 464 container_of(work, struct amdgpu_vcn_inst, idle_work.work); 465 struct amdgpu_device *adev = vcn_inst->adev; 466 unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; 467 unsigned int i = vcn_inst->inst, j; 468 469 if (adev->vcn.harvest_config & (1 << i)) 470 return; 471 472 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) 473 fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]); 474 475 /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ 476 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && 477 !adev->vcn.inst[i].using_unified_queue) { 478 struct dpg_pause_state new_state; 479 480 if (fence[i] || 481 unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt))) 482 new_state.fw_based = VCN_DPG_STATE__PAUSE; 483 else 484 new_state.fw_based = VCN_DPG_STATE__UNPAUSE; 485 
486 adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state); 487 } 488 489 fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec); 490 fences += fence[i]; 491 492 if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { 493 mutex_lock(&vcn_inst->vcn_pg_lock); 494 vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); 495 mutex_unlock(&vcn_inst->vcn_pg_lock); 496 amdgpu_vcn_put_profile(adev); 497 498 } else { 499 schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); 500 } 501 } 502 503 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) 504 { 505 struct amdgpu_device *adev = ring->adev; 506 struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me]; 507 508 atomic_inc(&vcn_inst->total_submission_cnt); 509 510 cancel_delayed_work_sync(&vcn_inst->idle_work); 511 512 mutex_lock(&vcn_inst->vcn_pg_lock); 513 vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); 514 515 /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ 516 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && 517 !vcn_inst->using_unified_queue) { 518 struct dpg_pause_state new_state; 519 520 if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { 521 atomic_inc(&vcn_inst->dpg_enc_submission_cnt); 522 new_state.fw_based = VCN_DPG_STATE__PAUSE; 523 } else { 524 unsigned int fences = 0; 525 unsigned int i; 526 527 for (i = 0; i < vcn_inst->num_enc_rings; ++i) 528 fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]); 529 530 if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt)) 531 new_state.fw_based = VCN_DPG_STATE__PAUSE; 532 else 533 new_state.fw_based = VCN_DPG_STATE__UNPAUSE; 534 } 535 536 vcn_inst->pause_dpg_mode(vcn_inst, &new_state); 537 } 538 mutex_unlock(&vcn_inst->vcn_pg_lock); 539 amdgpu_vcn_get_profile(adev); 540 } 541 542 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) 543 { 544 struct amdgpu_device *adev = ring->adev; 545 546 /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ 547 if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && 548 ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && 549 !adev->vcn.inst[ring->me].using_unified_queue) 550 atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); 551 552 atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt); 553 554 schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work, 555 VCN_IDLE_TIMEOUT); 556 } 557 558 int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) 559 { 560 struct amdgpu_device *adev = ring->adev; 561 uint32_t tmp = 0; 562 unsigned int i; 563 int r; 564 565 /* VCN in SRIOV does not support direct register read/write */ 566 if (amdgpu_sriov_vf(adev)) 567 return 0; 568 569 WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); 570 r = amdgpu_ring_alloc(ring, 3); 571 if (r) 572 return r; 573 amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0)); 574 amdgpu_ring_write(ring, 0xDEADBEEF); 575 amdgpu_ring_commit(ring); 576 for (i = 0; i < adev->usec_timeout; i++) { 577 tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); 578 if (tmp == 0xDEADBEEF) 579 break; 580 udelay(1); 581 } 582 583 if (i >= adev->usec_timeout) 584 r = -ETIMEDOUT; 585 586 return r; 587 } 588 589 int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring) 590 { 591 struct amdgpu_device *adev = ring->adev; 592 uint32_t rptr; 593 unsigned int i; 594 int r; 595 596 if (amdgpu_sriov_vf(adev)) 597 return 0; 598 599 r = amdgpu_ring_alloc(ring, 16); 600 if (r) 601 return r; 602 603 rptr = amdgpu_ring_get_rptr(ring); 604 605 
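	/* Submit a single END command; the wait loop below treats any movement
	 * of the read pointer past the value sampled above as proof that the
	 * software decode ring is alive and consuming commands.
	 */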
amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); 606 amdgpu_ring_commit(ring); 607 608 for (i = 0; i < adev->usec_timeout; i++) { 609 if (amdgpu_ring_get_rptr(ring) != rptr) 610 break; 611 udelay(1); 612 } 613 614 if (i >= adev->usec_timeout) 615 r = -ETIMEDOUT; 616 617 return r; 618 } 619 620 static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, 621 struct amdgpu_ib *ib_msg, 622 struct dma_fence **fence) 623 { 624 u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); 625 struct amdgpu_device *adev = ring->adev; 626 struct dma_fence *f = NULL; 627 struct amdgpu_job *job; 628 struct amdgpu_ib *ib; 629 int i, r; 630 631 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, 632 64, AMDGPU_IB_POOL_DIRECT, 633 &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); 634 if (r) 635 goto err; 636 637 ib = &job->ibs[0]; 638 ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0); 639 ib->ptr[1] = addr; 640 ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0); 641 ib->ptr[3] = addr >> 32; 642 ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0); 643 ib->ptr[5] = 0; 644 for (i = 6; i < 16; i += 2) { 645 ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0); 646 ib->ptr[i+1] = 0; 647 } 648 ib->length_dw = 16; 649 650 r = amdgpu_job_submit_direct(job, ring, &f); 651 if (r) 652 goto err_free; 653 654 amdgpu_ib_free(ib_msg, f); 655 656 if (fence) 657 *fence = dma_fence_get(f); 658 dma_fence_put(f); 659 660 return 0; 661 662 err_free: 663 amdgpu_job_free(job); 664 err: 665 amdgpu_ib_free(ib_msg, f); 666 return r; 667 } 668 669 static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, 670 struct amdgpu_ib *ib) 671 { 672 struct amdgpu_device *adev = ring->adev; 673 uint32_t *msg; 674 int r, i; 675 676 memset(ib, 0, sizeof(*ib)); 677 r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2, 678 AMDGPU_IB_POOL_DIRECT, 679 ib); 680 if (r) 681 return r; 682 683 msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr); 684 msg[0] = cpu_to_le32(0x00000028); 685 msg[1] = cpu_to_le32(0x00000038); 686 msg[2] = cpu_to_le32(0x00000001); 687 msg[3] = cpu_to_le32(0x00000000); 688 msg[4] = cpu_to_le32(handle); 689 msg[5] = cpu_to_le32(0x00000000); 690 msg[6] = cpu_to_le32(0x00000001); 691 msg[7] = cpu_to_le32(0x00000028); 692 msg[8] = cpu_to_le32(0x00000010); 693 msg[9] = cpu_to_le32(0x00000000); 694 msg[10] = cpu_to_le32(0x00000007); 695 msg[11] = cpu_to_le32(0x00000000); 696 msg[12] = cpu_to_le32(0x00000780); 697 msg[13] = cpu_to_le32(0x00000440); 698 for (i = 14; i < 1024; ++i) 699 msg[i] = cpu_to_le32(0x0); 700 701 return 0; 702 } 703 704 static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 705 struct amdgpu_ib *ib) 706 { 707 struct amdgpu_device *adev = ring->adev; 708 uint32_t *msg; 709 int r, i; 710 711 memset(ib, 0, sizeof(*ib)); 712 r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2, 713 AMDGPU_IB_POOL_DIRECT, 714 ib); 715 if (r) 716 return r; 717 718 msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr); 719 msg[0] = cpu_to_le32(0x00000028); 720 msg[1] = cpu_to_le32(0x00000018); 721 msg[2] = cpu_to_le32(0x00000000); 722 msg[3] = cpu_to_le32(0x00000002); 723 msg[4] = cpu_to_le32(handle); 724 msg[5] = cpu_to_le32(0x00000000); 725 for (i = 6; i < 1024; ++i) 726 msg[i] = cpu_to_le32(0x0); 727 728 return 0; 729 } 730 731 int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) 732 { 733 struct dma_fence *fence = NULL; 734 struct amdgpu_ib ib; 735 long r; 736 737 r = 
amdgpu_vcn_dec_get_create_msg(ring, 1, &ib); 738 if (r) 739 goto error; 740 741 r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL); 742 if (r) 743 goto error; 744 r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib); 745 if (r) 746 goto error; 747 748 r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence); 749 if (r) 750 goto error; 751 752 r = dma_fence_wait_timeout(fence, false, timeout); 753 if (r == 0) 754 r = -ETIMEDOUT; 755 else if (r > 0) 756 r = 0; 757 758 dma_fence_put(fence); 759 error: 760 return r; 761 } 762 763 static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib, 764 uint32_t ib_pack_in_dw, bool enc) 765 { 766 uint32_t *ib_checksum; 767 768 ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */ 769 ib->ptr[ib->length_dw++] = 0x30000002; 770 ib_checksum = &ib->ptr[ib->length_dw++]; 771 ib->ptr[ib->length_dw++] = ib_pack_in_dw; 772 773 ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */ 774 ib->ptr[ib->length_dw++] = 0x30000001; 775 ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3; 776 ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t); 777 778 return ib_checksum; 779 } 780 781 static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, 782 uint32_t ib_pack_in_dw) 783 { 784 uint32_t i; 785 uint32_t checksum = 0; 786 787 for (i = 0; i < ib_pack_in_dw; i++) 788 checksum += *(*ib_checksum + 2 + i); 789 790 **ib_checksum = checksum; 791 } 792 793 static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, 794 struct amdgpu_ib *ib_msg, 795 struct dma_fence **fence) 796 { 797 struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; 798 unsigned int ib_size_dw = 64; 799 struct amdgpu_device *adev = ring->adev; 800 struct dma_fence *f = NULL; 801 struct amdgpu_job *job; 802 struct amdgpu_ib *ib; 803 uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); 804 uint32_t *ib_checksum; 805 uint32_t ib_pack_in_dw; 806 int i, r; 807 808 if (adev->vcn.inst[ring->me].using_unified_queue) 809 ib_size_dw += 8; 810 811 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, 812 ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, 813 &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); 814 if (r) 815 goto err; 816 817 ib = &job->ibs[0]; 818 ib->length_dw = 0; 819 820 /* single queue headers */ 821 if (adev->vcn.inst[ring->me].using_unified_queue) { 822 ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) 823 + 4 + 2; /* engine info + decoding ib in dw */ 824 ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); 825 } 826 827 ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; 828 ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); 829 decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); 830 ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; 831 memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); 832 833 decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER); 834 decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32); 835 decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr); 836 837 for (i = ib->length_dw; i < ib_size_dw; ++i) 838 ib->ptr[i] = 0x0; 839 840 if (adev->vcn.inst[ring->me].using_unified_queue) 841 amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); 842 843 r = amdgpu_job_submit_direct(job, ring, &f); 844 if (r) 845 goto err_free; 846 847 amdgpu_ib_free(ib_msg, f); 848 849 if (fence) 850 *fence = dma_fence_get(f); 851 dma_fence_put(f); 852 853 return 0; 854 855 err_free: 856 amdgpu_job_free(job); 
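	/* err_free releases the job and then falls through to err, which also
	 * frees the caller's message IB so it is not leaked on failure.
	 */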
857 err: 858 amdgpu_ib_free(ib_msg, f); 859 return r; 860 } 861 862 int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) 863 { 864 struct dma_fence *fence = NULL; 865 struct amdgpu_ib ib; 866 long r; 867 868 r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib); 869 if (r) 870 goto error; 871 872 r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL); 873 if (r) 874 goto error; 875 r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib); 876 if (r) 877 goto error; 878 879 r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence); 880 if (r) 881 goto error; 882 883 r = dma_fence_wait_timeout(fence, false, timeout); 884 if (r == 0) 885 r = -ETIMEDOUT; 886 else if (r > 0) 887 r = 0; 888 889 dma_fence_put(fence); 890 error: 891 return r; 892 } 893 894 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) 895 { 896 struct amdgpu_device *adev = ring->adev; 897 uint32_t rptr; 898 unsigned int i; 899 int r; 900 901 if (amdgpu_sriov_vf(adev)) 902 return 0; 903 904 r = amdgpu_ring_alloc(ring, 16); 905 if (r) 906 return r; 907 908 rptr = amdgpu_ring_get_rptr(ring); 909 910 amdgpu_ring_write(ring, VCN_ENC_CMD_END); 911 amdgpu_ring_commit(ring); 912 913 for (i = 0; i < adev->usec_timeout; i++) { 914 if (amdgpu_ring_get_rptr(ring) != rptr) 915 break; 916 udelay(1); 917 } 918 919 if (i >= adev->usec_timeout) 920 r = -ETIMEDOUT; 921 922 return r; 923 } 924 925 static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, 926 struct amdgpu_ib *ib_msg, 927 struct dma_fence **fence) 928 { 929 unsigned int ib_size_dw = 16; 930 struct amdgpu_device *adev = ring->adev; 931 struct amdgpu_job *job; 932 struct amdgpu_ib *ib; 933 struct dma_fence *f = NULL; 934 uint32_t *ib_checksum = NULL; 935 uint64_t addr; 936 int i, r; 937 938 if (adev->vcn.inst[ring->me].using_unified_queue) 939 ib_size_dw += 8; 940 941 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, 942 ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, 943 &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); 944 if (r) 945 return r; 946 947 ib = &job->ibs[0]; 948 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); 949 950 ib->length_dw = 0; 951 952 if (adev->vcn.inst[ring->me].using_unified_queue) 953 ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); 954 955 ib->ptr[ib->length_dw++] = 0x00000018; 956 ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ 957 ib->ptr[ib->length_dw++] = handle; 958 ib->ptr[ib->length_dw++] = upper_32_bits(addr); 959 ib->ptr[ib->length_dw++] = addr; 960 ib->ptr[ib->length_dw++] = 0x00000000; 961 962 ib->ptr[ib->length_dw++] = 0x00000014; 963 ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ 964 ib->ptr[ib->length_dw++] = 0x0000001c; 965 ib->ptr[ib->length_dw++] = 0x00000000; 966 ib->ptr[ib->length_dw++] = 0x00000000; 967 968 ib->ptr[ib->length_dw++] = 0x00000008; 969 ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ 970 971 for (i = ib->length_dw; i < ib_size_dw; ++i) 972 ib->ptr[i] = 0x0; 973 974 if (adev->vcn.inst[ring->me].using_unified_queue) 975 amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); 976 977 r = amdgpu_job_submit_direct(job, ring, &f); 978 if (r) 979 goto err; 980 981 if (fence) 982 *fence = dma_fence_get(f); 983 dma_fence_put(f); 984 985 return 0; 986 987 err: 988 amdgpu_job_free(job); 989 return r; 990 } 991 992 static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 993 struct amdgpu_ib *ib_msg, 994 struct dma_fence **fence) 995 { 996 unsigned int ib_size_dw = 16; 997 struct amdgpu_device *adev = ring->adev; 998 struct amdgpu_job *job; 999 
struct amdgpu_ib *ib; 1000 struct dma_fence *f = NULL; 1001 uint32_t *ib_checksum = NULL; 1002 uint64_t addr; 1003 int i, r; 1004 1005 if (adev->vcn.inst[ring->me].using_unified_queue) 1006 ib_size_dw += 8; 1007 1008 r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, 1009 ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, 1010 &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); 1011 if (r) 1012 return r; 1013 1014 ib = &job->ibs[0]; 1015 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); 1016 1017 ib->length_dw = 0; 1018 1019 if (adev->vcn.inst[ring->me].using_unified_queue) 1020 ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); 1021 1022 ib->ptr[ib->length_dw++] = 0x00000018; 1023 ib->ptr[ib->length_dw++] = 0x00000001; 1024 ib->ptr[ib->length_dw++] = handle; 1025 ib->ptr[ib->length_dw++] = upper_32_bits(addr); 1026 ib->ptr[ib->length_dw++] = addr; 1027 ib->ptr[ib->length_dw++] = 0x00000000; 1028 1029 ib->ptr[ib->length_dw++] = 0x00000014; 1030 ib->ptr[ib->length_dw++] = 0x00000002; 1031 ib->ptr[ib->length_dw++] = 0x0000001c; 1032 ib->ptr[ib->length_dw++] = 0x00000000; 1033 ib->ptr[ib->length_dw++] = 0x00000000; 1034 1035 ib->ptr[ib->length_dw++] = 0x00000008; 1036 ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */ 1037 1038 for (i = ib->length_dw; i < ib_size_dw; ++i) 1039 ib->ptr[i] = 0x0; 1040 1041 if (adev->vcn.inst[ring->me].using_unified_queue) 1042 amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); 1043 1044 r = amdgpu_job_submit_direct(job, ring, &f); 1045 if (r) 1046 goto err; 1047 1048 if (fence) 1049 *fence = dma_fence_get(f); 1050 dma_fence_put(f); 1051 1052 return 0; 1053 1054 err: 1055 amdgpu_job_free(job); 1056 return r; 1057 } 1058 1059 int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1060 { 1061 struct amdgpu_device *adev = ring->adev; 1062 struct dma_fence *fence = NULL; 1063 struct amdgpu_ib ib; 1064 long r; 1065 1066 memset(&ib, 0, sizeof(ib)); 1067 r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE, 1068 AMDGPU_IB_POOL_DIRECT, 1069 &ib); 1070 if (r) 1071 return r; 1072 1073 r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL); 1074 if (r) 1075 goto error; 1076 1077 r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence); 1078 if (r) 1079 goto error; 1080 1081 r = dma_fence_wait_timeout(fence, false, timeout); 1082 if (r == 0) 1083 r = -ETIMEDOUT; 1084 else if (r > 0) 1085 r = 0; 1086 1087 error: 1088 amdgpu_ib_free(&ib, fence); 1089 dma_fence_put(fence); 1090 1091 return r; 1092 } 1093 1094 int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1095 { 1096 struct amdgpu_device *adev = ring->adev; 1097 long r; 1098 1099 if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && 1100 (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1)) && 1101 (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 2))) { 1102 r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); 1103 if (r) 1104 goto error; 1105 } 1106 1107 r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout); 1108 1109 error: 1110 return r; 1111 } 1112 1113 enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) 1114 { 1115 switch (ring) { 1116 case 0: 1117 return AMDGPU_RING_PRIO_0; 1118 case 1: 1119 return AMDGPU_RING_PRIO_1; 1120 case 2: 1121 return AMDGPU_RING_PRIO_2; 1122 default: 1123 return AMDGPU_RING_PRIO_0; 1124 } 1125 } 1126 1127 void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i) 1128 { 1129 unsigned int idx; 1130 1131 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1132 const struct 
common_firmware_header *hdr;

		if (adev->vcn.harvest_config & (1 << i))
			return;

		if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
		     amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1) ||
		     amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 2))
		    && (i > 0))
			return;

		hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
		/* currently only support 2 FW instances */
		if (i >= 2) {
			dev_info(adev->dev, "More than 2 VCN FW instances!\n");
			return;
		}
		idx = AMDGPU_UCODE_ID_VCN + i;
		adev->firmware.ucode[idx].ucode_id = idx;
		adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
	}
}

/*
 * debugfs for mapping vcn firmware log buffer.
 */
#if defined(CONFIG_DEBUG_FS)
static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_vcn_inst *vcn;
	void *log_buf;
	struct amdgpu_vcn_fwlog *plog;
	unsigned int read_pos, write_pos, available, i, read_bytes = 0;
	unsigned int read_num[2] = {0};

	vcn = file_inode(f)->i_private;
	if (!vcn)
		return -ENODEV;

	if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
		return -EFAULT;

	log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;

	plog = (struct amdgpu_vcn_fwlog *)log_buf;
	read_pos = plog->rptr;
	write_pos = plog->wptr;

	if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
		return -EFAULT;

	if (!size || (read_pos == write_pos))
		return 0;

	if (write_pos > read_pos) {
		available = write_pos - read_pos;
		read_num[0] = min_t(size_t, size, available);
	} else {
		read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
		available = read_num[0] + write_pos - plog->header_size;
		if (size > available)
			read_num[1] = write_pos - plog->header_size;
		else if (size > read_num[0])
			read_num[1] = size - read_num[0];
		else
			read_num[0] = size;
	}

	for (i = 0; i < 2; i++) {
		if (read_num[i]) {
			if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
				read_pos = plog->header_size;
			if (read_num[i] == copy_to_user((buf + read_bytes),
							(log_buf + read_pos), read_num[i]))
				return -EFAULT;

			read_bytes += read_num[i];
			read_pos += read_num[i];
		}
	}

	plog->rptr = read_pos;
	*pos += read_bytes;
	return read_bytes;
}

static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_vcn_fwlog_read,
	.llseek = default_llseek
};
#endif

void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
				   struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	sprintf(name, "amdgpu_vcn_%d_fwlog", i);
	debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
				 &amdgpu_debugfs_vcnfwlog_fops,
				 AMDGPU_VCNFW_LOG_SIZE);
#endif
}

void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
	uint32_t *flag = vcn->fw_shared.cpu_addr;
	void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
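	/* The firmware log ring buffer sits immediately after the shared
	 * memory block in the same allocation; its GPU address and size are
	 * handed to the firmware through the fw_log descriptor below.
	 */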
uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; 1249 struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; 1250 struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr 1251 + vcn->fw_shared.log_offset; 1252 *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); 1253 fw_log->is_enabled = 1; 1254 fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF); 1255 fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32); 1256 fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE); 1257 1258 log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog); 1259 log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE; 1260 log_buf->rptr = log_buf->header_size; 1261 log_buf->wptr = log_buf->header_size; 1262 log_buf->wrapped = 0; 1263 #endif 1264 } 1265 1266 int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, 1267 struct amdgpu_irq_src *source, 1268 struct amdgpu_iv_entry *entry) 1269 { 1270 struct ras_common_if *ras_if = adev->vcn.ras_if; 1271 struct ras_dispatch_if ih_data = { 1272 .entry = entry, 1273 }; 1274 1275 if (!ras_if) 1276 return 0; 1277 1278 if (!amdgpu_sriov_vf(adev)) { 1279 ih_data.head = *ras_if; 1280 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 1281 } else { 1282 if (adev->virt.ops && adev->virt.ops->ras_poison_handler) 1283 adev->virt.ops->ras_poison_handler(adev, ras_if->block); 1284 else 1285 dev_warn(adev->dev, 1286 "No ras_poison_handler interface in SRIOV for VCN!\n"); 1287 } 1288 1289 return 0; 1290 } 1291 1292 int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) 1293 { 1294 int r, i; 1295 1296 r = amdgpu_ras_block_late_init(adev, ras_block); 1297 if (r) 1298 return r; 1299 1300 if (amdgpu_ras_is_supported(adev, ras_block->block)) { 1301 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 1302 if (adev->vcn.harvest_config & (1 << i) || 1303 !adev->vcn.inst[i].ras_poison_irq.funcs) 1304 continue; 1305 1306 r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0); 1307 if (r) 1308 goto late_fini; 1309 } 1310 } 1311 return 0; 1312 1313 late_fini: 1314 amdgpu_ras_block_late_fini(adev, ras_block); 1315 return r; 1316 } 1317 1318 int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) 1319 { 1320 int err; 1321 struct amdgpu_vcn_ras *ras; 1322 1323 if (!adev->vcn.ras) 1324 return 0; 1325 1326 ras = adev->vcn.ras; 1327 err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 1328 if (err) { 1329 dev_err(adev->dev, "Failed to register vcn ras block!\n"); 1330 return err; 1331 } 1332 1333 strcpy(ras->ras_block.ras_comm.name, "vcn"); 1334 ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; 1335 ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; 1336 adev->vcn.ras_if = &ras->ras_block.ras_comm; 1337 1338 if (!ras->ras_block.ras_late_init) 1339 ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init; 1340 1341 return 0; 1342 } 1343 1344 int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, 1345 enum AMDGPU_UCODE_ID ucode_id) 1346 { 1347 struct amdgpu_firmware_info ucode = { 1348 .ucode_id = (ucode_id ? ucode_id : 1349 (inst_idx ? 
AMDGPU_UCODE_ID_VCN1_RAM : 1350 AMDGPU_UCODE_ID_VCN0_RAM)), 1351 .mc_addr = adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, 1352 .ucode_size = ((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - 1353 (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr), 1354 }; 1355 1356 return psp_execute_ip_fw_load(&adev->psp, &ucode); 1357 } 1358 1359 static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev, 1360 struct device_attribute *attr, 1361 char *buf) 1362 { 1363 struct drm_device *ddev = dev_get_drvdata(dev); 1364 struct amdgpu_device *adev = drm_to_adev(ddev); 1365 1366 if (!adev) 1367 return -ENODEV; 1368 1369 return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset); 1370 } 1371 1372 static DEVICE_ATTR(vcn_reset_mask, 0444, 1373 amdgpu_get_vcn_reset_mask, NULL); 1374 1375 int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev) 1376 { 1377 int r = 0; 1378 1379 if (adev->vcn.num_vcn_inst) { 1380 r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask); 1381 if (r) 1382 return r; 1383 } 1384 1385 return r; 1386 } 1387 1388 void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) 1389 { 1390 if (adev->dev->kobj.sd) { 1391 if (adev->vcn.num_vcn_inst) 1392 device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); 1393 } 1394 } 1395 1396 /* 1397 * debugfs to enable/disable vcn job submission to specific core or 1398 * instance. It is created only if the queue type is unified. 1399 */ 1400 #if defined(CONFIG_DEBUG_FS) 1401 static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val) 1402 { 1403 struct amdgpu_device *adev = (struct amdgpu_device *)data; 1404 u32 i; 1405 u64 mask; 1406 struct amdgpu_ring *ring; 1407 1408 if (!adev) 1409 return -ENODEV; 1410 1411 mask = (1ULL << adev->vcn.num_vcn_inst) - 1; 1412 if ((val & mask) == 0) 1413 return -EINVAL; 1414 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1415 ring = &adev->vcn.inst[i].ring_enc[0]; 1416 if (val & (1ULL << i)) 1417 ring->sched.ready = true; 1418 else 1419 ring->sched.ready = false; 1420 } 1421 /* publish sched.ready flag update effective immediately across smp */ 1422 smp_rmb(); 1423 return 0; 1424 } 1425 1426 static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val) 1427 { 1428 struct amdgpu_device *adev = (struct amdgpu_device *)data; 1429 u32 i; 1430 u64 mask = 0; 1431 struct amdgpu_ring *ring; 1432 1433 if (!adev) 1434 return -ENODEV; 1435 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1436 ring = &adev->vcn.inst[i].ring_enc[0]; 1437 if (ring->sched.ready) 1438 mask |= 1ULL << i; 1439 } 1440 *val = mask; 1441 return 0; 1442 } 1443 1444 DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops, 1445 amdgpu_debugfs_vcn_sched_mask_get, 1446 amdgpu_debugfs_vcn_sched_mask_set, "%llx\n"); 1447 #endif 1448 1449 void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev) 1450 { 1451 #if defined(CONFIG_DEBUG_FS) 1452 struct drm_minor *minor = adev_to_drm(adev)->primary; 1453 struct dentry *root = minor->debugfs_root; 1454 char name[32]; 1455 1456 if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue) 1457 return; 1458 sprintf(name, "amdgpu_vcn_sched_mask"); 1459 debugfs_create_file(name, 0600, root, adev, 1460 &amdgpu_debugfs_vcn_sched_mask_fops); 1461 #endif 1462 } 1463 1464 /** 1465 * vcn_set_powergating_state - set VCN block powergating state 1466 * 1467 * @ip_block: amdgpu_ip_block pointer 1468 * @state: power gating state 1469 * 1470 * Set VCN block powergating state 1471 */ 1472 int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, 1473 enum 
amd_powergating_state state) 1474 { 1475 struct amdgpu_device *adev = ip_block->adev; 1476 int ret = 0, i; 1477 1478 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 1479 struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; 1480 1481 ret |= vinst->set_pg_state(vinst, state); 1482 } 1483 1484 return ret; 1485 } 1486 1487 /** 1488 * amdgpu_vcn_reset_engine - Reset a specific VCN engine 1489 * @adev: Pointer to the AMDGPU device 1490 * @instance_id: VCN engine instance to reset 1491 * 1492 * Returns: 0 on success, or a negative error code on failure. 1493 */ 1494 static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev, 1495 uint32_t instance_id) 1496 { 1497 struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id]; 1498 int r, i; 1499 1500 mutex_lock(&vinst->engine_reset_mutex); 1501 /* Stop the scheduler's work queue for the dec and enc rings if they are running. 1502 * This ensures that no new tasks are submitted to the queues while 1503 * the reset is in progress. 1504 */ 1505 drm_sched_wqueue_stop(&vinst->ring_dec.sched); 1506 for (i = 0; i < vinst->num_enc_rings; i++) 1507 drm_sched_wqueue_stop(&vinst->ring_enc[i].sched); 1508 1509 /* Perform the VCN reset for the specified instance */ 1510 r = vinst->reset(vinst); 1511 if (r) 1512 goto unlock; 1513 r = amdgpu_ring_test_ring(&vinst->ring_dec); 1514 if (r) 1515 goto unlock; 1516 for (i = 0; i < vinst->num_enc_rings; i++) { 1517 r = amdgpu_ring_test_ring(&vinst->ring_enc[i]); 1518 if (r) 1519 goto unlock; 1520 } 1521 amdgpu_fence_driver_force_completion(&vinst->ring_dec); 1522 for (i = 0; i < vinst->num_enc_rings; i++) 1523 amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]); 1524 1525 /* Restart the scheduler's work queue for the dec and enc rings 1526 * if they were stopped by this function. This allows new tasks 1527 * to be submitted to the queues after the reset is complete. 1528 */ 1529 drm_sched_wqueue_start(&vinst->ring_dec.sched); 1530 for (i = 0; i < vinst->num_enc_rings; i++) 1531 drm_sched_wqueue_start(&vinst->ring_enc[i].sched); 1532 1533 unlock: 1534 mutex_unlock(&vinst->engine_reset_mutex); 1535 1536 return r; 1537 } 1538 1539 /** 1540 * amdgpu_vcn_ring_reset - Reset a VCN ring 1541 * @ring: ring to reset 1542 * @vmid: vmid of guilty job 1543 * @timedout_fence: fence of timed out job 1544 * 1545 * This helper is for VCN blocks without unified queues because 1546 * resetting the engine resets all queues in that case. With 1547 * unified queues we have one queue per engine. 1548 * Returns: 0 on success, or a negative error code on failure. 
1549 */ 1550 int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, 1551 unsigned int vmid, 1552 struct amdgpu_fence *timedout_fence) 1553 { 1554 struct amdgpu_device *adev = ring->adev; 1555 1556 if (adev->vcn.inst[ring->me].using_unified_queue) 1557 return -EINVAL; 1558 1559 return amdgpu_vcn_reset_engine(adev, ring->me); 1560 } 1561 1562 int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev, 1563 const struct amdgpu_hwip_reg_entry *reg, u32 count) 1564 { 1565 adev->vcn.ip_dump = kcalloc(adev->vcn.num_vcn_inst * count, 1566 sizeof(uint32_t), GFP_KERNEL); 1567 if (!adev->vcn.ip_dump) 1568 return -ENOMEM; 1569 adev->vcn.reg_list = reg; 1570 adev->vcn.reg_count = count; 1571 1572 return 0; 1573 } 1574 1575 static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev) 1576 { 1577 kfree(adev->vcn.ip_dump); 1578 adev->vcn.ip_dump = NULL; 1579 adev->vcn.reg_list = NULL; 1580 adev->vcn.reg_count = 0; 1581 } 1582 1583 void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block) 1584 { 1585 struct amdgpu_device *adev = ip_block->adev; 1586 int i, j; 1587 bool is_powered; 1588 u32 inst_off; 1589 1590 if (!adev->vcn.ip_dump) 1591 return; 1592 1593 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 1594 if (adev->vcn.harvest_config & (1 << i)) 1595 continue; 1596 1597 inst_off = i * adev->vcn.reg_count; 1598 /* mmUVD_POWER_STATUS is always readable and is the first in reg_list */ 1599 adev->vcn.ip_dump[inst_off] = 1600 RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], i)); 1601 is_powered = (adev->vcn.ip_dump[inst_off] & 1602 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) != 1603 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; 1604 1605 if (is_powered) 1606 for (j = 1; j < adev->vcn.reg_count; j++) 1607 adev->vcn.ip_dump[inst_off + j] = 1608 RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[j], i)); 1609 } 1610 } 1611 1612 void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 1613 { 1614 struct amdgpu_device *adev = ip_block->adev; 1615 int i, j; 1616 bool is_powered; 1617 u32 inst_off; 1618 1619 if (!adev->vcn.ip_dump) 1620 return; 1621 1622 drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); 1623 for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 1624 if (adev->vcn.harvest_config & (1 << i)) { 1625 drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); 1626 continue; 1627 } 1628 1629 inst_off = i * adev->vcn.reg_count; 1630 is_powered = (adev->vcn.ip_dump[inst_off] & 1631 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) != 1632 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; 1633 1634 if (is_powered) { 1635 drm_printf(p, "\nActive Instance:VCN%d\n", i); 1636 for (j = 0; j < adev->vcn.reg_count; j++) 1637 drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name, 1638 adev->vcn.ip_dump[inst_off + j]); 1639 } else { 1640 drm_printf(p, "\nInactive Instance:VCN%d\n", i); 1641 } 1642 } 1643 } 1644