/*
 * Copyright 2016-2024 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN			"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO		"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2			"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS		"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR			"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE		"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10			"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14			"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12			"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID		"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER		"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH		"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN		"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY		"amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP		"amdgpu/yellow_carp_vcn.bin"
#define FIRMWARE_VCN_3_1_2		"amdgpu/vcn_3_1_2.bin"
#define FIRMWARE_VCN4_0_0		"amdgpu/vcn_4_0_0.bin"
#define FIRMWARE_VCN4_0_2		"amdgpu/vcn_4_0_2.bin"
#define FIRMWARE_VCN4_0_3		"amdgpu/vcn_4_0_3.bin"
#define FIRMWARE_VCN4_0_4		"amdgpu/vcn_4_0_4.bin"
#define FIRMWARE_VCN4_0_5		"amdgpu/vcn_4_0_5.bin"
#define FIRMWARE_VCN4_0_6		"amdgpu/vcn_4_0_6.bin"
#define FIRMWARE_VCN4_0_6_1		"amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0		"amdgpu/vcn_5_0_0.bin"
#define FIRMWARE_VCN5_0_1		"amdgpu/vcn_5_0_1.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

/* Request the VCN firmware for instance @i: a per-instance image when
 * adev->vcn.per_inst_fw is set, otherwise the image shared with instance 0.
 */
int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
{
	char ucode_prefix[25];
	int r;

	adev->vcn.inst[i].adev = adev;
	adev->vcn.inst[i].inst = i;
	amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));

	if (i != 0 && adev->vcn.per_inst_fw) {
		r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
					 AMDGPU_UCODE_REQUIRED,
					 "amdgpu/%s_%d.bin", ucode_prefix, i);
		if (r)
			amdgpu_ucode_release(&adev->vcn.inst[i].fw);
	} else {
		if (!adev->vcn.inst[0].fw) {
			r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
						 AMDGPU_UCODE_REQUIRED,
						 "amdgpu/%s.bin", ucode_prefix);
			if (r)
				amdgpu_ucode_release(&adev->vcn.inst[0].fw);
		} else {
			r = 0;
		}
		adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
	}

	return r;
}

int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
	unsigned long bo_size;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	unsigned int fw_shared_size, log_offset;
	int r;

	mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
	mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
	atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
	INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
	atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
	    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
		adev->vcn.inst[i].indirect_sram = true;

	/*
	 * Some Steam Deck's BIOS versions are incompatible with the
	 * indirect SRAM mode, leading to amdgpu being unable to get
	 * properly probed (and even potentially crashing the kernel).
	 * Hence, check for these versions here - notice this is
	 * restricted to Vangogh (Deck's APU).
	 */
	if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) {
		const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);

		if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
				 !strncmp("F7A0114", bios_ver, 7))) {
			adev->vcn.inst[i].indirect_sram = false;
			dev_info(adev->dev,
				 "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
		}
	}

	/* from vcn4 and above, only unified queue is used */
	adev->vcn.inst[i].using_unified_queue =
		amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);

	hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
	adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 are the encode major version and are non-zero for the new
	 * naming convention. This field is part of version minor and
	 * DRM_DISABLED_FLAG in the old naming convention. Since the latest
	 * version minor is 0x5B and DRM_DISABLED_FLAG is zero in the old naming
	 * convention, this field is always zero so far.
	 * These four bits are used to tell which naming convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		dev_info(adev->dev,
			 "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			 enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n",
			 version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
		log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
	} else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
		log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
	} else {
		fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
	}

	bo_size += fw_shared_size;

	if (amdgpu_vcnfw_log)
		bo_size += AMDGPU_VCNFW_LOG_SIZE;

	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->vcn.inst[i].vcpu_bo,
				    &adev->vcn.inst[i].gpu_addr,
				    &adev->vcn.inst[i].cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
		bo_size - fw_shared_size;
	adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
		bo_size - fw_shared_size;

	adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;

	if (amdgpu_vcnfw_log) {
		adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
		adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
		adev->vcn.inst[i].fw_shared.log_offset = log_offset;
	}

	if (adev->vcn.inst[i].indirect_sram) {
		r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM |
					    AMDGPU_GEM_DOMAIN_GTT,
					    &adev->vcn.inst[i].dpg_sram_bo,
					    &adev->vcn.inst[i].dpg_sram_gpu_addr,
					    &adev->vcn.inst[i].dpg_sram_cpu_addr);
		if (r) {
			dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
			return r;
		}
	}

	return 0;
}

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
	int j;

	if (adev->vcn.harvest_config & (1 << i))
		return 0;

	amdgpu_bo_free_kernel(
		&adev->vcn.inst[i].dpg_sram_bo,
		&adev->vcn.inst[i].dpg_sram_gpu_addr,
		(void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);

	kvfree(adev->vcn.inst[i].saved_bo);

	amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
			      &adev->vcn.inst[i].gpu_addr,
			      (void **)&adev->vcn.inst[i].cpu_addr);

	amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);

	for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
		amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);

	if (adev->vcn.per_inst_fw) {
		amdgpu_ucode_release(&adev->vcn.inst[i].fw);
	} else {
		amdgpu_ucode_release(&adev->vcn.inst[0].fw);
		adev->vcn.inst[i].fw = NULL;
	}
	mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
	mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);

	return 0;
}

bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
	bool ret = false;
	int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;

	if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
		ret = true;
	else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
		ret = true;
	else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
		ret = true;

	return ret;
}

static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
{
	unsigned int size;
	void *ptr;
	int idx;

	if (adev->vcn.harvest_config & (1 << i))
		return 0;
	if (adev->vcn.inst[i].vcpu_bo == NULL)
		return 0;

	size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
	ptr = adev->vcn.inst[i].cpu_addr;

	adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
	if (!adev->vcn.inst[i].saved_bo)
		return -ENOMEM;

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
		drm_dev_exit(idx);
	}

	return 0;
}

int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
{
	int ret, i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
		if (ret)
			return ret;
	}

	return 0;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
{
	bool in_ras_intr = amdgpu_ras_intr_triggered();

	if (adev->vcn.harvest_config & (1 << i))
		return 0;

	cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);

	/* err_event_athub will corrupt VCPU buffer, so we need to
	 * restore fw data and clear buffer in amdgpu_vcn_resume() */
	if (in_ras_intr)
		return 0;

	return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
}

int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
{
	unsigned int size;
	void *ptr;
	int idx;

	if (adev->vcn.harvest_config & (1 << i))
		return 0;
	if (adev->vcn.inst[i].vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
	ptr = adev->vcn.inst[i].cpu_addr;

	if (adev->vcn.inst[i].saved_bo != NULL) {
		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
			drm_dev_exit(idx);
		}
		kvfree(adev->vcn.inst[i].saved_bo);
		adev->vcn.inst[i].saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned int offset;

		hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
			if (drm_dev_enter(adev_to_drm(adev), &idx)) {
				memcpy_toio(adev->vcn.inst[i].cpu_addr,
					    adev->vcn.inst[i].fw->data + offset,
					    le32_to_cpu(hdr->ucode_size_bytes));
				drm_dev_exit(idx);
			}
			size -= le32_to_cpu(hdr->ucode_size_bytes);
			ptr += le32_to_cpu(hdr->ucode_size_bytes);
		}
		memset_io(ptr, 0, size);
	}

	return 0;
}

/* Delayed idle work: if all rings of this instance are idle and nothing is
 * being submitted, gate the instance and drop the video power profile;
 * otherwise reschedule the check.
 */
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_vcn_inst *vcn_inst =
		container_of(work, struct amdgpu_vcn_inst, idle_work.work);
	struct amdgpu_device *adev = vcn_inst->adev;
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i = vcn_inst->inst, j;
	int r = 0;

	if (adev->vcn.harvest_config & (1 << i))
		return;

	for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
		fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);

	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    !adev->vcn.inst[i].using_unified_queue) {
		struct dpg_pause_state new_state;

		if (fence[i] ||
		    unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		else
			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

		adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
	}

	fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
	fences += fence[i];

	if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
		vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
		mutex_lock(&adev->vcn.workload_profile_mutex);
		if (adev->vcn.workload_profile_active) {
			r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
							    false);
			if (r)
				dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
			adev->vcn.workload_profile_active = false;
		}
		mutex_unlock(&adev->vcn.workload_profile_mutex);
	} else {
		schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
	int r = 0;

	atomic_inc(&vcn_inst->total_submission_cnt);

	cancel_delayed_work_sync(&vcn_inst->idle_work);

	/* We can safely return early here because we've cancelled the
	 * delayed work so there is no one else to set it to false
	 * and we don't care if someone else sets it to true.
	 */
	if (adev->vcn.workload_profile_active)
		goto pg_lock;

	mutex_lock(&adev->vcn.workload_profile_mutex);
	if (!adev->vcn.workload_profile_active) {
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						    true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
		adev->vcn.workload_profile_active = true;
	}
	mutex_unlock(&adev->vcn.workload_profile_mutex);

pg_lock:
	mutex_lock(&vcn_inst->vcn_pg_lock);
	vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);

	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    !vcn_inst->using_unified_queue) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;

			for (i = 0; i < vcn_inst->num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);

			if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
	}
	mutex_unlock(&vcn_inst->vcn_pg_lock);
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
	    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
	    !adev->vcn.inst[ring->me].using_unified_queue)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
			      VCN_IDLE_TIMEOUT);
}

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned int i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib_msg,
				   struct dma_fence **fence)
{
	u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
				     64, AMDGPU_IB_POOL_DIRECT,
				     &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_ib_free(ib_msg, f);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_ib_free(ib_msg, f);
	return r;
}

static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_ib *ib)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  ib);
	if (r)
		return r;

	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_ib *ib)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	memset(ib, 0, sizeof(*ib));
	r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
			  AMDGPU_IB_POOL_DIRECT,
			  ib);
	if (r)
		return r;

	msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
						   uint32_t ib_pack_in_dw, bool enc)
{
	uint32_t *ib_checksum;

	ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
	ib->ptr[ib->length_dw++] = 0x30000002;
	ib_checksum = &ib->ptr[ib->length_dw++];
	ib->ptr[ib->length_dw++] = ib_pack_in_dw;

	ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
	ib->ptr[ib->length_dw++] = 0x30000001;
	ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
	ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);

	return ib_checksum;
}

static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
						uint32_t ib_pack_in_dw)
{
	uint32_t i;
	uint32_t checksum = 0;

	for (i = 0; i < ib_pack_in_dw; i++)
		checksum += *(*ib_checksum + 2 + i);

	**ib_checksum = checksum;
}

static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib_msg,
				      struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
	uint32_t *ib_checksum;
	uint32_t ib_pack_in_dw;
	int i, r;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		ib_size_dw += 8;

	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
				     &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	ib->length_dw = 0;

	/* single queue headers */
	if (adev->vcn.inst[ring->me].using_unified_queue) {
		ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
				+ 4 + 2; /* engine info + decoding ib in dw */
		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
	}

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_ib_free(ib_msg, f);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);
err:
	amdgpu_ib_free(ib_msg, f);
	return r;
}

int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_ib *ib_msg,
					 struct dma_fence **fence)
{
	unsigned int ib_size_dw = 16;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint32_t *ib_checksum = NULL;
	uint64_t addr;
	int i, r;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		ib_size_dw += 8;

	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
				     &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

	ib->length_dw = 0;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);

	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_ib *ib_msg,
					  struct dma_fence **fence)
{
	unsigned int ib_size_dw = 16;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint32_t *ib_checksum = NULL;
	uint64_t addr;
	int i, r;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		ib_size_dw += 8;

	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
				     &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);

	ib->length_dw = 0;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);

	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (adev->vcn.inst[ring->me].using_unified_queue)
		amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence = NULL;
	struct amdgpu_ib ib;
	long r;

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
			  AMDGPU_IB_POOL_DIRECT,
			  &ib);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	amdgpu_ib_free(&ib, fence);
	dma_fence_put(fence);

	return r;
}

int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	long r;

	if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
	    (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
		r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
		if (r)
			goto error;
	}

	r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);

error:
	return r;
}

enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
	switch (ring) {
	case 0:
		return AMDGPU_RING_PRIO_0;
	case 1:
		return AMDGPU_RING_PRIO_1;
	case 2:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
{
	unsigned int idx;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		if (adev->vcn.harvest_config & (1 << i))
			return;

		if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
		     amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
		    && (i > 0))
			return;

		hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
		/* currently only support 2 FW instances */
		if (i >= 2) {
			dev_info(adev->dev, "More than 2 VCN FW instances!\n");
			return;
		}
		idx = AMDGPU_UCODE_ID_VCN + i;
		adev->firmware.ucode[idx].ucode_id = idx;
		adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
	}
}

/*
 * debugfs for mapping vcn firmware log buffer.
 */
#if defined(CONFIG_DEBUG_FS)
static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_vcn_inst *vcn;
	void *log_buf;
	volatile struct amdgpu_vcn_fwlog *plog;
	unsigned int read_pos, write_pos, available, i, read_bytes = 0;
	unsigned int read_num[2] = {0};

	vcn = file_inode(f)->i_private;
	if (!vcn)
		return -ENODEV;

	if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
		return -EFAULT;

	log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;

	plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
	read_pos = plog->rptr;
	write_pos = plog->wptr;

	if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
		return -EFAULT;

	if (!size || (read_pos == write_pos))
		return 0;

	if (write_pos > read_pos) {
		available = write_pos - read_pos;
		read_num[0] = min_t(size_t, size, available);
	} else {
		read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
		available = read_num[0] + write_pos - plog->header_size;
		if (size > available)
			read_num[1] = write_pos - plog->header_size;
		else if (size > read_num[0])
			read_num[1] = size - read_num[0];
		else
			read_num[0] = size;
	}

	for (i = 0; i < 2; i++) {
		if (read_num[i]) {
			if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
				read_pos = plog->header_size;
			if (read_num[i] == copy_to_user((buf + read_bytes),
							(log_buf + read_pos), read_num[i]))
				return -EFAULT;

			read_bytes += read_num[i];
			read_pos += read_num[i];
		}
	}

	plog->rptr = read_pos;
	*pos += read_bytes;
	return read_bytes;
}

static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_vcn_fwlog_read,
	.llseek = default_llseek
};
#endif

void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
				   struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	sprintf(name, "amdgpu_vcn_%d_fwlog", i);
	debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
				 &amdgpu_debugfs_vcnfwlog_fops,
				 AMDGPU_VCNFW_LOG_SIZE);
#endif
}

void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
	volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
	void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
	uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
	volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
	volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
		+ vcn->fw_shared.log_offset;
	*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
	fw_log->is_enabled = 1;
	fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
	fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
	fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);

	log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
	log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
	log_buf->rptr = log_buf->header_size;
	log_buf->wptr = log_buf->header_size;
	log_buf->wrapped = 0;
#endif
}

int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->vcn.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	if (!amdgpu_sriov_vf(adev)) {
		ih_data.head = *ras_if;
		amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	} else {
		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
			adev->virt.ops->ras_poison_handler(adev, ras_if->block);
		else
			dev_warn(adev->dev,
				 "No ras_poison_handler interface in SRIOV for VCN!\n");
	}

	return 0;
}

int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r, i;

	r = amdgpu_ras_block_late_init(adev, ras_block);
	if (r)
		return r;

	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			if (adev->vcn.harvest_config & (1 << i) ||
			    !adev->vcn.inst[i].ras_poison_irq.funcs)
				continue;

			r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
			if (r)
				goto late_fini;
		}
	}
	return 0;

late_fini:
	amdgpu_ras_block_late_fini(adev, ras_block);
	return r;
}

int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
{
	int err;
	struct amdgpu_vcn_ras *ras;

	if (!adev->vcn.ras)
		return 0;

	ras = adev->vcn.ras;
	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
	if (err) {
		dev_err(adev->dev, "Failed to register vcn ras block!\n");
		return err;
	}

	strcpy(ras->ras_block.ras_comm.name, "vcn");
	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
	adev->vcn.ras_if = &ras->ras_block.ras_comm;

	if (!ras->ras_block.ras_late_init)
		ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;

	return 0;
}

int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
			       enum AMDGPU_UCODE_ID ucode_id)
{
	struct amdgpu_firmware_info ucode = {
		.ucode_id = (ucode_id ? ucode_id :
			     (inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
					 AMDGPU_UCODE_ID_VCN0_RAM)),
		.mc_addr = adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
		.ucode_size = ((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
			       (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr),
	};

	return psp_execute_ip_fw_load(&adev->psp, &ucode);
}

static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	if (!adev)
		return -ENODEV;

	return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset);
}

static DEVICE_ATTR(vcn_reset_mask, 0444,
		   amdgpu_get_vcn_reset_mask, NULL);

int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->vcn.num_vcn_inst) {
		r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask);
		if (r)
			return r;
	}

	return r;
}

void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
{
	if (adev->dev->kobj.sd) {
		if (adev->vcn.num_vcn_inst)
			device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
	}
}

/*
 * debugfs to enable/disable vcn job submission to specific core or
 * instance. It is created only if the queue type is unified.
 */
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	u32 i;
	u64 mask;
	struct amdgpu_ring *ring;

	if (!adev)
		return -ENODEV;

	mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
	if ((val & mask) == 0)
		return -EINVAL;
	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		ring = &adev->vcn.inst[i].ring_enc[0];
		if (val & (1ULL << i))
			ring->sched.ready = true;
		else
			ring->sched.ready = false;
	}
	/* publish sched.ready flag update effective immediately across smp */
	smp_rmb();
	return 0;
}

static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	u32 i;
	u64 mask = 0;
	struct amdgpu_ring *ring;

	if (!adev)
		return -ENODEV;
	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		ring = &adev->vcn.inst[i].ring_enc[0];
		if (ring->sched.ready)
			mask |= 1ULL << i;
	}
	*val = mask;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
			 amdgpu_debugfs_vcn_sched_mask_get,
			 amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
#endif

void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
		return;
	sprintf(name, "amdgpu_vcn_sched_mask");
	debugfs_create_file(name, 0600, root, adev,
			    &amdgpu_debugfs_vcn_sched_mask_fops);
#endif
}

/**
 * vcn_set_powergating_state - set VCN block powergating state
 *
 * @ip_block: amdgpu_ip_block pointer
 * @state: power gating state
 *
 * Set VCN block powergating state
 */
int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
			      enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	int ret = 0, i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];

		ret |= vinst->set_pg_state(vinst, state);
	}

	return ret;
}