/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
#include "soc15_common.h"

#include "vcn/vcn_1_0_offset.h"

/* 1 second timeout */
#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
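/**
 * amdgpu_vcn_sw_init - load VCN firmware and allocate the VCPU buffer object
 * @adev: amdgpu device pointer
 *
 * Requests and validates the VCN firmware for the detected ASIC, logs the
 * version information encoded in the firmware header, and allocates a single
 * VRAM BO sized for the firmware image plus the stack, heap and session
 * areas used by the VCPU. Returns 0 on success or a negative error code.
 */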
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		fw_name = FIRMWARE_RAVEN;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 are the encode major version and are non-zero for the new
	 * naming convention. In the old naming convention this field is part of
	 * the version minor and DRM_DISABLED_FLAG. Since the latest version
	 * minor is 0x5B and DRM_DISABLED_FLAG is zero in the old convention,
	 * this field has always been zero so far. These four bits are used to
	 * tell which naming convention is present.
	 */
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			 enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			 version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		  + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
		  + AMDGPU_VCN_SESSION_SIZE * 40;
	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	return 0;
}

int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i;

	kfree(adev->vcn.saved_bo);

	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
			      &adev->vcn.gpu_addr,
			      (void **)&adev->vcn.cpu_addr);

	amdgpu_ring_fini(&adev->vcn.ring_dec);

	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);

	release_firmware(adev->vcn.fw);

	return 0;
}

int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	/* save the complete VCPU BO contents so resume can restore them */
	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->vcn.saved_bo)
		return -ENOMEM;

	memcpy_fromio(adev->vcn.saved_bo, ptr, size);

	return 0;
}

int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	if (adev->vcn.saved_bo != NULL) {
		/* restore the VCPU state saved at suspend time */
		memcpy_toio(ptr, adev->vcn.saved_bo, size);
		kfree(adev->vcn.saved_bo);
		adev->vcn.saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		/* no saved state: reload the firmware image and clear the rest */
		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
			    le32_to_cpu(hdr->ucode_size_bytes));
		size -= le32_to_cpu(hdr->ucode_size_bytes);
		ptr += le32_to_cpu(hdr->ucode_size_bytes);
		memset_io(ptr, 0, size);
	}

	return 0;
}
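/*
 * Idle handling: the delayed work below power-gates VCN (through DPM when it
 * is enabled, otherwise through the IP powergating state) once no fences are
 * outstanding on the decode and encode rings. amdgpu_vcn_ring_begin_use() and
 * amdgpu_vcn_ring_end_use() ungate the block around each submission and
 * re-arm the VCN_IDLE_TIMEOUT.
 */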
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
	unsigned i;

	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
	}

	if (fences == 0) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, false);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_GATE);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);

	/* ungate VCN before the first submission after an idle period */
	if (set_clocks) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, true);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_UNGATE);
	}
}

void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* seed the scratch register, then let the ring overwrite it */
	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring,
			  PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	/* point the VCPU at the message buffer and issue the command */
	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err_free;

	amdgpu_job_free(job);

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}
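/*
 * The decode IB test submits a minimal "create session" message followed by a
 * "destroy session" message for the same handle. Dword 4 carries the session
 * handle in both messages and dword 3 looks like the message type (0 for
 * create, 2 for destroy); the create message also appears to encode a
 * 1920x1088 (0x780 x 0x440) image size. The field interpretation here is an
 * assumption based on the UVD-style decoder message layout.
 */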
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}

static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}

int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}

	dma_fence_put(fence);

error:
	return r;
}

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r) {
		DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	/* a single END command is enough to make the read pointer advance */
	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed\n",
			  ring->idx);
		r = -ETIMEDOUT;
	}

	return r;
}
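/*
 * The encode IB tests mirror the decode ones: a session-info header for the
 * given handle, a task-info block, then either an "op initialize"
 * (0x08000001) or an "op close session" (0x08000002) command, with the rest
 * of the IB padded with zeroes up to ib_size_dw.
 */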
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}
error:
	dma_fence_put(fence);
	return r;
}