/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

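/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports that the firmware is loaded,
 * toggling the ECPU soft reset between retries.  Returns 0 on success or
 * -ETIMEDOUT if the firmware never responds.
 */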
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	*adev->vce.ring[0].wptr_cpu_addr = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

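/**
 * vce_v4_0_sriov_start - start VCE block under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init table for VCE (ring setup plus the MC_RESUME
 * register programming) and hands it to the MMSCH via
 * vce_v4_0_mmsch_start().
 */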
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* begin of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

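/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU, holds the ECPU in soft reset and clears VCE_STATUS.
 */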
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_vce_early_init(adev);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev)) /* currently only the first VCE ring is supported under SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

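/**
 * vce_v4_0_sw_init - software init routine for the VCE block
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Registers the VCE interrupt source, allocates the firmware/stack/data
 * memory, initializes the rings (doorbell based under SRIOV) and allocates
 * the MM table used by the MMSCH.
 */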
static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

		ring = &adev->vce.ring[i];
		ring->vm_hub = AMDGPU_MMHUB0(0);
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set an unused doorbell location for the other rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     hw_prio, NULL);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r, i;
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(ip_block); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	return 0;
}

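/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Saves the VCPU buffer contents when the firmware is PSP loaded, gates
 * clocks and power, tears down the hardware and suspends the VCE state.
 */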
static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	/*
	 * Proper cleanups before halting the HW engine:
	 * - cancel the delayed idle work
	 * - enable powergating
	 * - enable clockgating
	 * - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v4_0_hw_fini(ip_block);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {

		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_toio(ptr, adev->vce.saved_bo, size);
			drm_dev_exit(idx);
		}
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(ip_block);
}

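/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the LMI and VCPU cache registers with the firmware, stack and
 * data offsets so that the VCPU can access its image in memory.
 */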
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = ip_block->adev;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

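/**
 * vce_v4_0_set_interrupt_state - toggle the VCE system interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Enables or disables the VCE system interrupt.  Skipped under SRIOV,
 * where the host owns the interrupt enable register.
 */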
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};