/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;

	if (ring->me == 0)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring->me == 1)
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring->me == 0)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring->me == 1)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}
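/**
 * vce_v4_0_firmware_loaded - poll for firmware bring-up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the VCPU reports that the firmware is loaded,
 * toggling the ECPU soft reset between retries.
 * Returns 0 on success or -ETIMEDOUT if the firmware never comes up.
 */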
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}
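/**
 * vce_v4_0_mmsch_start - kick off the MMSCH under SR-IOV
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the init descriptor
 *
 * Points the MMSCH at the init table, programs the VMID and table size,
 * then rings the mailbox and polls for the MMSCH response.
 */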
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	*adev->vce.ring[0].wptr_cpu_addr = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
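/**
 * vce_v4_0_sriov_start - start VCE block under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH init table for VCE (ring registers plus the MC_RESUME
 * programming) on first use and hands it to the MMSCH via
 * vce_v4_0_mmsch_start().
 */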
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
					(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
					adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
					mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
					(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
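/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Halts the VCPU, puts the ECPU into soft reset and clears VCE_STATUS.
 */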
static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	/* Disable VCPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear VCE_STATUS */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}
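/**
 * vce_v4_0_sw_init - software init for VCE block
 *
 * @ip_block: pointer to the amdgpu_ip_block for this IP
 *
 * Registers the VCE interrupt source, loads the firmware and sets up the
 * VCPU buffer object, initializes the rings and allocates the MM table
 * used under SR-IOV.
 */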
static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;

	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

		ring = &adev->vce.ring[i];
		ring->vm_hub = AMDGPU_MMHUB0(0);
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused location for other unused rings.
			 */
			if (i == 0)
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
			else
				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     hw_prio, NULL);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kvfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r, i;
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(ip_block); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	return 0;
}
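/**
 * vce_v4_0_suspend - suspend VCE block
 *
 * @ip_block: pointer to the amdgpu_ip_block for this IP
 *
 * Saves the VCPU buffer object contents when the firmware is loaded by
 * the PSP, gates the block via dpm/powergating/clockgating and then
 * shuts the hardware down.
 */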
static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_fromio(adev->vce.saved_bo, ptr, size);
		}
		drm_dev_exit(idx);
	}

	/*
	 * Proper cleanups before halting the HW engine:
	 * - cancel the delayed idle work
	 * - enable powergating
	 * - enable clockgating
	 * - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v4_0_hw_fini(ip_block);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r, idx;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
			void *ptr = adev->vce.cpu_addr;

			memcpy_toio(ptr, adev->vce.saved_bo, size);
			drm_dev_exit(idx);
		}
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(ip_block);
}
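/**
 * vce_v4_0_mc_resume - program memory controller registers
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the clock gating and LMI registers and points the VCPU
 * caches at the firmware, stack and data regions.
 */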
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;
	uint64_t tmr_mc_addr;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (state == AMD_PG_STATE_GATE)
		return vce_v4_0_stop(adev);
	else
		return vce_v4_0_start(adev);
}

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
					struct amdgpu_ib *ib, uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
			       vmid * hub->ctx_addr_distance,
			       lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
			       uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
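/**
 * vce_v4_0_set_interrupt_state - toggle the VCE trap interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Enables or disables the VCE system interrupt.  Under SR-IOV the
 * interrupt enable register is left untouched.
 */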
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
	.emit_wreg = vce_v4_0_emit_wreg,
	.emit_reg_wait = vce_v4_0_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
		adev->vce.ring[i].me = i;
	}
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};