1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 
24 * 25 */ 26 27 #include <linux/firmware.h> 28 #include <drm/drm_drv.h> 29 30 #include "amdgpu.h" 31 #include "amdgpu_vce.h" 32 #include "soc15.h" 33 #include "soc15d.h" 34 #include "soc15_common.h" 35 #include "mmsch_v1_0.h" 36 37 #include "vce/vce_4_0_offset.h" 38 #include "vce/vce_4_0_default.h" 39 #include "vce/vce_4_0_sh_mask.h" 40 #include "mmhub/mmhub_1_0_offset.h" 41 #include "mmhub/mmhub_1_0_sh_mask.h" 42 43 #include "ivsrcid/vce/irqsrcs_vce_4_0.h" 44 45 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 46 47 #define VCE_V4_0_FW_SIZE (384 * 1024) 48 #define VCE_V4_0_STACK_SIZE (64 * 1024) 49 #define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024)) 50 51 static void vce_v4_0_mc_resume(struct amdgpu_device *adev); 52 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev); 53 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev); 54 55 /** 56 * vce_v4_0_ring_get_rptr - get read pointer 57 * 58 * @ring: amdgpu_ring pointer 59 * 60 * Returns the current hardware read pointer 61 */ 62 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring) 63 { 64 struct amdgpu_device *adev = ring->adev; 65 66 if (ring->me == 0) 67 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR)); 68 else if (ring->me == 1) 69 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2)); 70 else 71 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3)); 72 } 73 74 /** 75 * vce_v4_0_ring_get_wptr - get write pointer 76 * 77 * @ring: amdgpu_ring pointer 78 * 79 * Returns the current hardware write pointer 80 */ 81 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring) 82 { 83 struct amdgpu_device *adev = ring->adev; 84 85 if (ring->use_doorbell) 86 return *ring->wptr_cpu_addr; 87 88 if (ring->me == 0) 89 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR)); 90 else if (ring->me == 1) 91 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2)); 92 else 93 return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3)); 94 } 95 96 /** 97 * 
vce_v4_0_ring_set_wptr - set write pointer 98 * 99 * @ring: amdgpu_ring pointer 100 * 101 * Commits the write pointer to the hardware 102 */ 103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring) 104 { 105 struct amdgpu_device *adev = ring->adev; 106 107 if (ring->use_doorbell) { 108 /* XXX check if swapping is necessary on BE */ 109 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); 110 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 111 return; 112 } 113 114 if (ring->me == 0) 115 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), 116 lower_32_bits(ring->wptr)); 117 else if (ring->me == 1) 118 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), 119 lower_32_bits(ring->wptr)); 120 else 121 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), 122 lower_32_bits(ring->wptr)); 123 } 124 125 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev) 126 { 127 int i, j; 128 129 for (i = 0; i < 10; ++i) { 130 for (j = 0; j < 100; ++j) { 131 uint32_t status = 132 RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)); 133 134 if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK) 135 return 0; 136 mdelay(10); 137 } 138 139 DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); 140 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 141 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 142 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 143 mdelay(10); 144 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, 145 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 146 mdelay(10); 147 148 } 149 150 return -ETIMEDOUT; 151 } 152 153 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, 154 struct amdgpu_mm_table *table) 155 { 156 uint32_t data = 0, loop; 157 uint64_t addr = table->gpu_addr; 158 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; 159 uint32_t size; 160 161 size = header->header_size + header->vce_table_size + header->uvd_table_size; 162 163 /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor 
location */ 164 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); 165 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); 166 167 /* 2, update vmid of descriptor */ 168 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); 169 data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; 170 data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ 171 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); 172 173 /* 3, notify mmsch about the size of this descriptor */ 174 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); 175 176 /* 4, set resp to zero */ 177 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); 178 179 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); 180 *adev->vce.ring[0].wptr_cpu_addr = 0; 181 adev->vce.ring[0].wptr = 0; 182 adev->vce.ring[0].wptr_old = 0; 183 184 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ 185 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); 186 187 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); 188 loop = 1000; 189 while ((data & 0x10000002) != 0x10000002) { 190 udelay(10); 191 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); 192 loop--; 193 if (!loop) 194 break; 195 } 196 197 if (!loop) { 198 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); 199 return -EBUSY; 200 } 201 202 return 0; 203 } 204 205 static int vce_v4_0_sriov_start(struct amdgpu_device *adev) 206 { 207 struct amdgpu_ring *ring; 208 uint32_t offset, size; 209 uint32_t table_size = 0; 210 struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; 211 struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; 212 struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } }; 213 struct mmsch_v1_0_cmd_end end = { { 0 } }; 214 uint32_t *init_table = adev->virt.mm_table.cpu_addr; 
struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	/*
	 * The remainder of this function fills the VCE section of the MM
	 * init table (unless it is already populated) and then hands the
	 * table to vce_v4_0_mmsch_start().
	 *
	 * Pre-set the command type of each packet template.  The
	 * MMSCH_V1_0_INSERT_* macros used below are defined outside this file;
	 * presumably they fill these templates, copy them to init_table and
	 * advance init_table/table_size -- verify against mmsch_v1_0.h.
	 */
	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only build the VCE table when the header has no VCE entry yet */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		/* sizes and offsets in the header are counted in dwords */
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the VCE table right after the header, or after the UVD table if one exists */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring buffer base and size for ring 0 */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGINNING OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP load: cache window 0 is based on the ucode TMR address, offset 0 */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			/* direct load: cache window 0 is based on the VCE BO at the firmware offset */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* cache windows 1 and 2 are always based on the VCE BO */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP loading the firmware is not in the BO, so the stack region starts at 0 */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		/* the data region follows the stack region */
		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
				VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
				VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		/*
		 * NOTE(review): this entry passes the bare JOB_BUSY mask as the
		 * second argument and its complement as the third, while the
		 * "clear BUSY flag" entry below passes the complement second.
		 * Confirm the intended (mask, data) order against the
		 * MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT definition.
		 */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
				VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
				~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* poll VCE_STATUS for the firmware-loaded bit */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
				~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		/* record the final table length (in dwords) in the header */
		header->vce_table_size = table_size;
	}

	/* hand the (possibly pre-existing) table to the MM scheduler */
	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int
vce_v4_0_start(struct amdgpu_device *adev) 337 { 338 struct amdgpu_ring *ring; 339 int r; 340 341 ring = &adev->vce.ring[0]; 342 343 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr)); 344 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr)); 345 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr); 346 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); 347 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); 348 349 ring = &adev->vce.ring[1]; 350 351 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr)); 352 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr)); 353 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr); 354 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); 355 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4); 356 357 ring = &adev->vce.ring[2]; 358 359 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr)); 360 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr)); 361 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr); 362 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr)); 363 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4); 364 365 vce_v4_0_mc_resume(adev); 366 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK, 367 ~VCE_STATUS__JOB_BUSY_MASK); 368 369 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001); 370 371 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, 372 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 373 mdelay(100); 374 375 r = vce_v4_0_firmware_loaded(adev); 376 377 /* clear BUSY flag */ 378 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); 379 380 if (r) { 381 DRM_ERROR("VCE not responding, giving up!!!\n"); 382 return r; 383 } 384 385 
return 0; 386 } 387 388 static int vce_v4_0_stop(struct amdgpu_device *adev) 389 { 390 391 /* Disable VCPU */ 392 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); 393 394 /* hold on ECPU */ 395 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 396 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 397 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); 398 399 /* clear VCE_STATUS */ 400 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0); 401 402 /* Set Clock-Gating off */ 403 /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) 404 vce_v4_0_set_vce_sw_clock_gating(adev, false); 405 */ 406 407 return 0; 408 } 409 410 static int vce_v4_0_early_init(void *handle) 411 { 412 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 413 414 if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */ 415 adev->vce.num_rings = 1; 416 else 417 adev->vce.num_rings = 3; 418 419 vce_v4_0_set_ring_funcs(adev); 420 vce_v4_0_set_irq_funcs(adev); 421 422 return 0; 423 } 424 425 static int vce_v4_0_sw_init(void *handle) 426 { 427 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 428 struct amdgpu_ring *ring; 429 430 unsigned size; 431 int r, i; 432 433 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq); 434 if (r) 435 return r; 436 437 size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE; 438 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 439 size += VCE_V4_0_FW_SIZE; 440 441 r = amdgpu_vce_sw_init(adev, size); 442 if (r) 443 return r; 444 445 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 446 const struct common_firmware_header *hdr; 447 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); 448 449 adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL); 450 if (!adev->vce.saved_bo) 451 return -ENOMEM; 452 453 hdr = (const struct common_firmware_header *)adev->vce.fw->data; 454 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; 455 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; 456 adev->firmware.fw_size += 457 
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); 458 DRM_INFO("PSP loading VCE firmware\n"); 459 } else { 460 r = amdgpu_vce_resume(adev); 461 if (r) 462 return r; 463 } 464 465 for (i = 0; i < adev->vce.num_rings; i++) { 466 enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); 467 468 ring = &adev->vce.ring[i]; 469 sprintf(ring->name, "vce%d", i); 470 if (amdgpu_sriov_vf(adev)) { 471 /* DOORBELL only works under SRIOV */ 472 ring->use_doorbell = true; 473 474 /* currently only use the first encoding ring for sriov, 475 * so set unused location for other unused rings. 476 */ 477 if (i == 0) 478 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2; 479 else 480 ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1; 481 } 482 r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0, 483 hw_prio, NULL); 484 if (r) 485 return r; 486 } 487 488 489 r = amdgpu_vce_entity_init(adev); 490 if (r) 491 return r; 492 493 r = amdgpu_virt_alloc_mm_table(adev); 494 if (r) 495 return r; 496 497 return r; 498 } 499 500 static int vce_v4_0_sw_fini(void *handle) 501 { 502 int r; 503 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 504 505 /* free MM table */ 506 amdgpu_virt_free_mm_table(adev); 507 508 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 509 kvfree(adev->vce.saved_bo); 510 adev->vce.saved_bo = NULL; 511 } 512 513 r = amdgpu_vce_suspend(adev); 514 if (r) 515 return r; 516 517 return amdgpu_vce_sw_fini(adev); 518 } 519 520 static int vce_v4_0_hw_init(void *handle) 521 { 522 int r, i; 523 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 524 525 if (amdgpu_sriov_vf(adev)) 526 r = vce_v4_0_sriov_start(adev); 527 else 528 r = vce_v4_0_start(adev); 529 if (r) 530 return r; 531 532 for (i = 0; i < adev->vce.num_rings; i++) { 533 r = amdgpu_ring_test_helper(&adev->vce.ring[i]); 534 if (r) 535 return r; 536 } 537 538 DRM_INFO("VCE initialized successfully.\n"); 539 540 return 0; 541 } 542 543 static 
int vce_v4_0_hw_fini(void *handle) 544 { 545 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 546 547 cancel_delayed_work_sync(&adev->vce.idle_work); 548 549 if (!amdgpu_sriov_vf(adev)) { 550 /* vce_v4_0_wait_for_idle(handle); */ 551 vce_v4_0_stop(adev); 552 } else { 553 /* full access mode, so don't touch any VCE register */ 554 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); 555 } 556 557 return 0; 558 } 559 560 static int vce_v4_0_suspend(void *handle) 561 { 562 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 563 int r, idx; 564 565 if (adev->vce.vcpu_bo == NULL) 566 return 0; 567 568 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 569 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 570 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); 571 void *ptr = adev->vce.cpu_addr; 572 573 memcpy_fromio(adev->vce.saved_bo, ptr, size); 574 } 575 drm_dev_exit(idx); 576 } 577 578 /* 579 * Proper cleanups before halting the HW engine: 580 * - cancel the delayed idle work 581 * - enable powergating 582 * - enable clockgating 583 * - disable dpm 584 * 585 * TODO: to align with the VCN implementation, move the 586 * jobs for clockgating/powergating/dpm setting to 587 * ->set_powergating_state(). 
588 */ 589 cancel_delayed_work_sync(&adev->vce.idle_work); 590 591 if (adev->pm.dpm_enabled) { 592 amdgpu_dpm_enable_vce(adev, false); 593 } else { 594 amdgpu_asic_set_vce_clocks(adev, 0, 0); 595 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, 596 AMD_PG_STATE_GATE); 597 amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, 598 AMD_CG_STATE_GATE); 599 } 600 601 r = vce_v4_0_hw_fini(adev); 602 if (r) 603 return r; 604 605 return amdgpu_vce_suspend(adev); 606 } 607 608 static int vce_v4_0_resume(void *handle) 609 { 610 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 611 int r, idx; 612 613 if (adev->vce.vcpu_bo == NULL) 614 return -EINVAL; 615 616 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 617 618 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 619 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); 620 void *ptr = adev->vce.cpu_addr; 621 622 memcpy_toio(ptr, adev->vce.saved_bo, size); 623 drm_dev_exit(idx); 624 } 625 } else { 626 r = amdgpu_vce_resume(adev); 627 if (r) 628 return r; 629 } 630 631 return vce_v4_0_hw_init(adev); 632 } 633 634 static void vce_v4_0_mc_resume(struct amdgpu_device *adev) 635 { 636 uint32_t offset, size; 637 uint64_t tmr_mc_addr; 638 639 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16)); 640 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000); 641 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F); 642 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); 643 644 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000); 645 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1); 646 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); 647 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); 648 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); 649 650 offset = AMDGPU_VCE_FIRMWARE_OFFSET; 651 652 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 653 tmr_mc_addr = 
(uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
		/* PSP load: cache window 0 is based on the ucode TMR address, offset 0 */
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(tmr_mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(tmr_mc_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
	} else {
		/* direct load: cache window 0 is based on the VCE BO at the firmware offset */
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	}

	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* cache window 1 (stack-sized region) is always based on the VCE BO */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	/* with PSP loading the firmware is not in the BO, so this region starts at 0 */
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* cache window 2 (data-sized region) follows window 1 in the VCE BO */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	/* set the trap interrupt enable bit in VCE_SYS_INT_EN */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

/*
 * Clock gating state callback.  Does nothing and reports success; a fuller
 * implementation with the same name sits in the "#if 0" region below and
 * is not compiled.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

/*
 * Everything from here to the matching #endif is compiled out.
 * NOTE(review): it references SRBM/GRBM/SMC registers -- presumably carried
 * over from an earlier VCE generation; confirm before re-enabling.
 */
#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ?
0 : SRBM_STATUS2__VCE1_BUSY_MASK; 706 707 return !(RREG32(mmSRBM_STATUS2) & mask); 708 } 709 710 static int vce_v4_0_wait_for_idle(void *handle) 711 { 712 unsigned i; 713 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 714 715 for (i = 0; i < adev->usec_timeout; i++) 716 if (vce_v4_0_is_idle(handle)) 717 return 0; 718 719 return -ETIMEDOUT; 720 } 721 722 #define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */ 723 #define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */ 724 #define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */ 725 #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ 726 VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) 727 728 static bool vce_v4_0_check_soft_reset(void *handle) 729 { 730 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 731 u32 srbm_soft_reset = 0; 732 733 /* According to VCE team , we should use VCE_STATUS instead 734 * SRBM_STATUS.VCE_BUSY bit for busy status checking. 735 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE 736 * instance's registers are accessed 737 * (0 for 1st instance, 10 for 2nd instance). 
738 * 739 *VCE_STATUS 740 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB | 741 *|----+----+-----------+----+----+----+----------+---------+----| 742 *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0| 743 * 744 * VCE team suggest use bit 3--bit 6 for busy status check 745 */ 746 mutex_lock(&adev->grbm_idx_mutex); 747 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); 748 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { 749 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); 750 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); 751 } 752 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10); 753 if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { 754 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); 755 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); 756 } 757 WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); 758 mutex_unlock(&adev->grbm_idx_mutex); 759 760 if (srbm_soft_reset) { 761 adev->vce.srbm_soft_reset = srbm_soft_reset; 762 return true; 763 } else { 764 adev->vce.srbm_soft_reset = 0; 765 return false; 766 } 767 } 768 769 static int vce_v4_0_soft_reset(void *handle) 770 { 771 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 772 u32 srbm_soft_reset; 773 774 if (!adev->vce.srbm_soft_reset) 775 return 0; 776 srbm_soft_reset = adev->vce.srbm_soft_reset; 777 778 if (srbm_soft_reset) { 779 u32 tmp; 780 781 tmp = RREG32(mmSRBM_SOFT_RESET); 782 tmp |= srbm_soft_reset; 783 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 784 WREG32(mmSRBM_SOFT_RESET, tmp); 785 tmp = RREG32(mmSRBM_SOFT_RESET); 786 787 udelay(50); 788 789 tmp &= ~srbm_soft_reset; 790 WREG32(mmSRBM_SOFT_RESET, tmp); 791 tmp = RREG32(mmSRBM_SOFT_RESET); 792 793 /* Wait a little for things to settle down */ 794 udelay(50); 795 } 796 797 return 0; 798 } 799 800 static int 
vce_v4_0_pre_soft_reset(void *handle) 801 { 802 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 803 804 if (!adev->vce.srbm_soft_reset) 805 return 0; 806 807 mdelay(5); 808 809 return vce_v4_0_suspend(adev); 810 } 811 812 813 static int vce_v4_0_post_soft_reset(void *handle) 814 { 815 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 816 817 if (!adev->vce.srbm_soft_reset) 818 return 0; 819 820 mdelay(5); 821 822 return vce_v4_0_resume(adev); 823 } 824 825 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) 826 { 827 u32 tmp, data; 828 829 tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL)); 830 if (override) 831 data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK; 832 else 833 data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK; 834 835 if (tmp != data) 836 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data); 837 } 838 839 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, 840 bool gated) 841 { 842 u32 data; 843 844 /* Set Override to disable Clock Gating */ 845 vce_v4_0_override_vce_clock_gating(adev, true); 846 847 /* This function enables MGCG which is controlled by firmware. 848 With the clocks in the gated state the core is still 849 accessible but the firmware will throttle the clocks on the 850 fly as necessary. 
851 */ 852 if (gated) { 853 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B)); 854 data |= 0x1ff; 855 data &= ~0xef0000; 856 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data); 857 858 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING)); 859 data |= 0x3ff000; 860 data &= ~0xffc00000; 861 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data); 862 863 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2)); 864 data |= 0x2; 865 data &= ~0x00010000; 866 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data); 867 868 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING)); 869 data |= 0x37f; 870 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data); 871 872 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL)); 873 data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | 874 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | 875 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | 876 0x8; 877 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data); 878 } else { 879 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B)); 880 data &= ~0x80010; 881 data |= 0xe70008; 882 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data); 883 884 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING)); 885 data |= 0xffc00000; 886 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data); 887 888 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2)); 889 data |= 0x10000; 890 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data); 891 892 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING)); 893 data &= ~0xffc00000; 894 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data); 895 896 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL)); 897 data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | 898 VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | 899 VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | 
900 0x8); 901 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data); 902 } 903 vce_v4_0_override_vce_clock_gating(adev, false); 904 } 905 906 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) 907 { 908 u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL); 909 910 if (enable) 911 tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK; 912 else 913 tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK; 914 915 WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp); 916 } 917 918 static int vce_v4_0_set_clockgating_state(void *handle, 919 enum amd_clockgating_state state) 920 { 921 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 922 bool enable = (state == AMD_CG_STATE_GATE); 923 int i; 924 925 if ((adev->asic_type == CHIP_POLARIS10) || 926 (adev->asic_type == CHIP_TONGA) || 927 (adev->asic_type == CHIP_FIJI)) 928 vce_v4_0_set_bypass_mode(adev, enable); 929 930 if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) 931 return 0; 932 933 mutex_lock(&adev->grbm_idx_mutex); 934 for (i = 0; i < 2; i++) { 935 /* Program VCE Instance 0 or 1 if not harvested */ 936 if (adev->vce.harvest_config & (1 << i)) 937 continue; 938 939 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i); 940 941 if (enable) { 942 /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */ 943 uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A); 944 data &= ~(0xf | 0xff0); 945 data |= ((0x0 << 0) | (0x04 << 4)); 946 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data); 947 948 /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */ 949 data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING); 950 data &= ~(0xf | 0xff0); 951 data |= ((0x0 << 0) | (0x04 << 4)); 952 WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data); 953 } 954 955 vce_v4_0_set_vce_sw_clock_gating(adev, enable); 956 } 957 958 WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0); 959 mutex_unlock(&adev->grbm_idx_mutex); 960 961 return 0; 962 } 963 #endif 964 965 static int vce_v4_0_set_powergating_state(void 
*handle, 966 enum amd_powergating_state state) 967 { 968 /* This doesn't actually powergate the VCE block. 969 * That's done in the dpm code via the SMC. This 970 * just re-inits the block as necessary. The actual 971 * gating still happens in the dpm code. We should 972 * revisit this when there is a cleaner line between 973 * the smc and the hw blocks 974 */ 975 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 976 977 if (state == AMD_PG_STATE_GATE) 978 return vce_v4_0_stop(adev); 979 else 980 return vce_v4_0_start(adev); 981 } 982 983 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, 984 struct amdgpu_ib *ib, uint32_t flags) 985 { 986 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 987 988 amdgpu_ring_write(ring, VCE_CMD_IB_VM); 989 amdgpu_ring_write(ring, vmid); 990 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); 991 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 992 amdgpu_ring_write(ring, ib->length_dw); 993 } 994 995 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 996 u64 seq, unsigned flags) 997 { 998 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 999 1000 amdgpu_ring_write(ring, VCE_CMD_FENCE); 1001 amdgpu_ring_write(ring, addr); 1002 amdgpu_ring_write(ring, upper_32_bits(addr)); 1003 amdgpu_ring_write(ring, seq); 1004 amdgpu_ring_write(ring, VCE_CMD_TRAP); 1005 } 1006 1007 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) 1008 { 1009 amdgpu_ring_write(ring, VCE_CMD_END); 1010 } 1011 1012 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 1013 uint32_t val, uint32_t mask) 1014 { 1015 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); 1016 amdgpu_ring_write(ring, reg << 2); 1017 amdgpu_ring_write(ring, mask); 1018 amdgpu_ring_write(ring, val); 1019 } 1020 1021 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, 1022 unsigned int vmid, uint64_t pd_addr) 1023 { 1024 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1025 1026 pd_addr 
= amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 1027 1028 /* wait for reg writes */ 1029 vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + 1030 vmid * hub->ctx_addr_distance, 1031 lower_32_bits(pd_addr), 0xffffffff); 1032 } 1033 1034 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring, 1035 uint32_t reg, uint32_t val) 1036 { 1037 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 1038 amdgpu_ring_write(ring, reg << 2); 1039 amdgpu_ring_write(ring, val); 1040 } 1041 1042 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, 1043 struct amdgpu_irq_src *source, 1044 unsigned type, 1045 enum amdgpu_interrupt_state state) 1046 { 1047 uint32_t val = 0; 1048 1049 if (!amdgpu_sriov_vf(adev)) { 1050 if (state == AMDGPU_IRQ_STATE_ENABLE) 1051 val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; 1052 1053 WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, 1054 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); 1055 } 1056 return 0; 1057 } 1058 1059 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev, 1060 struct amdgpu_irq_src *source, 1061 struct amdgpu_iv_entry *entry) 1062 { 1063 DRM_DEBUG("IH: VCE\n"); 1064 1065 switch (entry->src_data[0]) { 1066 case 0: 1067 case 1: 1068 case 2: 1069 amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]); 1070 break; 1071 default: 1072 DRM_ERROR("Unhandled interrupt: %d %d\n", 1073 entry->src_id, entry->src_data[0]); 1074 break; 1075 } 1076 1077 return 0; 1078 } 1079 1080 const struct amd_ip_funcs vce_v4_0_ip_funcs = { 1081 .name = "vce_v4_0", 1082 .early_init = vce_v4_0_early_init, 1083 .late_init = NULL, 1084 .sw_init = vce_v4_0_sw_init, 1085 .sw_fini = vce_v4_0_sw_fini, 1086 .hw_init = vce_v4_0_hw_init, 1087 .hw_fini = vce_v4_0_hw_fini, 1088 .suspend = vce_v4_0_suspend, 1089 .resume = vce_v4_0_resume, 1090 .is_idle = NULL /* vce_v4_0_is_idle */, 1091 .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */, 1092 .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */, 1093 
.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */, 1094 .soft_reset = NULL /* vce_v4_0_soft_reset */, 1095 .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, 1096 .set_clockgating_state = vce_v4_0_set_clockgating_state, 1097 .set_powergating_state = vce_v4_0_set_powergating_state, 1098 }; 1099 1100 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { 1101 .type = AMDGPU_RING_TYPE_VCE, 1102 .align_mask = 0x3f, 1103 .nop = VCE_CMD_NO_OP, 1104 .support_64bit_ptrs = false, 1105 .no_user_fence = true, 1106 .vmhub = AMDGPU_MMHUB_0, 1107 .get_rptr = vce_v4_0_ring_get_rptr, 1108 .get_wptr = vce_v4_0_ring_get_wptr, 1109 .set_wptr = vce_v4_0_ring_set_wptr, 1110 .parse_cs = amdgpu_vce_ring_parse_cs_vm, 1111 .emit_frame_size = 1112 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 1113 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + 1114 4 + /* vce_v4_0_emit_vm_flush */ 1115 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1116 1, /* vce_v4_0_ring_insert_end */ 1117 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ 1118 .emit_ib = vce_v4_0_ring_emit_ib, 1119 .emit_vm_flush = vce_v4_0_emit_vm_flush, 1120 .emit_fence = vce_v4_0_ring_emit_fence, 1121 .test_ring = amdgpu_vce_ring_test_ring, 1122 .test_ib = amdgpu_vce_ring_test_ib, 1123 .insert_nop = amdgpu_ring_insert_nop, 1124 .insert_end = vce_v4_0_ring_insert_end, 1125 .pad_ib = amdgpu_ring_generic_pad_ib, 1126 .begin_use = amdgpu_vce_ring_begin_use, 1127 .end_use = amdgpu_vce_ring_end_use, 1128 .emit_wreg = vce_v4_0_emit_wreg, 1129 .emit_reg_wait = vce_v4_0_emit_reg_wait, 1130 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 1131 }; 1132 1133 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1134 { 1135 int i; 1136 1137 for (i = 0; i < adev->vce.num_rings; i++) { 1138 adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; 1139 adev->vce.ring[i].me = i; 1140 } 1141 DRM_INFO("VCE enabled in VM mode\n"); 1142 } 1143 1144 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = { 1145 .set = 
vce_v4_0_set_interrupt_state, 1146 .process = vce_v4_0_process_interrupt, 1147 }; 1148 1149 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev) 1150 { 1151 adev->vce.irq.num_types = 1; 1152 adev->vce.irq.funcs = &vce_v4_0_irq_funcs; 1153 }; 1154 1155 const struct amdgpu_ip_block_version vce_v4_0_ip_block = 1156 { 1157 .type = AMD_IP_BLOCK_TYPE_VCE, 1158 .major = 4, 1159 .minor = 0, 1160 .rev = 0, 1161 .funcs = &vce_v4_0_ip_funcs, 1162 }; 1163