/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
		       lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
		       lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
		       lower_32_bits(ring->wptr));
}
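
/**
 * vce_v4_0_firmware_loaded - poll for the VCPU firmware report
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS until the firmware reports that it is loaded,
 * soft-resetting the ECPU between retries.  Returns 0 on success or
 * -ETIMEDOUT if the firmware never comes up.
 */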
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
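
/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the MMSCH v1.0 init table (ring setup plus the MC_RESUME
 * register programming) in the shared mm_table and kicks it off via
 * vce_v4_0_mmsch_start().
 */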
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGINNING OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
		}
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & ~0x0f000000);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
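
		/* the stack segment follows the firmware image in the VCE BO;
		 * when the PSP loads the firmware the VCPU cache offsets
		 * restart at 0 instead
		 */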
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ?
			offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    (offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    (offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		 ~VCE_STATUS__JOB_BUSY_MASK);
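
	/* enable the VCPU clock, take the ECPU out of soft reset and
	 * wait for the firmware to report that it is loaded
	 */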
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 is supported under SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}
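
	/* create the software rings; under SR-IOV they are driven through
	 * doorbells and only the first encoding ring is actually used
	 */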
	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set an unused location for the other rings.
			 */
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
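
	/* point the VCPU cache BAR0 at the firmware image: the PSP ucode
	 * buffer when the PSP loads the firmware, the VCE BO otherwise
	 */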
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
		 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}
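
/* The idle, soft-reset and gating helpers below are kept for reference but
 * are compiled out; the matching callbacks in vce_v4_0_ip_funcs are left NULL.
 */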
#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}
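
/* re-initialize the block once the SRBM soft reset has taken effect */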
static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}
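
/* Full clock-gating implementation, also compiled out; the active
 * vce_v4_0_set_clockgating_state() stub above is used instead.
 */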
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif
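
/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vmid: VM id to use
 * @ctx_switch: unused by VCE
 *
 * Write the ring commands to execute the indirect buffer.
 */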
static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
				   unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
	uint64_t flags = AMDGPU_PTE_VALID;
	unsigned eng = ring->vm_inv_eng;

	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
	pd_addr |= flags;

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vmid);
	amdgpu_ring_write(ring, 1 << vmid);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
			 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};