/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

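	/* Doorbell-backed rings (used under SR-IOV) publish the write pointer
	 * through the writeback buffer and the doorbell aperture; bare-metal
	 * rings fall through to the per-ring MMIO WPTR registers below.
	 */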
	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
			lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
			lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
			lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
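	/* The MMSCH init table is built in the shared MM table buffer allocated
	 * in sw_init(); the header is followed by direct write/read-modify-write/
	 * poll packets that mirror the bare-metal vce_v4_0_mc_resume() programming.
	 */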
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->vce.gpu_addr >> 8);
		}

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SR-IOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);
	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v4_0_hw_init(adev);
}

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};