/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

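	/* Under SR-IOV the rings use doorbells: mirror the write pointer
	 * into the writeback slot and ring the doorbell instead of
	 * programming the per-ring WPTR registers directly.
	 */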
	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
		       lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
		       lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
		       lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* begin of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						    adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						    adev->vce.gpu_addr >> 8);
		}

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					    offset & 0x7FFFFFFF);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		 ~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE ring 0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
		if (!adev->vce.saved_bo)
			return -ENOMEM;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SRIOV */
			ring->use_doorbell = true;

			/* currently only use the first encoding ring for sriov,
			 * so set unused location for other unused rings.
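			 * (early_init limits num_rings to 1 under SR-IOV, so
			 * only the i == 0 branch is currently reachable here;
			 * the other index is just a safe placeholder.)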
			 */
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	r = amdgpu_virt_alloc_mm_table(adev);
	if (r)
		return r;

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
		 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 *VCE_STATUS
	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 *|----+----+-----------+----+----+----+----------+---------+----|
	 *|bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3 to bit 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->vm_inv_eng;

	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
	pd_addr |= AMDGPU_PTE_VALID;

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vm_id);
	amdgpu_ring_write(ring, 1 << vm_id);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (!amdgpu_sriov_vf(adev)) {
		if (state == AMDGPU_IRQ_STATE_ENABLE)
			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
			 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	}
	return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};