/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vega10/soc15ip.h"
#include "vega10/VCE/vce_4_0_offset.h"
#include "vega10/VCE/vce_4_0_default.h"
#include "vega10/VCE/vce_4_0_sh_mask.h"
#include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
					  uint32_t *init_table,
					  uint32_t reg_offset,
					  uint32_t value)
{
	direct_wt->cmd_header.reg_offset = reg_offset;
	direct_wt->reg_value = value;
	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
}

static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
						 uint32_t *init_table,
						 uint32_t reg_offset,
						 uint32_t mask, uint32_t data)
{
	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
	direct_rd_mod_wt->mask_value = mask;
	direct_rd_mod_wt->write_data = data;
	memcpy((void *)init_table, direct_rd_mod_wt,
	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
}

static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
					    uint32_t *init_table,
					    uint32_t reg_offset,
					    uint32_t mask, uint32_t wait)
{
	direct_poll->cmd_header.reg_offset = reg_offset;
	direct_poll->mask_value = mask;
	direct_poll->wait_value = wait;
	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
}
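
/*
 * Each INSERT_DIRECT_* macro below appends one MMSCH command to the init
 * table and advances both the write cursor (init_table) and the running
 * dword count (table_size) by the size of the command just written.  They
 * rely on the local variables of vce_v4_0_sriov_start().
 */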
#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
	mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
				      init_table, (reg), \
				      (mask), (data)); \
	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
}

#define INSERT_DIRECT_WT(reg, value) { \
	mmsch_insert_direct_wt(&direct_wt, \
			       init_table, (reg), \
			       (value)); \
	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
}

#define INSERT_DIRECT_POLL(reg, mask, wait) { \
	mmsch_insert_direct_poll(&direct_poll, \
				 init_table, (reg), \
				 (mask), (wait)); \
	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
}

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	if (ring == &adev->vce.ring[0])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
	else if (ring == &adev->vce.ring[1])
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
	else
		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		return;
	}

	if (ring == &adev->vce.ring[0])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
		       lower_32_bits(ring->wptr));
	else if (ring == &adev->vce.ring[1])
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
		       lower_32_bits(ring->wptr));
	else
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
		       lower_32_bits(ring->wptr));
}

static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}
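
/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler under SR-IOV
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table built by vce_v4_0_sriov_start()
 *
 * Point the MMSCH at the init table, ring its mailbox and poll the
 * response register until the request is acknowledged.
 */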
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		ring = &adev->vce.ring[0];
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

		/* begin of MC_RESUME */
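		/*
		 * The entries below mirror the bare-metal register
		 * programming done in vce_v4_0_mc_resume(), so the MMSCH
		 * can replay it on behalf of this VF.
		 */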
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		offset += size;
		size = VCE_V4_0_STACK_SIZE;
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
					0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
					~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
					~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
				   VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
				   VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;

		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
	}

	return -EINVAL; /* already initialized? */
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
		 ~VCE_STATUS__JOB_BUSY_MASK);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}

static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}
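
/**
 * vce_v4_0_sw_init - sw init for VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, size and allocate the VCPU bo
 * (firmware, stack and data segments), set up the rings and, under
 * SR-IOV, allocate the MM table shared with the MMSCH.
 */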
static int vce_v4_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	unsigned size;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
	if (r)
		return r;

	size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		size += VCE_V4_0_FW_SIZE;

	r = amdgpu_vce_sw_init(adev, size);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCE firmware\n");
	}

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		if (amdgpu_sriov_vf(adev)) {
			/* DOORBELL only works under SR-IOV */
			ring->use_doorbell = true;
			if (i == 0)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
			else if (i == 1)
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
			else
				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
		}
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM,
					    &adev->virt.mm_table.bo,
					    &adev->virt.mm_table.gpu_addr,
					    (void *)&adev->virt.mm_table.cpu_addr);
		if (!r) {
			memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
			DRM_INFO("mm table gpu addr = 0x%llx, cpu addr = %p\n",
				 adev->virt.mm_table.gpu_addr,
				 adev->virt.mm_table.cpu_addr);
		}
		return r;
	}

	return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	if (amdgpu_sriov_vf(adev))
		amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
				      &adev->virt.mm_table.gpu_addr,
				      (void *)&adev->virt.mm_table.cpu_addr);

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	/* vce_v4_0_wait_for_idle(handle); */
	vce_v4_0_stop(adev);
	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}

static int vce_v4_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v4_0_hw_init(adev);
}
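
/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Program the LMI cache base addresses, offsets and sizes so the VCPU
 * can fetch the firmware, stack and data segments.
 */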
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
		 VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload */
	return 0;
}

#if 0
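/*
 * The idle, soft-reset and clock/powergating helpers in this block are
 * compiled out for now; the matching hooks in vce_v4_0_ip_funcs further
 * down are left NULL.
 */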

static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK	0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK	0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK	0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of
	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 0x10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|   bit6    |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3--6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	u32 tmp, data;

	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
	if (override)
		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
	else
		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

	if (tmp != data)
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}
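
/*
 * Note: vce_v4_0_set_bypass_mode and the VI ASIC checks in the
 * clockgating function below appear carried over from vce_v3_0; VCE 4.0
 * only exists on Vega10, and this whole block is compiled out anyway.
 */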
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v4_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
		}

		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v4_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC. This
	 * just re-inits the block as necessary. The actual
	 * gating still happens in the dpm code. We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v4_0_stop()? */
		return 0;
	else
		return vce_v4_0_start(adev);
}
#endif

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
			u64 seq, unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
	unsigned eng = ring->idx;
	unsigned i;

	pd_addr = pd_addr | 0x1; /* valid bit */
	/* now only use physical base address of PDE and valid */
	BUG_ON(pd_addr & 0xFFFF00000000003EULL);

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];

		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
		amdgpu_ring_write(ring,
			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
		amdgpu_ring_write(ring, upper_32_bits(pd_addr));

		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
		amdgpu_ring_write(ring,
			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
		amdgpu_ring_write(ring, lower_32_bits(pd_addr));

		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
		amdgpu_ring_write(ring,
			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
		amdgpu_ring_write(ring, 0xffffffff);
		amdgpu_ring_write(ring, lower_32_bits(pd_addr));

		/* flush TLB */
		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
		amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
		amdgpu_ring_write(ring, req);

		/* wait for flush */
		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
		amdgpu_ring_write(ring, 1 << vm_id);
		amdgpu_ring_write(ring, 1 << vm_id);
	}
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}
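
/*
 * All three rings share a single trap interrupt source; src_data[0]
 * carries the ring index, so the handler below fans the fence
 * processing out to the matching ring.
 */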
static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);

	switch (entry->src_data[0]) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
	DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};