/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"

#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"

#define NORMALIZE_JPEG_REG_OFFSET(offset) \
	(offset & 0x1FFFF)

enum jpeg_engine_status {
	UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
					     enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);

static int amdgpu_ih_srcid_jpeg[] = {
	VCN_4_0__SRCID__JPEG_DECODE,
	VCN_4_0__SRCID__JPEG1_DECODE,
	VCN_4_0__SRCID__JPEG2_DECODE,
	VCN_4_0__SRCID__JPEG3_DECODE,
	VCN_4_0__SRCID__JPEG4_DECODE,
	VCN_4_0__SRCID__JPEG5_DECODE,
	VCN_4_0__SRCID__JPEG6_DECODE,
	VCN_4_0__SRCID__JPEG7_DECODE
};

static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = {
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0,
			    regUVD_JRBC2_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
};

static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
{
	return (adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED)) == 0;
}

/* JRBC pipes 1-7 are addressed relative to the JRBC0 register block,
 * spaced 0x40 registers apart.
 */
static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe)
{
	if (pipe)
		return ((0x40 * pipe) - 0xc80);
	else
		return 0;
}

/**
 * jpeg_v4_0_3_early_init - set function pointers
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Set ring and irq function pointers
 */
static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;

	jpeg_v4_0_3_set_dec_ring_funcs(adev);
	jpeg_v4_0_3_set_irq_funcs(adev);
	jpeg_v4_0_3_set_ras_funcs(adev);

	return 0;
}

/**
 * jpeg_v4_0_3_sw_init - sw init for JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Load firmware and sw initialization
 */
static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int i, j, r, jpeg_inst;

	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
		/* JPEG TRAP */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
				      amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
		if (r)
			return r;
	}

	r = amdgpu_jpeg_sw_init(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);

		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			ring->use_doorbell = true;
			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
			if (!amdgpu_sriov_vf(adev)) {
				ring->doorbell_index =
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
					1 + j + 9 * jpeg_inst;
			} else {
				if (j < 4)
					ring->doorbell_index =
						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						4 + j + 32 * jpeg_inst;
				else
					ring->doorbell_index =
						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						8 + j + 32 * jpeg_inst;
			}
			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
					     AMDGPU_RING_PRIO_DEFAULT, NULL);
			if (r)
				return r;

			adev->jpeg.internal.jpeg_pitch[j] =
				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
			adev->jpeg.inst[i].external.jpeg_pitch[j] =
				SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
						  jpeg_v4_0_3_core_reg_offset(j));
		}
	}

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		r = amdgpu_jpeg_ras_sw_init(adev);
		if (r) {
			dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
			return r;
		}
	}

	r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3));
	if (r)
		return r;

	/* TODO: Add queue reset mask when FW fully supports it */
	adev->jpeg.supported_reset =
		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
	if (r)
		return r;

	return 0;
}

/**
 * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * JPEG suspend and free up sw allocation
 */
static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_jpeg_suspend(adev);
	if (r)
		return r;

	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
	r = amdgpu_jpeg_sw_fini(adev);

	return r;
}

static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw, item_offset;
	uint32_t init_status;
	int i, j, jpeg_inst;

	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_3_init_header header;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	end.cmd_header.command_type =
		MMSCH_COMMAND__END;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
		jpeg_inst = GET_INST(JPEG, i);

		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
		header.version = MMSCH_VERSION;
		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;

		table_loc = (uint32_t *)table->cpu_addr;
		table_loc += header.total_size;

		item_offset = header.total_size;

		for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			table_size = 0;

			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);

			if (j <= 3) {
				header.mjpegdec0[j].table_offset = item_offset;
				header.mjpegdec0[j].init_status = 0;
				header.mjpegdec0[j].table_size = table_size;
			} else {
				header.mjpegdec1[j - 4].table_offset = item_offset;
				header.mjpegdec1[j - 4].init_status = 0;
				header.mjpegdec1[j - 4].table_size = table_size;
			}
			header.total_size += table_size;
			item_offset += table_size;
		}

		MMSCH_V4_0_INSERT_END();

		/* send init table to MMSCH */
		size = sizeof(struct mmsch_v4_0_3_init_header);
		table_loc = (uint32_t *)table->cpu_addr;
		memcpy((void *)table_loc, &header, size);

		ctx_addr = table->gpu_addr;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

		tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);

		size = header.total_size;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);

		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);

		param = 0x00000001;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
		tmp = 0;
		timeout = 1000;
		resp = 0;
		expected = MMSCH_VF_MAILBOX_RESP__OK;
		init_status =
			((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
		while (resp != expected) {
			resp = RREG32_SOC15(VCN,
					    jpeg_inst, regMMSCH_VF_MAILBOX_RESP);

			if (resp != 0)
				break;
			udelay(10);
			tmp = tmp + 10;
			if (tmp >= timeout) {
				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"
					  " waiting for regMMSCH_VF_MAILBOX_RESP "
					  "(expected=0x%08x, readback=0x%08x)\n",
					  tmp, expected, resp);
				return -EBUSY;
			}
		}
		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
		    init_status != MMSCH_VF_ENGINE_STATUS__PASS)
			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
				  resp, init_status);

	}
	return 0;
}

/**
 * jpeg_v4_0_3_hw_init - start and test JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 */
static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int i, j, r, jpeg_inst;

	if (amdgpu_sriov_vf(adev)) {
		r = jpeg_v4_0_3_start_sriov(adev);
		if (r)
			return r;

		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
				ring = &adev->jpeg.inst[i].ring_dec[j];
				ring->wptr = 0;
				ring->wptr_old = 0;
				jpeg_v4_0_3_dec_ring_set_wptr(ring);
				ring->sched.ready = true;
			}
		}
	} else {
		/* This flag is not set for VF, assumed to be disabled always */
		if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
		    0x100)
			adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);

		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
			jpeg_inst = GET_INST(JPEG, i);

			ring = adev->jpeg.inst[i].ring_dec;

			if (ring->use_doorbell)
				adev->nbio.funcs->vcn_doorbell_range(
					adev, ring->use_doorbell,
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
					9 * jpeg_inst,
					adev->jpeg.inst[i].aid_id);

			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
				ring = &adev->jpeg.inst[i].ring_dec[j];
				if (ring->use_doorbell)
					WREG32_SOC15_OFFSET(
						VCN, GET_INST(VCN, i),
						regVCN_JPEG_DB_CTRL,
						(ring->pipe ? (ring->pipe - 0x15) : 0),
						ring->doorbell_index
							<< VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
							VCN_JPEG_DB_CTRL__EN_MASK);
				r = amdgpu_ring_test_helper(ring);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_hw_fini - stop the hardware block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Stop the JPEG block and mark the rings as not ready
 */
static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int ret = 0;

	cancel_delayed_work_sync(&adev->jpeg.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
			ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
	}

	return ret;
}

/**
 * jpeg_v4_0_3_suspend - suspend JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * HW fini and suspend JPEG block
 */
static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = jpeg_v4_0_3_hw_fini(ip_block);
	if (r)
		return r;

	r = amdgpu_jpeg_suspend(ip_block->adev);

	return r;
}

/**
 * jpeg_v4_0_3_resume - resume JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Resume firmware and hw init JPEG block
 */
static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = amdgpu_jpeg_resume(ip_block->adev);
	if (r)
		return r;

	r = jpeg_v4_0_3_hw_init(ip_block);

	return r;
}

static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	int i, jpeg_inst;
	uint32_t data;

	jpeg_inst = GET_INST(JPEG, inst_idx);
	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
	} else {
		data &= ~(1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT);
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
	data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}

static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	int i, jpeg_inst;
	uint32_t data;

	jpeg_inst = GET_INST(JPEG, inst_idx);
	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
	} else {
		data &= ~(1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT);
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
	data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}

static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst)
{
	int jpeg_inst = GET_INST(JPEG, inst);

	WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
		     1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
	SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
			   UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
			   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);

	/* disable anti hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
		 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

	/* JPEG disable CGC */
	jpeg_v4_0_3_disable_clock_gating(adev, inst);

	/* MJPEG global tiling registers */
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
		     adev->gfx.config.gb_addr_config);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
		     adev->gfx.config.gb_addr_config);

	/* enable JMI channel */
	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
		 ~UVD_JMI_CNTL__SOFT_RESET_MASK);
}

static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int jpeg_inst = GET_INST(JPEG, ring->me);
	int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);

	/* enable System Interrupt for JRBC */
	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN),
		 JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe,
		 ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe));

	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
			    reg_offset, 0);
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JRBC0_UVD_JRBC_RB_CNTL,
			    reg_offset,
			    (0x00000001L | 0x00000002L));
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
			    reg_offset, lower_32_bits(ring->gpu_addr));
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
			    reg_offset, upper_32_bits(ring->gpu_addr));
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JRBC0_UVD_JRBC_RB_RPTR,
			    reg_offset, 0);
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JRBC0_UVD_JRBC_RB_WPTR,
			    reg_offset, 0);
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JRBC0_UVD_JRBC_RB_CNTL,
			    reg_offset, 0x00000002L);
	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
			    regUVD_JRBC0_UVD_JRBC_RB_SIZE,
			    reg_offset, ring->ring_size / 4);
	ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
					 reg_offset);
}

/**
 * jpeg_v4_0_3_start - start JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the JPEG block
 */
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_v4_0_3_start_inst(adev, i);
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			jpeg_v4_0_3_start_jrbc(ring);
		}
	}

	return 0;
}

static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst)
{
	int jpeg_inst = GET_INST(JPEG, inst);

	/* reset JMI */
	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
		 UVD_JMI_CNTL__SOFT_RESET_MASK,
		 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

	jpeg_v4_0_3_enable_clock_gating(adev, inst);

	/* enable anti hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
		 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

	WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
		     2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
	SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
			   UVD_PGFSM_STATUS__UVDJ_PWR_OFF <<
			   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			   UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
}

/**
 * jpeg_v4_0_3_stop - stop JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * stop the JPEG block
 */
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
		jpeg_v4_0_3_stop_inst(adev, i);

	return 0;
}

/**
 * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
				   jpeg_v4_0_3_core_reg_offset(ring->pipe));
}

/**
 * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t
jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				   jpeg_v4_0_3_core_reg_offset(ring->pipe));
}

static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	/* JPEG engine access for HDP flush doesn't work when RRMT is enabled.
	 * This is a workaround to avoid any HDP flush through JPEG ring.
	 */
}

/**
 * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				    jpeg_v4_0_3_core_reg_offset(ring->pipe),
				    lower_32_bits(ring->wptr));
	}
}

/**
 * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write a start command to the ring.
 */
void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev)) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
						0, 0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */

		amdgpu_ring_write(ring,
				  PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
					  0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x80004000);
	}
}

/**
 * jpeg_v4_0_3_dec_ring_insert_end - insert an end command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write an end command to the ring.
 */
void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev)) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
						0, 0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x62a04);

		amdgpu_ring_write(ring,
				  PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
					  0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x00004000);
	}
}

/**
 * jpeg_v4_0_3_dec_ring_emit_fence - emit a fence & trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Write a fence and a trap command to the ring.
 */
void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				     unsigned int flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
					0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
	amdgpu_ring_write(ring, 0);
}

/**
 * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write ring commands to execute the indirect buffer.
 */
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));

	if (ring->funcs->parse_cs)
		amdgpu_ring_write(ring, 0);
	else
		amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, ib->length_dw);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
					0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
	amdgpu_ring_write(ring, 0x2);
}

void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	uint32_t reg_offset;

	/* Use normalized offsets if required */
	if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
		reg = NORMALIZE_JPEG_REG_OFFSET(reg);

	reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, val);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
				  PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
						0, 0, PACKETJ_TYPE3));
	}
	amdgpu_ring_write(ring, mask);
}

void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t data0, data1, mask;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for register write */
	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
	data1 = lower_32_bits(pd_addr);
	mask = 0xffffffff;
	jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
}

void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	uint32_t reg_offset;

	/* Use normalized offsets if required */
	if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
		reg = NORMALIZE_JPEG_REG_OFFSET(reg);

	reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
					0, 0, PACKETJ_TYPE0));
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
				  PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
						0, 0, PACKETJ_TYPE0));
	}
	amdgpu_ring_write(ring, val);
}

void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	WARN_ON(ring->wptr % 2 || count % 2);

	for (i = 0; i < count / 2; i++) {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}
}

static bool jpeg_v4_0_3_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool ret = true;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
				regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j)) &
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
		}
	}

	return ret;
}

static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev =
		ip_block->adev;
	int ret = 0;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ret |= (SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
				regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j),
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
		}
	}
	return ret;
}

static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					     enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool enable = state == AMD_CG_STATE_GATE;
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (enable) {
			if (!jpeg_v4_0_3_is_idle(adev))
				return -EBUSY;
			jpeg_v4_0_3_enable_clock_gating(adev, i);
		} else {
			jpeg_v4_0_3_disable_clock_gating(adev, i);
		}
	}
	return 0;
}

static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
					     enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	int ret;

	if (amdgpu_sriov_vf(adev)) {
		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
		return 0;
	}

	if (state == adev->jpeg.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = jpeg_v4_0_3_stop(adev);
	else
		ret = jpeg_v4_0_3_start(adev);

	if (!ret)
		adev->jpeg.cur_state = state;

	return ret;
}

static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned int type,
					   enum amdgpu_interrupt_state state)
{
	return 0;
}

static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 struct amdgpu_iv_entry *entry)
{
	uint32_t i, inst;

	i = node_id_to_phys_map[entry->node_id];
	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");

	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
		if (adev->jpeg.inst[inst].aid_id == i)
			break;

	if (inst >= adev->jpeg.num_jpeg_inst) {
		dev_WARN_ONCE(adev->dev, 1,
			      "Interrupt received for unknown JPEG instance %d",
			      entry->node_id);
		return 0;
	}

	switch (entry->src_id) {
	case VCN_4_0__SRCID__JPEG_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
		break;
	case VCN_4_0__SRCID__JPEG1_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
		break;
	case VCN_4_0__SRCID__JPEG2_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
		break;
	case VCN_4_0__SRCID__JPEG3_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
		break;
	case VCN_4_0__SRCID__JPEG4_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
		break;
	case VCN_4_0__SRCID__JPEG5_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
		break;
	case VCN_4_0__SRCID__JPEG6_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
		break;
	case VCN_4_0__SRCID__JPEG7_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
		break;
	default:
		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
			      entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
	.name = "jpeg_v4_0_3",
	.early_init =
		jpeg_v4_0_3_early_init,
	.sw_init = jpeg_v4_0_3_sw_init,
	.sw_fini = jpeg_v4_0_3_sw_fini,
	.hw_init = jpeg_v4_0_3_hw_init,
	.hw_fini = jpeg_v4_0_3_hw_fini,
	.suspend = jpeg_v4_0_3_suspend,
	.resume = jpeg_v4_0_3_resume,
	.is_idle = jpeg_v4_0_3_is_idle,
	.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
	.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
	.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
	.dump_ip_state = amdgpu_jpeg_dump_ip_state,
	.print_ip_state = amdgpu_jpeg_print_ip_state,
};

static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_JPEG,
	.align_mask = 0xf,
	.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
	.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
	.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
	.parse_cs = jpeg_v2_dec_ring_parse_cs,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
		18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
		8 + 16,
	.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
	.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
	.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
	.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
	.emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
	.insert_nop = jpeg_v4_0_3_dec_ring_nop,
	.insert_start = jpeg_v4_0_3_dec_ring_insert_start,
	.insert_end = jpeg_v4_0_3_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_jpeg_ring_begin_use,
	.end_use = amdgpu_jpeg_ring_end_use,
	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i, j, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
			adev->jpeg.inst[i].ring_dec[j].me = i;
			adev->jpeg.inst[i].ring_dec[j].pipe = j;
		}
		jpeg_inst = GET_INST(JPEG, i);
		adev->jpeg.inst[i].aid_id =
			jpeg_inst / adev->jpeg.num_inst_per_aid;
	}
}

static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
	.set = jpeg_v4_0_3_set_interrupt_state,
	.process = jpeg_v4_0_3_process_interrupt,
};

static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
	}
	adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
}

const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_JPEG,
	.major = 4,
	.minor = 0,
	.rev = 3,
	.funcs = &jpeg_v4_0_3_ip_funcs,
};

static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D,
			      regVCN_UE_ERR_STATUS_HI_JPEG0D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
};

static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
						   uint32_t jpeg_inst,
						   void *ras_err_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;

	/* jpeg v4_0_3 only supports uncorrectable errors */
	amdgpu_ras_inst_query_ras_error_count(adev,
					      jpeg_v4_0_3_ue_reg_list,
					      ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
					      NULL, 0, GET_INST(VCN, jpeg_inst),
					      AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
					      &err_data->ue_count);
}

static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
					      void *ras_err_status)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		dev_warn(adev->dev, "JPEG RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
		jpeg_v4_0_3_inst_query_ras_error_count(adev, i,
						       ras_err_status);
}

static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
						   uint32_t jpeg_inst)
{
	amdgpu_ras_inst_reset_ras_error_count(adev,
					      jpeg_v4_0_3_ue_reg_list,
					      ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
					      GET_INST(VCN, jpeg_inst));
}

static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		dev_warn(adev->dev, "JPEG RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
		jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
}

static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
	.query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
	.reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
};

static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
				       enum aca_smu_type type, void *data)
{
	struct aca_bank_info info;
	u64 misc0;
	int ret;

	ret = aca_bank_info_decode(bank, &info);
	if (ret)
		return ret;

	misc0 = bank->regs[ACA_REG_IDX_MISC0];
	switch (type) {
	case ACA_SMU_TYPE_UE:
		bank->aca_err_type = ACA_ERROR_TYPE_UE;
		ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
						     1ULL);
		break;
	case ACA_SMU_TYPE_CE:
		bank->aca_err_type = ACA_ERROR_TYPE_CE;
		ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
						     ACA_REG__MISC0__ERRCNT(misc0));
		break;
	default:
		return -EINVAL;
	}

	return ret;
}

/* refer to the SMU driver interface header file for these error codes */
static int jpeg_v4_0_3_err_codes[] = {
	16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
	24, 25, 26, 27, 28, 29, 30, 31
};

static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
					  enum aca_smu_type type, void *data)
{
	u32 instlo;

	instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
	instlo &= GENMASK(31, 1);

	if (instlo != mmSMNAID_AID0_MCA_SMU)
		return false;

	if (aca_bank_check_error_codes(handle->adev, bank,
				       jpeg_v4_0_3_err_codes,
				       ARRAY_SIZE(jpeg_v4_0_3_err_codes)))
		return false;

	return true;
}

static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
	.aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
	.aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
};

static const struct aca_info jpeg_v4_0_3_aca_info = {
	.hwip = ACA_HWIP_TYPE_SMU,
	.mask = ACA_ERROR_UE_MASK,
	.bank_ops = &jpeg_v4_0_3_aca_bank_ops,
};

static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r;

	r = amdgpu_ras_block_late_init(adev, ras_block);
	if (r)
		return r;

	r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
				&jpeg_v4_0_3_aca_info, NULL);
	if (r)
		goto late_fini;

	return 0;

late_fini:
	amdgpu_ras_block_late_fini(adev, ras_block);

	return r;
}

static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
	.ras_block = {
		.hw_ops = &jpeg_v4_0_3_ras_hw_ops,
		.ras_late_init = jpeg_v4_0_3_ras_late_init,
	},
};

static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
	adev->jpeg.ras =
		&jpeg_v4_0_3_ras;
}