/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"

#include "vcn/vcn_4_0_3_offset.h"
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"

#define NORMALIZE_JPEG_REG_OFFSET(offset) \
	(offset & 0x1FFFF)

enum jpeg_engin_status {
	UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0,
	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
					     enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);

static int amdgpu_ih_srcid_jpeg[] = {
	VCN_4_0__SRCID__JPEG_DECODE,
	VCN_4_0__SRCID__JPEG1_DECODE,
	VCN_4_0__SRCID__JPEG2_DECODE,
	VCN_4_0__SRCID__JPEG3_DECODE,
	VCN_4_0__SRCID__JPEG4_DECODE,
	VCN_4_0__SRCID__JPEG5_DECODE,
	VCN_4_0__SRCID__JPEG6_DECODE,
	VCN_4_0__SRCID__JPEG7_DECODE
};

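/*
 * Registers captured for the IP-state dump (dump_ip_state/print_ip_state):
 * global JPEG status plus per-core JRBC ring pointers and status.
 */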
static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = {
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
};

static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
{
	return (adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED)) == 0;
}

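/*
 * Register offset of JRBC core @pipe relative to the JRBC0 register names
 * used in this file; cores 1-7 sit in a separate aperture spaced 0x40
 * registers apart, hence the fixed rebase for any non-zero pipe.
 */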
static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe)
{
	if (pipe)
		return ((0x40 * pipe) - 0xc80);
	else
		return 0;
}

/**
 * jpeg_v4_0_3_early_init - set function pointers
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Set ring and irq function pointers
 */
static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;

	jpeg_v4_0_3_set_dec_ring_funcs(adev);
	jpeg_v4_0_3_set_irq_funcs(adev);
	jpeg_v4_0_3_set_ras_funcs(adev);

	return 0;
}

/**
 * jpeg_v4_0_3_sw_init - sw init for JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Load firmware and sw initialization
 */
static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int i, j, r, jpeg_inst;

	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
		/* JPEG TRAP */
		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
				      amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
		if (r)
			return r;
	}

	r = amdgpu_jpeg_sw_init(adev);
	if (r)
		return r;

	r = amdgpu_jpeg_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);

		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			ring->use_doorbell = true;
			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
			if (!amdgpu_sriov_vf(adev)) {
				ring->doorbell_index =
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
					1 + j + 9 * jpeg_inst;
			} else {
				if (j < 4)
					ring->doorbell_index =
						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						4 + j + 32 * jpeg_inst;
				else
					ring->doorbell_index =
						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						8 + j + 32 * jpeg_inst;
			}
			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
					     AMDGPU_RING_PRIO_DEFAULT, NULL);
			if (r)
				return r;

			adev->jpeg.internal.jpeg_pitch[j] =
				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
			adev->jpeg.inst[i].external.jpeg_pitch[j] =
				SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
						  jpeg_v4_0_3_core_reg_offset(j));
		}
	}

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		r = amdgpu_jpeg_ras_sw_init(adev);
		if (r) {
			dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
			return r;
		}
	}

	r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3));
	if (r)
		return r;

	/* TODO: Add queue reset mask when FW fully supports it */
	adev->jpeg.supported_reset =
		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
	if (r)
		return r;

	return 0;
}

/**
 * jpeg_v4_0_3_sw_fini - sw fini for JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * JPEG suspend and free up sw allocation
 */
static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_jpeg_suspend(adev);
	if (r)
		return r;

	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
	r = amdgpu_jpeg_sw_fini(adev);

	return r;
}

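/*
 * Under SR-IOV the guest does not program the JRBC registers directly.
 * jpeg_v4_0_3_start_sriov() builds a per-instance MMSCH init table that
 * describes each ring (base address and size), points the MMSCH at it and
 * waits for the mailbox handshake to confirm the host applied it.
 */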
static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw, item_offset;
	uint32_t init_status;
	int i, j, jpeg_inst;

	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_3_init_header header;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	end.cmd_header.command_type =
		MMSCH_COMMAND__END;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
		jpeg_inst = GET_INST(JPEG, i);

		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
		header.version = MMSCH_VERSION;
		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;

		table_loc = (uint32_t *)table->cpu_addr;
		table_loc += header.total_size;

		item_offset = header.total_size;

		for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
			ring = &adev->jpeg.inst[i].ring_dec[j];
			table_size = 0;

			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
			MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);

			if (j <= 3) {
				header.mjpegdec0[j].table_offset = item_offset;
				header.mjpegdec0[j].init_status = 0;
				header.mjpegdec0[j].table_size = table_size;
			} else {
				header.mjpegdec1[j - 4].table_offset = item_offset;
				header.mjpegdec1[j - 4].init_status = 0;
				header.mjpegdec1[j - 4].table_size = table_size;
			}
			header.total_size += table_size;
			item_offset += table_size;
		}

		MMSCH_V4_0_INSERT_END();

		/* send init table to MMSCH */
		size = sizeof(struct mmsch_v4_0_3_init_header);
		table_loc = (uint32_t *)table->cpu_addr;
		memcpy((void *)table_loc, &header, size);

		ctx_addr = table->gpu_addr;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

		tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);

		size = header.total_size;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);

		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);

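		/*
		 * Ring the MMSCH mailbox and poll for the response; give up
		 * if the host does not acknowledge the table within 1 ms.
		 */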
		param = 0x00000001;
		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
		tmp = 0;
		timeout = 1000;
		resp = 0;
		expected = MMSCH_VF_MAILBOX_RESP__OK;
		init_status =
			((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
		while (resp != expected) {
			resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);

			if (resp != 0)
				break;
			udelay(10);
			tmp = tmp + 10;
			if (tmp >= timeout) {
				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"
					  " waiting for regMMSCH_VF_MAILBOX_RESP "
					  "(expected=0x%08x, readback=0x%08x)\n",
					  tmp, expected, resp);
				return -EBUSY;
			}
		}
		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
		    init_status != MMSCH_VF_ENGINE_STATUS__PASS)
			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
				  resp, init_status);

	}
	return 0;
}

/**
 * jpeg_v4_0_3_hw_init - start and test JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 */
static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int i, j, r, jpeg_inst;

	if (amdgpu_sriov_vf(adev)) {
		r = jpeg_v4_0_3_start_sriov(adev);
		if (r)
			return r;

		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
				ring = &adev->jpeg.inst[i].ring_dec[j];
				ring->wptr = 0;
				ring->wptr_old = 0;
				jpeg_v4_0_3_dec_ring_set_wptr(ring);
				ring->sched.ready = true;
			}
		}
	} else {
		/* This flag is not set for VF, assumed to be disabled always */
		if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
		    0x100)
			adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);

		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
			jpeg_inst = GET_INST(JPEG, i);

			ring = adev->jpeg.inst[i].ring_dec;

			if (ring->use_doorbell)
				adev->nbio.funcs->vcn_doorbell_range(
					adev, ring->use_doorbell,
					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
						9 * jpeg_inst,
					adev->jpeg.inst[i].aid_id);

			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
				ring = &adev->jpeg.inst[i].ring_dec[j];
				if (ring->use_doorbell)
					WREG32_SOC15_OFFSET(
						VCN, GET_INST(VCN, i),
						regVCN_JPEG_DB_CTRL,
						(ring->pipe ? (ring->pipe - 0x15) : 0),
						ring->doorbell_index
							<< VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
							VCN_JPEG_DB_CTRL__EN_MASK);
				r = amdgpu_ring_test_helper(ring);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_hw_fini - stop the hardware block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Stop the JPEG block, mark the rings as not ready anymore
 */
static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int ret = 0;

	cancel_delayed_work_sync(&adev->jpeg.idle_work);

	if (!amdgpu_sriov_vf(adev)) {
		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
			ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
	}

	return ret;
}

/**
 * jpeg_v4_0_3_suspend - suspend JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * HW fini and suspend JPEG block
 */
static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = jpeg_v4_0_3_hw_fini(ip_block);
	if (r)
		return r;

	r = amdgpu_jpeg_suspend(ip_block->adev);

	return r;
}

/**
 * jpeg_v4_0_3_resume - resume JPEG block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Resume firmware and hw init JPEG block
 */
static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = amdgpu_jpeg_resume(ip_block->adev);
	if (r)
		return r;

	r = jpeg_v4_0_3_hw_init(ip_block);

	return r;
}

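/*
 * Clock-gating helpers: disable_clock_gating() force-ungates every JRBC
 * core (plus JMCIF and JRBBM) before the block is started, while
 * enable_clock_gating() re-gates them when the block is stopped or idle.
 */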
static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	int i, jpeg_inst;
	uint32_t data;

	jpeg_inst = GET_INST(JPEG, inst_idx);
	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1));
	} else {
		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
	data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}

static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
{
	int i, jpeg_inst;
	uint32_t data;

	jpeg_inst = GET_INST(JPEG, inst_idx);
	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL);
	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1);
	} else {
		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	}

	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data);

	data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE);
	data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK);
	for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i)
		data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i);
	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}

/**
 * jpeg_v4_0_3_start - start JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the JPEG block
 */
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int i, j, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);

		WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
			     1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		SOC15_WAIT_ON_RREG(
			JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDJ_PWR_ON
				<< UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);

		/* disable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
					  regUVD_JPEG_POWER_STATUS),
			 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		/* JPEG disable CGC */
		jpeg_v4_0_3_disable_clock_gating(adev, i);

		/* MJPEG global tiling registers */
		WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);
		WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
			     adev->gfx.config.gb_addr_config);

		/* enable JMI channel */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

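		/*
		 * Per-core setup: route the JRBC interrupt, program the ring
		 * buffer address and size, and reset the read/write pointers.
		 */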
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			int reg_offset = jpeg_v4_0_3_core_reg_offset(j);

			ring = &adev->jpeg.inst[i].ring_dec[j];

			/* enable System Interrupt for JRBC */
			WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
						  regJPEG_SYS_INT_EN),
				 JPEG_SYS_INT_EN__DJRBC0_MASK << j,
				 ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));

			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
					    reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_CNTL,
					    reg_offset,
					    (0x00000001L | 0x00000002L));
			WREG32_SOC15_OFFSET(
				JPEG, jpeg_inst,
				regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
				reg_offset, lower_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(
				JPEG, jpeg_inst,
				regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
				reg_offset, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_RPTR,
					    reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_WPTR,
					    reg_offset, 0);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_CNTL,
					    reg_offset, 0x00000002L);
			WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
					    regUVD_JRBC0_UVD_JRBC_RB_SIZE,
					    reg_offset, ring->ring_size / 4);
			ring->wptr = RREG32_SOC15_OFFSET(
				JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				reg_offset);
		}
	}

	return 0;
}

/**
 * jpeg_v4_0_3_stop - stop JPEG block
 *
 * @adev: amdgpu_device pointer
 *
 * stop the JPEG block
 */
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
	int i, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		jpeg_inst = GET_INST(JPEG, i);
		/* reset JMI */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
			 UVD_JMI_CNTL__SOFT_RESET_MASK,
			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);

		jpeg_v4_0_3_enable_clock_gating(adev, i);

		/* enable anti hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
					  regUVD_JPEG_POWER_STATUS),
			 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
			 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);

		WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
			     2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
		SOC15_WAIT_ON_RREG(
			JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
			UVD_PGFSM_STATUS__UVDJ_PWR_OFF
				<< UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
			UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
	}

	return 0;
}

/**
 * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
				   jpeg_v4_0_3_core_reg_offset(ring->pipe));
}

/**
 * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];

	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				   jpeg_v4_0_3_core_reg_offset(ring->pipe));
}

static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	/* JPEG engine access for HDP flush doesn't work when RRMT is enabled.
	 * This is a workaround to avoid any HDP flush through the JPEG ring.
	 */
}

/**
 * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
				    jpeg_v4_0_3_core_reg_offset(ring->pipe),
				    lower_32_bits(ring->wptr));
	}
}

/**
 * jpeg_v4_0_3_dec_ring_insert_start - insert a start command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write a start command to the ring.
 */
void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev)) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
			0, 0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */

		amdgpu_ring_write(ring,
				  PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
					  0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x80004000);
	}
}

/**
 * jpeg_v4_0_3_dec_ring_insert_end - insert an end command
 *
 * @ring: amdgpu_ring pointer
 *
 * Write an end command to the ring.
 */
void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev)) {
		amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
			0, 0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x62a04);

		amdgpu_ring_write(ring,
				  PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
					  0, PACKETJ_TYPE0));
		amdgpu_ring_write(ring, 0x00004000);
	}
}

/**
 * jpeg_v4_0_3_dec_ring_emit_fence - emit a fence & trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Write a fence and a trap command to the ring.
 */
void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				     unsigned int flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, seq);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x8);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
		0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
	amdgpu_ring_write(ring, 0);
}

/**
 * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job to retrieve vmid from
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Write ring commands to execute the indirect buffer.
 */
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));

	if (ring->funcs->parse_cs)
		amdgpu_ring_write(ring, 0);
	else
		amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, ib->length_dw);

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));

	amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET,
		0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
	amdgpu_ring_write(ring, 0x2);
}

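/*
 * Emit a register wait: the engine polls @reg until it matches @val under
 * @mask. Offsets inside the directly addressable window are encoded in the
 * poll packet itself; others go through the external register address.
 */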
void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	uint32_t reg_offset;

	/* Use normalized offsets if required */
	if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
		reg = NORMALIZE_JPEG_REG_OFFSET(reg);

	reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, 0x01400200);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	amdgpu_ring_write(ring, val);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
			0, 0, PACKETJ_TYPE3));
	}
	amdgpu_ring_write(ring, mask);
}

void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t data0, data1, mask;

	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for register write */
	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
	data1 = lower_32_bits(pd_addr);
	mask = 0xffffffff;
	jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
}

void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	uint32_t reg_offset;

	/* Use normalized offsets if required */
	if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
		reg = NORMALIZE_JPEG_REG_OFFSET(reg);

	reg_offset = (reg << 2);

	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
		0, 0, PACKETJ_TYPE0));
	if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring,
			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
	} else {
		amdgpu_ring_write(ring, reg_offset);
		amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
			0, 0, PACKETJ_TYPE0));
	}
	amdgpu_ring_write(ring, val);
}

void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	WARN_ON(ring->wptr % 2 || count % 2);

	for (i = 0; i < count / 2; i++) {
		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		amdgpu_ring_write(ring, 0);
	}
}

static bool jpeg_v4_0_3_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	/* start from "idle"; any core still busy below clears this */
	bool ret = true;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
				regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j)) &
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
		}
	}

	return ret;
}

static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int ret = 0;
	int i, j;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			/* report the first core that fails to go idle */
			ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
				regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j),
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
				UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
			if (ret)
				return ret;
		}
	}
	return ret;
}

static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					     enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool enable = state == AMD_CG_STATE_GATE;
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		if (enable) {
			if (!jpeg_v4_0_3_is_idle(adev))
				return -EBUSY;
			jpeg_v4_0_3_enable_clock_gating(adev, i);
		} else {
			jpeg_v4_0_3_disable_clock_gating(adev, i);
		}
	}
	return 0;
}

static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
					     enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	int ret;

	if (amdgpu_sriov_vf(adev)) {
		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
		return 0;
	}

	if (state == adev->jpeg.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = jpeg_v4_0_3_stop(adev);
	else
		ret = jpeg_v4_0_3_start(adev);

	if (!ret)
		adev->jpeg.cur_state = state;

	return ret;
}

static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned int type,
					   enum amdgpu_interrupt_state state)
{
	return 0;
}

static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 struct amdgpu_iv_entry *entry)
{
	uint32_t i, inst;

	i = node_id_to_phys_map[entry->node_id];
	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");

	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
		if (adev->jpeg.inst[inst].aid_id == i)
			break;

	if (inst >= adev->jpeg.num_jpeg_inst) {
		dev_WARN_ONCE(adev->dev, 1,
			      "Interrupt received for unknown JPEG instance %d",
			      entry->node_id);
		return 0;
	}

	switch (entry->src_id) {
	case VCN_4_0__SRCID__JPEG_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
		break;
	case VCN_4_0__SRCID__JPEG1_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
		break;
	case VCN_4_0__SRCID__JPEG2_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
		break;
	case VCN_4_0__SRCID__JPEG3_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
		break;
	case VCN_4_0__SRCID__JPEG4_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
		break;
	case VCN_4_0__SRCID__JPEG5_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
		break;
	case VCN_4_0__SRCID__JPEG6_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
		break;
	case VCN_4_0__SRCID__JPEG7_DECODE:
		amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
		break;
	default:
		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
			      entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
	.name = "jpeg_v4_0_3",
	.early_init = jpeg_v4_0_3_early_init,
	.sw_init = jpeg_v4_0_3_sw_init,
	.sw_fini = jpeg_v4_0_3_sw_fini,
	.hw_init = jpeg_v4_0_3_hw_init,
	.hw_fini = jpeg_v4_0_3_hw_fini,
	.suspend = jpeg_v4_0_3_suspend,
	.resume = jpeg_v4_0_3_resume,
	.is_idle = jpeg_v4_0_3_is_idle,
	.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
	.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
	.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
	.dump_ip_state = amdgpu_jpeg_dump_ip_state,
	.print_ip_state = amdgpu_jpeg_print_ip_state,
};

static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_JPEG,
	.align_mask = 0xf,
	.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
	.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
	.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
	.parse_cs = jpeg_v2_dec_ring_parse_cs,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
		18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
		8 + 16,
	.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
	.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
	.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
	.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
	.emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
	.insert_nop = jpeg_v4_0_3_dec_ring_nop,
	.insert_start = jpeg_v4_0_3_dec_ring_insert_start,
	.insert_end = jpeg_v4_0_3_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_jpeg_ring_begin_use,
	.end_use = amdgpu_jpeg_ring_end_use,
	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i, j, jpeg_inst;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
			adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs;
			adev->jpeg.inst[i].ring_dec[j].me = i;
			adev->jpeg.inst[i].ring_dec[j].pipe = j;
		}
		jpeg_inst = GET_INST(JPEG, i);
		adev->jpeg.inst[i].aid_id =
			jpeg_inst / adev->jpeg.num_inst_per_aid;
	}
}

static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
	.set = jpeg_v4_0_3_set_interrupt_state,
	.process = jpeg_v4_0_3_process_interrupt,
};

static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
		adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
	}
	adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
}

const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_JPEG,
	.major = 4,
	.minor = 0,
	.rev = 3,
	.funcs = &jpeg_v4_0_3_ip_funcs,
};

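/*
 * Uncorrectable-error status registers, an S/D pair per JPEG core, consumed
 * by the RAS query and reset helpers below.
 */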
static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = {
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"},
	{AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D),
	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"},
};

static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
						   uint32_t jpeg_inst,
						   void *ras_err_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;

	/* jpeg v4_0_3 only supports uncorrectable errors */
	amdgpu_ras_inst_query_ras_error_count(adev,
					      jpeg_v4_0_3_ue_reg_list,
					      ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
					      NULL, 0, GET_INST(VCN, jpeg_inst),
					      AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
					      &err_data->ue_count);
}

static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
					      void *ras_err_status)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		dev_warn(adev->dev, "JPEG RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
		jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
}

static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
						   uint32_t jpeg_inst)
{
	amdgpu_ras_inst_reset_ras_error_count(adev,
					      jpeg_v4_0_3_ue_reg_list,
					      ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list),
					      GET_INST(VCN, jpeg_inst));
}

static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
{
	uint32_t i;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
		dev_warn(adev->dev, "JPEG RAS is not supported\n");
		return;
	}

	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++)
		jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
}

static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
	.query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
	.reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
};

static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
				       enum aca_smu_type type, void *data)
{
	struct aca_bank_info info;
	u64 misc0;
	int ret;

	ret = aca_bank_info_decode(bank, &info);
	if (ret)
		return ret;

	misc0 = bank->regs[ACA_REG_IDX_MISC0];
	switch (type) {
	case ACA_SMU_TYPE_UE:
		ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
						     1ULL);
		break;
	case ACA_SMU_TYPE_CE:
		ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
						     ACA_REG__MISC0__ERRCNT(misc0));
		break;
	default:
		return -EINVAL;
	}

	return ret;
}

/* error codes defined by the SMU driver interface header file */
static int jpeg_v4_0_3_err_codes[] = {
	16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
	24, 25, 26, 27, 28, 29, 30, 31
};

static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
					  enum aca_smu_type type, void *data)
{
	u32 instlo;

	instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
	instlo &= GENMASK(31, 1);

	if (instlo != mmSMNAID_AID0_MCA_SMU)
		return false;

	if (aca_bank_check_error_codes(handle->adev, bank,
				       jpeg_v4_0_3_err_codes,
				       ARRAY_SIZE(jpeg_v4_0_3_err_codes)))
		return false;

	return true;
}

static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
	.aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
	.aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
};

static const struct aca_info jpeg_v4_0_3_aca_info = {
	.hwip = ACA_HWIP_TYPE_SMU,
	.mask = ACA_ERROR_UE_MASK,
	.bank_ops = &jpeg_v4_0_3_aca_bank_ops,
};

static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r;

	r = amdgpu_ras_block_late_init(adev, ras_block);
	if (r)
		return r;

	r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
				&jpeg_v4_0_3_aca_info, NULL);
	if (r)
		goto late_fini;

	return 0;

late_fini:
	amdgpu_ras_block_late_fini(adev, ras_block);

	return r;
}

static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
	.ras_block = {
		.hw_ops = &jpeg_v4_0_3_ras_hw_ops,
		.ras_late_init = jpeg_v4_0_3_ras_late_init,
	},
};

static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
	adev->jpeg.ras = &jpeg_v4_0_3_ras;
}