/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
#include "vi.h"
#include "vid.h"

#include "oss/oss_2_4_d.h"
#include "oss/oss_2_4_sh_mask.h"

#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "iceland_sdma_pkt_open.h"

#include "ivsrcid/ivsrcid_vislands30.h"

static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);

MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");

static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = {
        SDMA0_REGISTER_OFFSET,
        SDMA1_REGISTER_OFFSET
};

static const u32 golden_settings_iceland_a11[] = {
        mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
        mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
        mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
        mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};

static const u32 iceland_mgcg_cgcg_init[] = {
        mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
        mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
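 *
 * This file implements sDMA v2.4, the instance found on
 * Topaz (Iceland) parts.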
 */

static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                break;
        default:
                break;
        }
}

static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->sdma.num_instances; i++)
                amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}

/**
 * sdma_v2_4_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        int err = 0, i;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct sdma_firmware_header_v1_0 *hdr;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        default:
                BUG();
        }

        for (i = 0; i < adev->sdma.num_instances; i++) {
                if (i == 0)
                        err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
                                                   AMDGPU_UCODE_REQUIRED,
                                                   "amdgpu/%s_sdma.bin", chip_name);
                else
                        err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
                                                   AMDGPU_UCODE_REQUIRED,
                                                   "amdgpu/%s_sdma1.bin", chip_name);
                if (err)
                        goto out;
                hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
                adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
                adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
                if (adev->sdma.instance[i].feature_version >= 20)
                        adev->sdma.instance[i].burst_nop = true;

                if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
                        info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
                        info->fw = adev->sdma.instance[i].fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                }
        }

out:
        if (err) {
                pr_err("sdma_v2_4: Failed to load firmware \"%s_sdma%s.bin\"\n",
                       chip_name, i == 0 ? "" : "1");
                for (i = 0; i < adev->sdma.num_instances; i++)
                        amdgpu_ucode_release(&adev->sdma.instance[i].fw);
        }
        return err;
}

/**
 * sdma_v2_4_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (VI+).
 */
static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
{
        /* XXX check if swapping is necessary on BE */
        return *ring->rptr_cpu_addr >> 2;
}

/**
 * sdma_v2_4_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VI+).
 */
static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;

        return wptr;
}

/**
 * sdma_v2_4_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (VI+).
 */
static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}

static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
        struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        int i;

        for (i = 0; i < count; i++)
                if (sdma && sdma->burst_nop && (i == 0))
                        amdgpu_ring_write(ring, ring->funcs->nop |
                                SDMA_PKT_NOP_HEADER_COUNT(count - 1));
                else
                        amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
                                   struct amdgpu_job *job,
                                   struct amdgpu_ib *ib,
                                   uint32_t flags)
{
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);

        /* IB packet must end on an 8 DW boundary */
        sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
                          SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
        /* base must be 32 byte aligned */
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0);
}

/**
 * sdma_v2_4_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask = 0;

        if (ring->me == 0)
                ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
        else
                ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
                          SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
                          SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
        amdgpu_ring_write(ring, ref_and_mask); /* reference */
        amdgpu_ring_write(ring, ref_and_mask); /* mask */
        amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

/**
 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (VI).
 */
static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
                                      unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        /* write the fence */
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        /* optionally write high bits as well */
        if (write64bit) {
                addr += 4;
                amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
                amdgpu_ring_write(ring, lower_32_bits(addr));
                amdgpu_ring_write(ring, upper_32_bits(addr));
                amdgpu_ring_write(ring, upper_32_bits(seq));
        }

        /* generate an interrupt */
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
        amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}

/**
 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
 */
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
        u32 rb_cntl, ib_cntl;
        int i;

        for (i = 0; i < adev->sdma.num_instances; i++) {
                rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
                WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
                ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
                WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
        }
}

/**
 * sdma_v2_4_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 */
static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev)
{
        /* XXX todo */
}

/**
 * sdma_v2_4_enable - halt or unhalt the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (VI).
 */
static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
{
        u32 f32_cntl;
        int i;

        if (!enable) {
                sdma_v2_4_gfx_stop(adev);
                sdma_v2_4_rlc_stop(adev);
        }

        for (i = 0; i < adev->sdma.num_instances; i++) {
                f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
                if (enable)
                        f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
                else
                        f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
                WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
        }
}

/**
 * sdma_v2_4_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        u32 rb_cntl, ib_cntl;
        u32 rb_bufsz;
        int i, j, r;

        for (i = 0; i < adev->sdma.num_instances; i++) {
                ring = &adev->sdma.instance[i].ring;

                mutex_lock(&adev->srbm_mutex);
                for (j = 0; j < 16; j++) {
                        vi_srbm_select(adev, 0, 0, 0, j);
                        /* SDMA GFX */
                        WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
                        WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
                }
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
                       adev->gfx.config.gb_addr_config & 0x70);

                WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = order_base_2(ring->ring_size / 4);
                rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
                                        RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
                WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
                WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
                WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
                WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);

                /* set the wb address whether it's enabled or not */
                WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
                       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
                WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
                       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);

                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

                WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
                WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

                ring->wptr = 0;
                WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);

                /* enable DMA RB */
                rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
                WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

                ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
                /* enable DMA IBs */
                WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
        }

        sdma_v2_4_enable(adev, true);
        for (i = 0; i < adev->sdma.num_instances; i++) {
                ring = &adev->sdma.instance[i].ring;
                r = amdgpu_ring_test_helper(ring);
                if (r)
                        return r;
        }

        return 0;
}

/**
 * sdma_v2_4_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
{
        /* XXX todo */
        return 0;
}

/**
 * sdma_v2_4_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_start(struct amdgpu_device *adev)
{
        int r;

        /* halt the engine before programming */
        sdma_v2_4_enable(adev, false);

        /* start the gfx rings and rlc compute queues */
        r = sdma_v2_4_gfx_resume(adev);
        if (r)
                return r;
        r = sdma_v2_4_rlc_resume(adev);
        if (r)
                return r;

        return 0;
}

/**
 * sdma_v2_4_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        unsigned i;
        unsigned index;
        int r;
        u32 tmp;
        u64 gpu_addr;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        tmp = 0xCAFEDEAD;
        adev->wb.wb[index] = cpu_to_le32(tmp);

        r = amdgpu_ring_alloc(ring, 5);
        if (r)
                goto error_free_wb;

        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
                          SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
        amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
        amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = le32_to_cpu(adev->wb.wb[index]);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_wb:
        amdgpu_device_wb_free(adev, index);
        return r;
}

/**
 * sdma_v2_4_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring (VI).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        unsigned index;
        u32 tmp = 0;
        u64 gpu_addr;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        tmp = 0xCAFEDEAD;
        adev->wb.wb[index] = cpu_to_le32(tmp);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256,
                          AMDGPU_IB_POOL_DIRECT, &ib);
        if (r)
                goto err0;

        ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
        ib.ptr[1] = lower_32_bits(gpu_addr);
        ib.ptr[2] = upper_32_bits(gpu_addr);
        ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
        ib.ptr[4] = 0xDEADBEEF;
        ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
        ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
        ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
        ib.length_dw = 8;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err1;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err1;
        } else if (r < 0) {
                goto err1;
        }
        tmp = le32_to_cpu(adev->wb.wb[index]);
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err1:
        amdgpu_ib_free(&ib, NULL);
        dma_fence_put(f);
err0:
        amdgpu_device_wb_free(adev, index);
        return r;
}

/**
 * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (CIK).
 */
static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
{
        unsigned bytes = count * 8;

        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
        ib->ptr[ib->length_dw++] = bytes;
        ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
        ib->ptr[ib->length_dw++] = lower_32_bits(src);
        ib->ptr[ib->length_dw++] = upper_32_bits(src);
        ib->ptr[ib->length_dw++] = lower_32_bits(pe);
        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA (CIK).
 */
static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
                                   uint64_t value, unsigned count,
                                   uint32_t incr)
{
        unsigned ndw = count * 2;

        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
        ib->ptr[ib->length_dw++] = pe;
        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
        ib->ptr[ib->length_dw++] = ndw;
        for (; ndw > 0; ndw -= 2) {
                ib->ptr[ib->length_dw++] = lower_32_bits(value);
                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                value += incr;
        }
}

/**
 * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (CIK).
 */
static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint64_t flags)
{
        /* for physically contiguous pages (vram) */
        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
        ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
        ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
        ib->ptr[ib->length_dw++] = upper_32_bits(flags);
        ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
        ib->ptr[ib->length_dw++] = upper_32_bits(addr);
        ib->ptr[ib->length_dw++] = incr; /* increment size */
        ib->ptr[ib->length_dw++] = 0;
        ib->ptr[ib->length_dw++] = count; /* number of entries */
}

/**
 * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so that its size is a multiple of 8 DWs.
 */
static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
        struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
        u32 pad_count;
        int i;

        pad_count = (-ib->length_dw) & 7;
        for (i = 0; i < pad_count; i++)
                if (sdma && sdma->burst_nop && (i == 0))
                        ib->ptr[ib->length_dw++] =
                                SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
                                SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
                else
                        ib->ptr[ib->length_dw++] =
                                SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        /* wait for idle */
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
                          SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
                          SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
                          SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq); /* reference */
        amdgpu_ring_write(ring, 0xffffffff); /* mask */
        amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}

/**
 * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
 */
static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                         unsigned vmid, uint64_t pd_addr)
{
        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for flush */
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
                          SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
                          SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* reference */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}

static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
                                     uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, val);
}

static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
{
        struct amdgpu_device *adev = ip_block->adev;
        int r;

        adev->sdma.num_instances = SDMA_MAX_INSTANCE;

        r = sdma_v2_4_init_microcode(adev);
        if (r)
                return r;

        sdma_v2_4_set_ring_funcs(adev);
        sdma_v2_4_set_buffer_funcs(adev);
        sdma_v2_4_set_vm_pte_funcs(adev);
        sdma_v2_4_set_irq_funcs(adev);

        return 0;
}

static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
{
        struct amdgpu_ring *ring;
        int r, i;
        struct amdgpu_device *adev = ip_block->adev;

        /* SDMA trap event */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
                              &adev->sdma.trap_irq);
        if (r)
                return r;

        /* SDMA Privileged inst */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
                              &adev->sdma.illegal_inst_irq);
        if (r)
                return r;

        /* SDMA Privileged inst */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
                              &adev->sdma.illegal_inst_irq);
        if (r)
                return r;

        for (i = 0; i < adev->sdma.num_instances; i++) {
                ring = &adev->sdma.instance[i].ring;
                ring->ring_obj = NULL;
                ring->use_doorbell = false;
                sprintf(ring->name, "sdma%d", i);
                r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
                                     (i == 0) ?
                                     AMDGPU_SDMA_IRQ_INSTANCE0 :
                                     AMDGPU_SDMA_IRQ_INSTANCE1,
                                     AMDGPU_RING_PRIO_DEFAULT, NULL);
                if (r)
                        return r;
        }

        return r;
}

static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block)
{
        struct amdgpu_device *adev = ip_block->adev;
        int i;

        for (i = 0; i < adev->sdma.num_instances; i++)
                amdgpu_ring_fini(&adev->sdma.instance[i].ring);

        sdma_v2_4_free_microcode(adev);
        return 0;
}

static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block)
{
        int r;
        struct amdgpu_device *adev = ip_block->adev;

        sdma_v2_4_init_golden_registers(adev);

        r = sdma_v2_4_start(adev);
        if (r)
                return r;

        return r;
}

static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block)
{
        sdma_v2_4_enable(ip_block->adev, false);

        return 0;
}

static int sdma_v2_4_suspend(struct amdgpu_ip_block *ip_block)
{
        return sdma_v2_4_hw_fini(ip_block);
}

static int sdma_v2_4_resume(struct amdgpu_ip_block *ip_block)
{
        return sdma_v2_4_hw_init(ip_block);
}

static bool sdma_v2_4_is_idle(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 tmp = RREG32(mmSRBM_STATUS2);

        if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
                   SRBM_STATUS2__SDMA1_BUSY_MASK))
                return false;

        return true;
}

static int sdma_v2_4_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
        unsigned i;
        u32 tmp;
        struct amdgpu_device *adev = ip_block->adev;

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
                                SRBM_STATUS2__SDMA1_BUSY_MASK);

                if (!tmp)
                        return 0;
                udelay(1);
        }
        return -ETIMEDOUT;
}

static int sdma_v2_4_soft_reset(struct amdgpu_ip_block *ip_block)
{
        u32 srbm_soft_reset = 0;
        struct amdgpu_device *adev = ip_block->adev;
        u32 tmp = RREG32(mmSRBM_STATUS2);

        if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
                /* sdma0 */
                tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
                tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
                WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
        }
        if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
                /* sdma1 */
                tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
                tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
                WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
        }

        if (srbm_soft_reset) {
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                /* Wait a little for things to settle down */
                udelay(50);
        }

        return 0;
}

static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *src,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
        u32 sdma_cntl;

        switch (type) {
        case AMDGPU_SDMA_IRQ_INSTANCE0:
                switch (state) {
                case AMDGPU_IRQ_STATE_DISABLE:
                        sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
                        sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
                        WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET,
                               sdma_cntl);
                        break;
                case AMDGPU_IRQ_STATE_ENABLE:
                        sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
                        sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
                        WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
                        break;
                default:
                        break;
                }
                break;
        case AMDGPU_SDMA_IRQ_INSTANCE1:
                switch (state) {
                case AMDGPU_IRQ_STATE_DISABLE:
                        sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
                        sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
                        WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
                        break;
                case AMDGPU_IRQ_STATE_ENABLE:
                        sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
                        sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
                        WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
                        break;
                default:
                        break;
                }
                break;
        default:
                break;
        }
        return 0;
}

static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
        u8 instance_id, queue_id;

        instance_id = (entry->ring_id & 0x3) >> 0;
        queue_id = (entry->ring_id & 0xc) >> 2;
        DRM_DEBUG("IH: SDMA trap\n");
        switch (instance_id) {
        case 0:
                switch (queue_id) {
                case 0:
                        amdgpu_fence_process(&adev->sdma.instance[0].ring);
                        break;
                case 1:
                        /* XXX compute */
                        break;
                case 2:
                        /* XXX compute */
                        break;
                }
                break;
        case 1:
                switch (queue_id) {
                case 0:
                        amdgpu_fence_process(&adev->sdma.instance[1].ring);
                        break;
                case 1:
                        /* XXX compute */
                        break;
                case 2:
                        /* XXX compute */
                        break;
                }
                break;
        }
        return 0;
}

static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              struct amdgpu_iv_entry *entry)
{
        u8 instance_id, queue_id;

        DRM_ERROR("Illegal instruction in SDMA command stream\n");
        instance_id = (entry->ring_id & 0x3) >> 0;
        queue_id = (entry->ring_id & 0xc) >> 2;

        if (instance_id <= 1 && queue_id == 0)
                drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
        return 0;
}

static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block,
                                           enum amd_clockgating_state state)
{
        /* XXX handled via the smc on VI */
        return 0;
}

static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block,
                                           enum amd_powergating_state state)
{
        return 0;
}

static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
        .name = "sdma_v2_4",
        .early_init = sdma_v2_4_early_init,
        .sw_init = sdma_v2_4_sw_init,
        .sw_fini = sdma_v2_4_sw_fini,
        .hw_init = sdma_v2_4_hw_init,
        .hw_fini = sdma_v2_4_hw_fini,
        .suspend = sdma_v2_4_suspend,
        .resume = sdma_v2_4_resume,
        .is_idle = sdma_v2_4_is_idle,
        .wait_for_idle = sdma_v2_4_wait_for_idle,
        .soft_reset = sdma_v2_4_soft_reset,
        .set_clockgating_state = sdma_v2_4_set_clockgating_state,
        .set_powergating_state = sdma_v2_4_set_powergating_state,
};

static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
        .type = AMDGPU_RING_TYPE_SDMA,
        .align_mask = 0xf,
        .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
        .support_64bit_ptrs = false,
        .secure_submission_supported = true,
        .get_rptr = sdma_v2_4_ring_get_rptr,
        .get_wptr = sdma_v2_4_ring_get_wptr,
        .set_wptr = sdma_v2_4_ring_set_wptr,
        .emit_frame_size =
                6 + /* sdma_v2_4_ring_emit_hdp_flush */
                3 + /* hdp invalidate */
                6 + /* sdma_v2_4_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
                10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
        .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
        .emit_ib = sdma_v2_4_ring_emit_ib,
        .emit_fence = sdma_v2_4_ring_emit_fence,
        .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
        .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
        .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
        .test_ring = sdma_v2_4_ring_test_ring,
        .test_ib = sdma_v2_4_ring_test_ib,
        .insert_nop = sdma_v2_4_ring_insert_nop,
        .pad_ib = sdma_v2_4_ring_pad_ib,
        .emit_wreg = sdma_v2_4_ring_emit_wreg,
};

static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        for (i = 0; i < adev->sdma.num_instances; i++) {
                adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
                adev->sdma.instance[i].ring.me = i;
        }
}

static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
        .set = sdma_v2_4_set_trap_irq_state,
        .process = sdma_v2_4_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
        .process = sdma_v2_4_process_illegal_inst_irq,
};

static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
        adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
        adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
}

/**
 * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @copy_flags: unused
 *
 * Copy GPU buffers using the DMA engine (VI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
                                       uint64_t src_offset,
                                       uint64_t dst_offset,
                                       uint32_t byte_count,
                                       uint32_t copy_flags)
{
        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
                SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
        ib->ptr[ib->length_dw++] = byte_count;
        ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
        ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
        ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
                                       uint32_t src_data,
                                       uint64_t dst_offset,
                                       uint32_t byte_count)
{
        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
        ib->ptr[ib->length_dw++] = src_data;
        ib->ptr[ib->length_dw++] = byte_count;
}

static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
        .copy_max_bytes = 0x1fffff,
        .copy_num_dw = 7,
        .emit_copy_buffer = sdma_v2_4_emit_copy_buffer,

        .fill_max_bytes = 0x1fffff,
        .fill_num_dw = 7,
        .emit_fill_buffer = sdma_v2_4_emit_fill_buffer,
};

static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
{
        adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
        adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
        .copy_pte_num_dw = 7,
        .copy_pte = sdma_v2_4_vm_copy_pte,

        .write_pte = sdma_v2_4_vm_write_pte,
        .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
};

static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
        unsigned i;

        adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
        for (i = 0; i < adev->sdma.num_instances; i++) {
                adev->vm_manager.vm_pte_scheds[i] =
                        &adev->sdma.instance[i].ring.sched;
        }
        adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}

const struct amdgpu_ip_block_version sdma_v2_4_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_SDMA,
        .major = 2,
        .minor = 4,
        .rev = 0,
        .funcs = &sdma_v2_4_ip_funcs,
};