1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Alex Deucher 23 */ 24 25 #include "amdgpu.h" 26 #include "amdgpu_trace.h" 27 #include "si.h" 28 #include "sid.h" 29 30 #include "oss/oss_1_0_d.h" 31 #include "oss/oss_1_0_sh_mask.h" 32 const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 33 { 34 DMA0_REGISTER_OFFSET, 35 DMA1_REGISTER_OFFSET 36 }; 37 38 static void si_dma_set_ring_funcs(struct amdgpu_device *adev); 39 static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); 40 static void si_dma_set_irq_funcs(struct amdgpu_device *adev); 41 42 /** 43 * si_dma_ring_get_rptr - get the current read pointer 44 * 45 * @ring: amdgpu ring pointer 46 * 47 * Get the current rptr from the hardware (SI). 48 */ 49 static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) 50 { 51 return *ring->rptr_cpu_addr; 52 } 53 54 /** 55 * si_dma_ring_get_wptr - get the current write pointer 56 * 57 * @ring: amdgpu ring pointer 58 * 59 * Get the current wptr from the hardware (SI). 60 */ 61 static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) 62 { 63 struct amdgpu_device *adev = ring->adev; 64 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 65 66 return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; 67 } 68 69 static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) 70 { 71 struct amdgpu_device *adev = ring->adev; 72 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 73 74 WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); 75 } 76 77 static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, 78 struct amdgpu_job *job, 79 struct amdgpu_ib *ib, 80 uint32_t flags) 81 { 82 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 83 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 84 * Pad as necessary with NOPs. 85 */ 86 while ((lower_32_bits(ring->wptr) & 7) != 5) 87 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); 88 amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0)); 89 amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 90 amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 91 92 } 93 94 /** 95 * si_dma_ring_emit_fence - emit a fence on the DMA ring 96 * 97 * @ring: amdgpu ring pointer 98 * @addr: address 99 * @seq: sequence number 100 * @flags: fence related flags 101 * 102 * Add a DMA fence packet to the ring to write 103 * the fence seq number and DMA trap packet to generate 104 * an interrupt if needed (VI). 105 */ 106 static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 107 unsigned flags) 108 { 109 110 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 111 /* write the fence */ 112 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 113 amdgpu_ring_write(ring, addr & 0xfffffffc); 114 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 115 amdgpu_ring_write(ring, seq); 116 /* optionally write high bits as well */ 117 if (write64bit) { 118 addr += 4; 119 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 120 amdgpu_ring_write(ring, addr & 0xfffffffc); 121 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 122 amdgpu_ring_write(ring, upper_32_bits(seq)); 123 } 124 /* generate an interrupt */ 125 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0)); 126 } 127 128 static void si_dma_stop(struct amdgpu_device *adev) 129 { 130 u32 rb_cntl; 131 unsigned i; 132 133 for (i = 0; i < adev->sdma.num_instances; i++) { 134 /* dma0 */ 135 rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); 136 rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; 137 WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 138 } 139 } 140 141 static int si_dma_start(struct amdgpu_device *adev) 142 { 143 struct amdgpu_ring *ring; 144 u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz; 145 int i, r; 146 uint64_t rptr_addr; 147 148 for (i = 0; i < adev->sdma.num_instances; i++) { 149 ring = &adev->sdma.instance[i].ring; 150 151 WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); 152 WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 153 154 /* Set ring buffer size in dwords */ 155 rb_bufsz = order_base_2(ring->ring_size / 4); 156 rb_cntl = rb_bufsz << 1; 157 #ifdef __BIG_ENDIAN 158 rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; 159 #endif 160 WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 161 162 /* Initialize the ring buffer's read and write pointers */ 163 WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); 164 WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); 165 166 rptr_addr = ring->rptr_gpu_addr; 167 168 WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); 169 WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); 170 171 rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; 172 173 WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 174 175 /* enable DMA IBs */ 176 ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; 177 #ifdef __BIG_ENDIAN 178 ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; 179 #endif 180 WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 181 182 dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); 183 dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; 184 WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); 185 186 ring->wptr = 0; 187 WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); 188 WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); 189 190 r = amdgpu_ring_test_helper(ring); 191 if (r) 192 return r; 193 } 194 195 return 0; 196 } 197 198 /** 199 * si_dma_ring_test_ring - simple async dma engine test 200 * 201 * @ring: amdgpu_ring structure holding ring information 202 * 203 * Test the DMA engine by writing using it to write an 204 * value to memory. (VI). 205 * Returns 0 for success, error for failure. 206 */ 207 static int si_dma_ring_test_ring(struct amdgpu_ring *ring) 208 { 209 struct amdgpu_device *adev = ring->adev; 210 unsigned i; 211 unsigned index; 212 int r; 213 u32 tmp; 214 u64 gpu_addr; 215 216 r = amdgpu_device_wb_get(adev, &index); 217 if (r) 218 return r; 219 220 gpu_addr = adev->wb.gpu_addr + (index * 4); 221 tmp = 0xCAFEDEAD; 222 adev->wb.wb[index] = cpu_to_le32(tmp); 223 224 r = amdgpu_ring_alloc(ring, 4); 225 if (r) 226 goto error_free_wb; 227 228 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); 229 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 230 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); 231 amdgpu_ring_write(ring, 0xDEADBEEF); 232 amdgpu_ring_commit(ring); 233 234 for (i = 0; i < adev->usec_timeout; i++) { 235 tmp = le32_to_cpu(adev->wb.wb[index]); 236 if (tmp == 0xDEADBEEF) 237 break; 238 udelay(1); 239 } 240 241 if (i >= adev->usec_timeout) 242 r = -ETIMEDOUT; 243 244 error_free_wb: 245 amdgpu_device_wb_free(adev, index); 246 return r; 247 } 248 249 /** 250 * si_dma_ring_test_ib - test an IB on the DMA engine 251 * 252 * @ring: amdgpu_ring structure holding ring information 253 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT 254 * 255 * Test a simple IB in the DMA ring (VI). 256 * Returns 0 on success, error on failure. 257 */ 258 static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) 259 { 260 struct amdgpu_device *adev = ring->adev; 261 struct amdgpu_ib ib; 262 struct dma_fence *f = NULL; 263 unsigned index; 264 u32 tmp = 0; 265 u64 gpu_addr; 266 long r; 267 268 r = amdgpu_device_wb_get(adev, &index); 269 if (r) 270 return r; 271 272 gpu_addr = adev->wb.gpu_addr + (index * 4); 273 tmp = 0xCAFEDEAD; 274 adev->wb.wb[index] = cpu_to_le32(tmp); 275 memset(&ib, 0, sizeof(ib)); 276 r = amdgpu_ib_get(adev, NULL, 256, 277 AMDGPU_IB_POOL_DIRECT, &ib); 278 if (r) 279 goto err0; 280 281 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); 282 ib.ptr[1] = lower_32_bits(gpu_addr); 283 ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; 284 ib.ptr[3] = 0xDEADBEEF; 285 ib.length_dw = 4; 286 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 287 if (r) 288 goto err1; 289 290 r = dma_fence_wait_timeout(f, false, timeout); 291 if (r == 0) { 292 r = -ETIMEDOUT; 293 goto err1; 294 } else if (r < 0) { 295 goto err1; 296 } 297 tmp = le32_to_cpu(adev->wb.wb[index]); 298 if (tmp == 0xDEADBEEF) 299 r = 0; 300 else 301 r = -EINVAL; 302 303 err1: 304 amdgpu_ib_free(&ib, NULL); 305 dma_fence_put(f); 306 err0: 307 amdgpu_device_wb_free(adev, index); 308 return r; 309 } 310 311 /** 312 * si_dma_vm_copy_pte - update PTEs by copying them from the GART 313 * 314 * @ib: indirect buffer to fill with commands 315 * @pe: addr of the page entry 316 * @src: src addr to copy from 317 * @count: number of page entries to update 318 * 319 * Update PTEs by copying them from the GART using DMA (SI). 320 */ 321 static void si_dma_vm_copy_pte(struct amdgpu_ib *ib, 322 uint64_t pe, uint64_t src, 323 unsigned count) 324 { 325 unsigned bytes = count * 8; 326 327 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 328 1, 0, 0, bytes); 329 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 330 ib->ptr[ib->length_dw++] = lower_32_bits(src); 331 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 332 ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 333 } 334 335 /** 336 * si_dma_vm_write_pte - update PTEs by writing them manually 337 * 338 * @ib: indirect buffer to fill with commands 339 * @pe: addr of the page entry 340 * @value: dst addr to write into pe 341 * @count: number of page entries to update 342 * @incr: increase next addr by incr bytes 343 * 344 * Update PTEs by writing them manually using DMA (SI). 345 */ 346 static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 347 uint64_t value, unsigned count, 348 uint32_t incr) 349 { 350 unsigned ndw = count * 2; 351 352 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); 353 ib->ptr[ib->length_dw++] = lower_32_bits(pe); 354 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 355 for (; ndw > 0; ndw -= 2) { 356 ib->ptr[ib->length_dw++] = lower_32_bits(value); 357 ib->ptr[ib->length_dw++] = upper_32_bits(value); 358 value += incr; 359 } 360 } 361 362 /** 363 * si_dma_vm_set_pte_pde - update the page tables using sDMA 364 * 365 * @ib: indirect buffer to fill with commands 366 * @pe: addr of the page entry 367 * @addr: dst addr to write into pe 368 * @count: number of page entries to update 369 * @incr: increase next addr by incr bytes 370 * @flags: access flags 371 * 372 * Update the page tables using sDMA (CIK). 373 */ 374 static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, 375 uint64_t pe, 376 uint64_t addr, unsigned count, 377 uint32_t incr, uint64_t flags) 378 { 379 uint64_t value; 380 unsigned ndw; 381 382 while (count) { 383 ndw = count * 2; 384 if (ndw > 0xFFFFE) 385 ndw = 0xFFFFE; 386 387 if (flags & AMDGPU_PTE_VALID) 388 value = addr; 389 else 390 value = 0; 391 392 /* for physically contiguous pages (vram) */ 393 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 394 ib->ptr[ib->length_dw++] = pe; /* dst addr */ 395 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 396 ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 397 ib->ptr[ib->length_dw++] = upper_32_bits(flags); 398 ib->ptr[ib->length_dw++] = value; /* value */ 399 ib->ptr[ib->length_dw++] = upper_32_bits(value); 400 ib->ptr[ib->length_dw++] = incr; /* increment size */ 401 ib->ptr[ib->length_dw++] = 0; 402 pe += ndw * 4; 403 addr += (ndw / 2) * incr; 404 count -= ndw / 2; 405 } 406 } 407 408 /** 409 * si_dma_ring_pad_ib - pad the IB to the required number of dw 410 * 411 * @ring: amdgpu_ring pointer 412 * @ib: indirect buffer to fill with padding 413 * 414 */ 415 static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 416 { 417 while (ib->length_dw & 0x7) 418 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); 419 } 420 421 /** 422 * si_dma_ring_emit_pipeline_sync - sync the pipeline 423 * 424 * @ring: amdgpu_ring pointer 425 * 426 * Make sure all previous operations are completed (CIK). 427 */ 428 static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 429 { 430 uint32_t seq = ring->fence_drv.sync_seq; 431 uint64_t addr = ring->fence_drv.gpu_addr; 432 433 /* wait for idle */ 434 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) | 435 (1 << 27)); /* Poll memory */ 436 amdgpu_ring_write(ring, lower_32_bits(addr)); 437 amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */ 438 amdgpu_ring_write(ring, 0xffffffff); /* mask */ 439 amdgpu_ring_write(ring, seq); /* value */ 440 amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */ 441 } 442 443 /** 444 * si_dma_ring_emit_vm_flush - cik vm flush using sDMA 445 * 446 * @ring: amdgpu_ring pointer 447 * @vmid: vmid number to use 448 * @pd_addr: address 449 * 450 * Update the page table base and flush the VM TLB 451 * using sDMA (VI). 452 */ 453 static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, 454 unsigned vmid, uint64_t pd_addr) 455 { 456 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 457 458 /* wait for invalidate to complete */ 459 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); 460 amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); 461 amdgpu_ring_write(ring, 0xff << 16); /* retry */ 462 amdgpu_ring_write(ring, 1 << vmid); /* mask */ 463 amdgpu_ring_write(ring, 0); /* value */ 464 amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ 465 } 466 467 static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, 468 uint32_t reg, uint32_t val) 469 { 470 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 471 amdgpu_ring_write(ring, (0xf << 16) | reg); 472 amdgpu_ring_write(ring, val); 473 } 474 475 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { 476 .copy_pte_num_dw = 5, 477 .copy_pte = si_dma_vm_copy_pte, 478 479 .write_pte = si_dma_vm_write_pte, 480 .set_pte_pde = si_dma_vm_set_pte_pde, 481 }; 482 483 static int si_dma_early_init(struct amdgpu_ip_block *ip_block) 484 { 485 struct amdgpu_device *adev = ip_block->adev; 486 487 adev->sdma.num_instances = SDMA_MAX_INSTANCE; 488 489 si_dma_set_ring_funcs(adev); 490 amdgpu_sdma_set_vm_pte_scheds(adev, &si_dma_vm_pte_funcs); 491 si_dma_set_irq_funcs(adev); 492 493 return 0; 494 } 495 496 static int si_dma_sw_init(struct amdgpu_ip_block *ip_block) 497 { 498 struct amdgpu_ring *ring; 499 int r, i; 500 struct amdgpu_device *adev = ip_block->adev; 501 502 /* DMA0 trap event */ 503 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, 504 &adev->sdma.trap_irq); 505 if (r) 506 return r; 507 508 /* DMA1 trap event */ 509 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, 510 &adev->sdma.trap_irq); 511 if (r) 512 return r; 513 514 for (i = 0; i < adev->sdma.num_instances; i++) { 515 ring = &adev->sdma.instance[i].ring; 516 ring->ring_obj = NULL; 517 ring->use_doorbell = false; 518 sprintf(ring->name, "sdma%d", i); 519 r = amdgpu_ring_init(adev, ring, 1024, 520 &adev->sdma.trap_irq, 521 (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : 522 AMDGPU_SDMA_IRQ_INSTANCE1, 523 AMDGPU_RING_PRIO_DEFAULT, NULL); 524 if (r) 525 return r; 526 } 527 528 return r; 529 } 530 531 static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block) 532 { 533 struct amdgpu_device *adev = ip_block->adev; 534 int i; 535 536 for (i = 0; i < adev->sdma.num_instances; i++) 537 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 538 539 return 0; 540 } 541 542 static int si_dma_hw_init(struct amdgpu_ip_block *ip_block) 543 { 544 struct amdgpu_device *adev = ip_block->adev; 545 int r; 546 547 r = si_dma_start(adev); 548 if (r) 549 return r; 550 si_dma_set_buffer_funcs(adev); 551 552 return 0; 553 } 554 555 static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block) 556 { 557 si_dma_stop(ip_block->adev); 558 559 return 0; 560 } 561 562 static int si_dma_suspend(struct amdgpu_ip_block *ip_block) 563 { 564 return si_dma_hw_fini(ip_block); 565 } 566 567 static int si_dma_resume(struct amdgpu_ip_block *ip_block) 568 { 569 return si_dma_hw_init(ip_block); 570 } 571 572 static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) 573 { 574 struct amdgpu_device *adev = ip_block->adev; 575 576 u32 tmp = RREG32(mmSRBM_STATUS2); 577 578 if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) 579 return false; 580 581 return true; 582 } 583 584 static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block) 585 { 586 unsigned i; 587 struct amdgpu_device *adev = ip_block->adev; 588 589 for (i = 0; i < adev->usec_timeout; i++) { 590 if (si_dma_is_idle(ip_block)) 591 return 0; 592 udelay(1); 593 } 594 return -ETIMEDOUT; 595 } 596 597 static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block) 598 { 599 drm_info(adev_to_drm(ip_block->adev), "si_dma_soft_reset --- not implemented !!!!!!!\n"); 600 return 0; 601 } 602 603 static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, 604 struct amdgpu_irq_src *src, 605 unsigned type, 606 enum amdgpu_interrupt_state state) 607 { 608 u32 sdma_cntl; 609 610 switch (type) { 611 case AMDGPU_SDMA_IRQ_INSTANCE0: 612 switch (state) { 613 case AMDGPU_IRQ_STATE_DISABLE: 614 sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); 615 sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; 616 WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 617 break; 618 case AMDGPU_IRQ_STATE_ENABLE: 619 sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); 620 sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; 621 WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 622 break; 623 default: 624 break; 625 } 626 break; 627 case AMDGPU_SDMA_IRQ_INSTANCE1: 628 switch (state) { 629 case AMDGPU_IRQ_STATE_DISABLE: 630 sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); 631 sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; 632 WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 633 break; 634 case AMDGPU_IRQ_STATE_ENABLE: 635 sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); 636 sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; 637 WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 638 break; 639 default: 640 break; 641 } 642 break; 643 default: 644 break; 645 } 646 return 0; 647 } 648 649 static int si_dma_process_trap_irq(struct amdgpu_device *adev, 650 struct amdgpu_irq_src *source, 651 struct amdgpu_iv_entry *entry) 652 { 653 if (entry->src_id == 224) 654 amdgpu_fence_process(&adev->sdma.instance[0].ring); 655 else 656 amdgpu_fence_process(&adev->sdma.instance[1].ring); 657 return 0; 658 } 659 660 static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, 661 enum amd_clockgating_state state) 662 { 663 u32 orig, data, offset; 664 int i; 665 bool enable; 666 struct amdgpu_device *adev = ip_block->adev; 667 668 enable = (state == AMD_CG_STATE_GATE); 669 670 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 671 for (i = 0; i < adev->sdma.num_instances; i++) { 672 if (i == 0) 673 offset = DMA0_REGISTER_OFFSET; 674 else 675 offset = DMA1_REGISTER_OFFSET; 676 orig = data = RREG32(mmDMA_POWER_CNTL + offset); 677 data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 678 if (data != orig) 679 WREG32(mmDMA_POWER_CNTL + offset, data); 680 WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); 681 } 682 } else { 683 for (i = 0; i < adev->sdma.num_instances; i++) { 684 if (i == 0) 685 offset = DMA0_REGISTER_OFFSET; 686 else 687 offset = DMA1_REGISTER_OFFSET; 688 orig = data = RREG32(mmDMA_POWER_CNTL + offset); 689 data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 690 if (data != orig) 691 WREG32(mmDMA_POWER_CNTL + offset, data); 692 693 orig = data = RREG32(mmDMA_CLK_CTRL + offset); 694 data = 0xff000000; 695 if (data != orig) 696 WREG32(mmDMA_CLK_CTRL + offset, data); 697 } 698 } 699 700 return 0; 701 } 702 703 static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, 704 enum amd_powergating_state state) 705 { 706 u32 tmp; 707 708 struct amdgpu_device *adev = ip_block->adev; 709 710 WREG32(mmDMA_PGFSM_WRITE, 0x00002000); 711 WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); 712 713 for (tmp = 0; tmp < 5; tmp++) 714 WREG32(mmDMA_PGFSM_WRITE, 0); 715 716 return 0; 717 } 718 719 static const struct amd_ip_funcs si_dma_ip_funcs = { 720 .name = "si_dma", 721 .early_init = si_dma_early_init, 722 .sw_init = si_dma_sw_init, 723 .sw_fini = si_dma_sw_fini, 724 .hw_init = si_dma_hw_init, 725 .hw_fini = si_dma_hw_fini, 726 .suspend = si_dma_suspend, 727 .resume = si_dma_resume, 728 .is_idle = si_dma_is_idle, 729 .wait_for_idle = si_dma_wait_for_idle, 730 .soft_reset = si_dma_soft_reset, 731 .set_clockgating_state = si_dma_set_clockgating_state, 732 .set_powergating_state = si_dma_set_powergating_state, 733 }; 734 735 static const struct amdgpu_ring_funcs si_dma_ring_funcs = { 736 .type = AMDGPU_RING_TYPE_SDMA, 737 .align_mask = 0xf, 738 .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), 739 .support_64bit_ptrs = false, 740 .get_rptr = si_dma_ring_get_rptr, 741 .get_wptr = si_dma_ring_get_wptr, 742 .set_wptr = si_dma_ring_set_wptr, 743 .emit_frame_size = 744 3 + 3 + /* hdp flush / invalidate */ 745 6 + /* si_dma_ring_emit_pipeline_sync */ 746 SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */ 747 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */ 748 .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */ 749 .emit_ib = si_dma_ring_emit_ib, 750 .emit_fence = si_dma_ring_emit_fence, 751 .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, 752 .emit_vm_flush = si_dma_ring_emit_vm_flush, 753 .test_ring = si_dma_ring_test_ring, 754 .test_ib = si_dma_ring_test_ib, 755 .insert_nop = amdgpu_ring_insert_nop, 756 .pad_ib = si_dma_ring_pad_ib, 757 .emit_wreg = si_dma_ring_emit_wreg, 758 }; 759 760 static void si_dma_set_ring_funcs(struct amdgpu_device *adev) 761 { 762 int i; 763 764 for (i = 0; i < adev->sdma.num_instances; i++) 765 adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs; 766 } 767 768 static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { 769 .set = si_dma_set_trap_irq_state, 770 .process = si_dma_process_trap_irq, 771 }; 772 773 static void si_dma_set_irq_funcs(struct amdgpu_device *adev) 774 { 775 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 776 adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; 777 } 778 779 /** 780 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine 781 * 782 * @ib: indirect buffer to copy to 783 * @src_offset: src GPU address 784 * @dst_offset: dst GPU address 785 * @byte_count: number of bytes to xfer 786 * @copy_flags: unused 787 * 788 * Copy GPU buffers using the DMA engine (VI). 789 * Used by the amdgpu ttm implementation to move pages if 790 * registered as the asic copy callback. 791 */ 792 static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, 793 uint64_t src_offset, 794 uint64_t dst_offset, 795 uint32_t byte_count, 796 uint32_t copy_flags) 797 { 798 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 799 1, 0, 0, byte_count); 800 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 801 ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 802 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff; 803 ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff; 804 } 805 806 /** 807 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine 808 * 809 * @ib: indirect buffer to copy to 810 * @src_data: value to write to buffer 811 * @dst_offset: dst GPU address 812 * @byte_count: number of bytes to xfer 813 * 814 * Fill GPU buffers using the DMA engine (VI). 815 */ 816 static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib, 817 uint32_t src_data, 818 uint64_t dst_offset, 819 uint32_t byte_count) 820 { 821 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL, 822 0, 0, 0, byte_count / 4); 823 ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 824 ib->ptr[ib->length_dw++] = src_data; 825 ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16; 826 } 827 828 829 static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = { 830 .copy_max_bytes = 0xffff8, 831 .copy_num_dw = 5, 832 .emit_copy_buffer = si_dma_emit_copy_buffer, 833 834 .fill_max_bytes = 0xffff8, 835 .fill_num_dw = 4, 836 .emit_fill_buffer = si_dma_emit_fill_buffer, 837 }; 838 839 static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) 840 { 841 amdgpu_sdma_set_buffer_funcs_scheds(adev, &si_dma_buffer_funcs); 842 } 843 844 const struct amdgpu_ip_block_version si_dma_ip_block = 845 { 846 .type = AMD_IP_BLOCK_TYPE_SDMA, 847 .major = 1, 848 .minor = 0, 849 .rev = 0, 850 .funcs = &si_dma_ip_funcs, 851 }; 852