/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si.h"
#include "sid.h"

#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"

const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	DMA0_REGISTER_OFFSET,
	DMA1_REGISTER_OFFSET
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);

/**
 * si_dma_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware (SI).
 */
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}

/**
 * si_dma_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (SI).
 */
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}

/**
 * si_dma_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (SI).
 */
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}

/**
 * si_dma_ring_emit_ib - schedule an IB on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring (SI).
 */
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((lower_32_bits(ring->wptr) & 7) != 5)
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));

	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
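
/*
 * A worked example of the padding rule above, with an illustrative wptr
 * value: the INDIRECT_BUFFER packet is 3 dwords (header, addr_lo,
 * len | addr_hi), so padding until (wptr & 7) == 5 puts it in slots
 * 5..7 of an 8-dword group and it ends exactly on the 8 DW boundary.
 * For instance, if lower_32_bits(ring->wptr) & 7 == 2, three NOPs are
 * emitted (slots 2, 3, 4) and the packet then fills slots 5, 6, 7; if
 * the remainder is already 5, no padding is written at all.
 */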

/**
 * si_dma_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address to write the fence sequence number to
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (SI).
 */
static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				   unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
	amdgpu_ring_write(ring, seq);
	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
		amdgpu_ring_write(ring, addr & 0xfffffffc);
		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}
	/* generate an interrupt */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
}
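
/*
 * For reference, the sequence emitted above looks like this in the
 * 64-bit case (addresses illustrative): each FENCE packet stores a
 * single dword, so the sequence number is split across two packets at
 * addr and addr + 4, followed by a TRAP that raises the interrupt
 * handled by si_dma_process_trap_irq():
 *
 *   FENCE, addr_lo,     addr_hi & 0xff,     lower_32_bits(seq)
 *   FENCE, (addr+4)_lo, (addr+4)_hi & 0xff, upper_32_bits(seq)
 *   TRAP
 */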

static void si_dma_stop(struct amdgpu_device *adev)
{
	u32 rb_cntl;
	unsigned i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* disable the ring buffer */
		rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK;
		WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
	}
}

static int si_dma_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
	int i, r;
	uint64_t rptr_addr;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
		WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK |
			   DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
#endif
		WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0);

		rptr_addr = ring->rptr_gpu_addr;

		WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
		WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);

		rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;

		WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
#endif
		WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]);
		dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK;
		WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl);

		ring->wptr = 0;
		WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
		WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK);

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	return 0;
}

/**
 * si_dma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (SI).
 * Returns 0 for success, error for failure.
 */
static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 4);
	if (r)
		goto error_free_wb;

	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_wb:
	amdgpu_device_wb_free(adev, index);
	return r;
}
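
/*
 * The test above relies on the DMA_PACKET_WRITE layout, where the last
 * DMA_PACKET() argument is the number of payload dwords: with a count
 * of 1 the packet is four dwords total: header, destination address
 * bits 31:0, destination address bits 39:32, and the payload
 * (0xDEADBEEF here). si_dma_ring_test_ib() below builds the identical
 * packet inside an IB, and si_dma_vm_write_pte() uses the same packet
 * with a larger count to stream PTE values.
 */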

/**
 * si_dma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring (SI).
 * Returns 0 on success, error on failure.
 */
static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err0;

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using DMA (SI).
 */
static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
			       uint64_t pe, uint64_t src,
			       unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, bytes);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
}

/**
 * si_dma_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using DMA (SI).
 */
static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t value, unsigned count,
				uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * si_dma_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (SI).
 */
static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
				  uint64_t pe,
				  uint64_t addr, unsigned count,
				  uint32_t incr, uint64_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}
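
/*
 * Worked example for the chunking loop above (numbers illustrative):
 * with count == 0x80000 entries, ndw would be 0x100000 payload dwords,
 * which exceeds the 0xFFFFE per-packet limit, so the first PTE_PDE
 * packet covers 0x7FFFF entries; pe then advances by 0xFFFFE * 4 bytes
 * and addr by 0x7FFFF * incr, and the single remaining entry is
 * emitted in a second, 2-dword-payload packet.
 */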

/**
 * si_dma_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords (SI).
 */
static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}

/**
 * si_dma_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (SI).
 */
static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
			  (1 << 27)); /* Poll memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, seq); /* value */
	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
}

/**
 * si_dma_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: page directory address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (SI).
 */
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
				      unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for invalidate to complete */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0xff << 16); /* retry */
	amdgpu_ring_write(ring, 1 << vmid); /* mask */
	amdgpu_ring_write(ring, 0); /* value */
	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}
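
/*
 * Both POLL_REG_MEM uses above share the same trailing dword encoding,
 * going by the inline comments: bits 31:28 select the compare function
 * (3 == equal for the fence wait, 0 == always for the invalidate wait)
 * and the low bits hold the 0x20 poll interval, with the 0xff retry
 * count at bits 23:16 of the third dword. The pipeline sync sets bit 27
 * in the header to poll memory (the fence location), while the VM flush
 * leaves it clear and polls the VM_INVALIDATE_REQUEST register instead.
 */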

static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
				  uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | reg);
	amdgpu_ring_write(ring, val);
}

static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->sdma.num_instances = SDMA_MAX_INSTANCE;

	si_dma_set_ring_funcs(adev);
	si_dma_set_buffer_funcs(adev);
	si_dma_set_vm_pte_funcs(adev);
	si_dma_set_irq_funcs(adev);

	return 0;
}

static int si_dma_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = ip_block->adev;

	/* DMA0 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* DMA1 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
				     AMDGPU_SDMA_IRQ_INSTANCE1,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	return r;
}

static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	return si_dma_start(adev);
}

static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)
{
	si_dma_stop(ip_block->adev);

	return 0;
}

static int si_dma_suspend(struct amdgpu_ip_block *ip_block)
{
	return si_dma_hw_fini(ip_block);
}

static int si_dma_resume(struct amdgpu_ip_block *ip_block)
{
	return si_dma_hw_init(ip_block);
}

static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK))
		return false;

	return true;
}

static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned i;
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (si_dma_is_idle(ip_block))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
	DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
	return 0;
}

static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *src,
				     unsigned type,
				     enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_INSTANCE0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_INSTANCE1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

static int si_dma_process_trap_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	if (entry->src_id == 224)
		amdgpu_fence_process(&adev->sdma.instance[0].ring);
	else
		amdgpu_fence_process(&adev->sdma.instance[1].ring);
	return 0;
}

static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					enum amd_clockgating_state state)
{
	u32 orig, data, offset;
	int i;
	bool enable;
	struct amdgpu_device *adev = ip_block->adev;

	enable = (state == AMD_CG_STATE_GATE);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(mmDMA_POWER_CNTL + offset);
			data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (data != orig)
				WREG32(mmDMA_POWER_CNTL + offset, data);
			WREG32(mmDMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(mmDMA_POWER_CNTL + offset);
			data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (data != orig)
				WREG32(mmDMA_POWER_CNTL + offset, data);

			orig = data = RREG32(mmDMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(mmDMA_CLK_CTRL + offset, data);
		}
	}

	return 0;
}

static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
					enum amd_powergating_state state)
{
	u32 tmp;
	struct amdgpu_device *adev = ip_block->adev;

	WREG32(mmDMA_PGFSM_WRITE, 0x00002000);
	WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff);

	for (tmp = 0; tmp < 5; tmp++)
		WREG32(mmDMA_PGFSM_WRITE, 0);

	return 0;
}

static const struct amd_ip_funcs si_dma_ip_funcs = {
	.name = "si_dma",
	.early_init = si_dma_early_init,
	.sw_init = si_dma_sw_init,
	.sw_fini = si_dma_sw_fini,
	.hw_init = si_dma_hw_init,
	.hw_fini = si_dma_hw_fini,
	.suspend = si_dma_suspend,
	.resume = si_dma_resume,
	.is_idle = si_dma_is_idle,
	.wait_for_idle = si_dma_wait_for_idle,
	.soft_reset = si_dma_soft_reset,
	.set_clockgating_state = si_dma_set_clockgating_state,
	.set_powergating_state = si_dma_set_powergating_state,
};

static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
	.support_64bit_ptrs = false,
	.get_rptr = si_dma_ring_get_rptr,
	.get_wptr = si_dma_ring_get_wptr,
	.set_wptr = si_dma_ring_set_wptr,
	.emit_frame_size =
		3 + 3 + /* hdp flush / invalidate */
		6 + /* si_dma_ring_emit_pipeline_sync */
		SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
	.emit_ib = si_dma_ring_emit_ib,
	.emit_fence = si_dma_ring_emit_fence,
	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
	.emit_vm_flush = si_dma_ring_emit_vm_flush,
	.test_ring = si_dma_ring_test_ring,
	.test_ib = si_dma_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = si_dma_ring_pad_ib,
	.emit_wreg = si_dma_ring_emit_wreg,
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
}

static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq,
};

static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
}

/**
 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @copy_flags: unused
 *
 * Copy GPU buffers using the DMA engine (SI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    uint32_t byte_count,
				    uint32_t copy_flags)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, byte_count);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
}

/**
 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (SI).
 */
static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
				    uint32_t src_data,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
					      0, 0, 0, byte_count / 4);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
}

static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
	.copy_max_bytes = 0xffff8,
	.copy_num_dw = 5,
	.emit_copy_buffer = si_dma_emit_copy_buffer,

	.fill_max_bytes = 0xffff8,
	.fill_num_dw = 4,
	.emit_fill_buffer = si_dma_emit_fill_buffer,
};

static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &si_dma_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
	.copy_pte_num_dw = 5,
	.copy_pte = si_dma_vm_copy_pte,

	.write_pte = si_dma_vm_write_pte,
	.set_pte_pde = si_dma_vm_set_pte_pde,
};

static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->vm_manager.vm_pte_scheds[i] =
			&adev->sdma.instance[i].ring.sched;
	}
	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}

const struct amdgpu_ip_block_version si_dma_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &si_dma_ip_funcs,
};
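
/*
 * Note on the sizes registered above: copy_num_dw == 5 and
 * copy_pte_num_dw == 5 match the five dwords emitted by
 * si_dma_emit_copy_buffer() and si_dma_vm_copy_pte(), and
 * fill_num_dw == 4 matches si_dma_emit_fill_buffer(), so callers can
 * size the IBs they build directly from these counts (plus the NOP
 * padding added by si_dma_ring_pad_ib()).
 */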