/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si.h"
#include "sid.h"

#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"

const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	DMA0_REGISTER_OFFSET,
	DMA1_REGISTER_OFFSET
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);

/**
 * si_dma_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the rptr writeback buffer (SI).
 */
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}

/**
 * si_dma_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (SI).
 */
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}

/**
 * si_dma_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (SI).
 */
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}

/**
 * si_dma_ring_emit_ib - schedule an IB on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring (SI).
 */
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((lower_32_bits(ring->wptr) & 7) != 5)
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * si_dma_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence sequence number to
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (SI).
 */
static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				   unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
	amdgpu_ring_write(ring, seq);
	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
		amdgpu_ring_write(ring, addr & 0xfffffffc);
		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}
	/* generate an interrupt */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
}

static void si_dma_stop(struct amdgpu_device *adev)
{
	u32 rb_cntl;
	unsigned i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* disable the ring buffer */
		rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK;
		WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
	}
}

static int si_dma_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
	int i, r;
	uint64_t rptr_addr;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
		WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK |
			   DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
#endif
		WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0);

		rptr_addr = ring->rptr_gpu_addr;

		WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
		WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);

		rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;

		WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
#endif
		WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]);
		dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK;
		WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl);

		ring->wptr = 0;
		WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
		WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK);

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	return 0;
}

/**
 * si_dma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (SI).
 * Returns 0 for success, error for failure.
 */
static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 4);
	if (r)
		goto error_free_wb;

	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_wb:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * si_dma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring (SI).
 * Returns 0 on success, error on failure.
 */
static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err0;

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using DMA (SI).
 */
static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
			       uint64_t pe, uint64_t src,
			       unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, bytes);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
}

/**
 * si_dma_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using DMA (SI).
 */
static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t value, unsigned count,
				uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * si_dma_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (SI).
 */
static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
				  uint64_t pe,
				  uint64_t addr, unsigned count,
				  uint32_t incr, uint64_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * si_dma_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so its size is a multiple of 8 dwords (SI).
 */
static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}

/**
 * si_dma_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (SI).
 */
static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
			  (1 << 27)); /* Poll memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, seq); /* value */
	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
}

/**
 * si_dma_ring_emit_vm_flush - si vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (SI).
 */
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
				      unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for invalidate to complete */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0xff << 16); /* retry */
	amdgpu_ring_write(ring, 1 << vmid); /* mask */
	amdgpu_ring_write(ring, 0); /* value */
	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}

static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
				  uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | reg);
	amdgpu_ring_write(ring, val);
}

static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
	.copy_pte_num_dw = 5,
	.copy_pte = si_dma_vm_copy_pte,

	.write_pte = si_dma_vm_write_pte,
	.set_pte_pde = si_dma_vm_set_pte_pde,
};

static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->sdma.num_instances = SDMA_MAX_INSTANCE;

	si_dma_set_ring_funcs(adev);
	si_dma_set_buffer_funcs(adev);
	amdgpu_sdma_set_vm_pte_scheds(adev, &si_dma_vm_pte_funcs);
	si_dma_set_irq_funcs(adev);

	return 0;
}

static int si_dma_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = ip_block->adev;

	/* DMA0 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* DMA1 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_INSTANCE0 :
				     AMDGPU_SDMA_IRQ_INSTANCE1,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	return r;
}

static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	return si_dma_start(adev);
}

static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)
{
	si_dma_stop(ip_block->adev);

	return 0;
}

static int si_dma_suspend(struct amdgpu_ip_block *ip_block)
{
	return si_dma_hw_fini(ip_block);
}

static int si_dma_resume(struct amdgpu_ip_block *ip_block)
{
	return si_dma_hw_init(ip_block);
}

static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 tmp = RREG32(mmSRBM_STATUS2);

	if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK))
		return false;

	return true;
}

static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned i;
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (si_dma_is_idle(ip_block))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
	drm_info(adev_to_drm(ip_block->adev), "si_dma_soft_reset --- not implemented !!!!!!!\n");
	return 0;
}

static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *src,
				     unsigned type,
				     enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_INSTANCE0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_INSTANCE1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
			WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

static int si_dma_process_trap_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	if (entry->src_id == 224)
		amdgpu_fence_process(&adev->sdma.instance[0].ring);
	else
		amdgpu_fence_process(&adev->sdma.instance[1].ring);
	return 0;
}

static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					enum amd_clockgating_state state)
{
	u32 orig, data, offset;
	int i;
	bool enable;
	struct amdgpu_device *adev = ip_block->adev;
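
	/*
	 * Gating policy (summary of the code below): the MGCG path is taken
	 * only when gating is requested and the ASIC advertises
	 * AMD_CG_SUPPORT_SDMA_MGCG; otherwise the memory power override is
	 * forced on and DMA_CLK_CTRL is written with 0xff000000, which
	 * appears to leave the DMA clocks ungated.
	 */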
	enable = (state == AMD_CG_STATE_GATE);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(mmDMA_POWER_CNTL + offset);
			data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (data != orig)
				WREG32(mmDMA_POWER_CNTL + offset, data);
			WREG32(mmDMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(mmDMA_POWER_CNTL + offset);
			data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (data != orig)
				WREG32(mmDMA_POWER_CNTL + offset, data);

			orig = data = RREG32(mmDMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(mmDMA_CLK_CTRL + offset, data);
		}
	}

	return 0;
}

static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
					enum amd_powergating_state state)
{
	u32 tmp;
	struct amdgpu_device *adev = ip_block->adev;

	WREG32(mmDMA_PGFSM_WRITE, 0x00002000);
	WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff);

	for (tmp = 0; tmp < 5; tmp++)
		WREG32(mmDMA_PGFSM_WRITE, 0);

	return 0;
}

static const struct amd_ip_funcs si_dma_ip_funcs = {
	.name = "si_dma",
	.early_init = si_dma_early_init,
	.sw_init = si_dma_sw_init,
	.sw_fini = si_dma_sw_fini,
	.hw_init = si_dma_hw_init,
	.hw_fini = si_dma_hw_fini,
	.suspend = si_dma_suspend,
	.resume = si_dma_resume,
	.is_idle = si_dma_is_idle,
	.wait_for_idle = si_dma_wait_for_idle,
	.soft_reset = si_dma_soft_reset,
	.set_clockgating_state = si_dma_set_clockgating_state,
	.set_powergating_state = si_dma_set_powergating_state,
};

static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
	.support_64bit_ptrs = false,
	.get_rptr = si_dma_ring_get_rptr,
	.get_wptr = si_dma_ring_get_wptr,
	.set_wptr = si_dma_ring_set_wptr,
	.emit_frame_size =
		3 + 3 + /* hdp flush / invalidate */
		6 + /* si_dma_ring_emit_pipeline_sync */
		SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
	.emit_ib = si_dma_ring_emit_ib,
	.emit_fence = si_dma_ring_emit_fence,
	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
	.emit_vm_flush = si_dma_ring_emit_vm_flush,
	.test_ring = si_dma_ring_test_ring,
	.test_ib = si_dma_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = si_dma_ring_pad_ib,
	.emit_wreg = si_dma_ring_emit_wreg,
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
}

static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq,
};

static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
}
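
/*
 * Packet layout sketch for the buffer helpers below, derived from the
 * dwords they emit (not from separate hardware documentation):
 *
 *   si_dma_emit_copy_buffer, 5 dwords:
 *     dw0: DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, byte_count)
 *     dw1: dst address bits 31:0
 *     dw2: src address bits 31:0
 *     dw3: dst address bits 39:32
 *     dw4: src address bits 39:32
 *
 *   si_dma_emit_fill_buffer, 4 dwords:
 *     dw0: DMA_PACKET(DMA_PACKET_CONSTANT_FILL, 0, 0, 0, byte_count / 4)
 *     dw1: dst address bits 31:0
 *     dw2: fill value
 *     dw3: upper dst address bits shifted left by 16
 *
 * These sizes match .copy_num_dw and .fill_num_dw in si_dma_buffer_funcs.
 */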

/**
 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @copy_flags: unused
 *
 * Copy GPU buffers using the DMA engine (SI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    uint32_t byte_count,
				    uint32_t copy_flags)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, byte_count);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
}

/**
 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (SI).
 */
static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
				    uint32_t src_data,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
					      0, 0, 0, byte_count / 4);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
}

static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
	.copy_max_bytes = 0xffff8,
	.copy_num_dw = 5,
	.emit_copy_buffer = si_dma_emit_copy_buffer,

	.fill_max_bytes = 0xffff8,
	.fill_num_dw = 4,
	.emit_fill_buffer = si_dma_emit_fill_buffer,
};

static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &si_dma_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

const struct amdgpu_ip_block_version si_dma_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &si_dma_ip_funcs,
};
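
/*
 * Registration note: this IP block version is added by the SI chip setup
 * code; see si_set_ip_blocks() in si.c, where it is registered via
 * amdgpu_device_ip_block_add(adev, &si_dma_ip_block).
 */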