/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);

/**
 * amdgpu_ring_free_size - update the free size
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the free dw slots in the ring buffer (all asics).
 */
void amdgpu_ring_free_size(struct amdgpu_ring *ring)
{
	uint32_t rptr = amdgpu_ring_get_rptr(ring);

	/* This works because ring_size is a power of 2 */
	ring->ring_free_dw = rptr + (ring->ring_size / 4);
	ring->ring_free_dw -= ring->wptr;
	ring->ring_free_dw &= ring->ptr_mask;
	if (!ring->ring_free_dw) {
		/* this is an empty ring */
		ring->ring_free_dw = ring->ring_size / 4;
	}
}
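
/*
 * Illustrative sketch, not driver code: because the ring size in dwords
 * is a power of two, the free space math above reduces to a mask instead
 * of a modulo.  Assuming a hypothetical 256 dword ring (ptr_mask == 255)
 * with rptr == 10 and wptr == 250:
 *
 *	free = (10 + 256 - 250) & 255 = 16 dwords
 *
 * A result of 0 is ambiguous (completely full masks to the same value as
 * completely empty), which is why rptr == wptr is special-cased as an
 * empty ring here and why amdgpu_ring_alloc() below always keeps one
 * dword unused.
 */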

/**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
	int r;

	/* make sure we aren't trying to allocate more space than there is on the ring */
	if (ndw > (ring->ring_size / 4))
		return -ENOMEM;
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	amdgpu_ring_free_size(ring);
	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
	while (ndw > (ring->ring_free_dw - 1)) {
		amdgpu_ring_free_size(ring);
		if (ndw < ring->ring_free_dw)
			break;
		r = amdgpu_fence_wait_next(ring);
		if (r)
			return r;
	}
	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;
	return 0;
}

/**
 * amdgpu_ring_lock - lock the ring and allocate space on it
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Lock the ring and allocate @ndw dwords in the ring buffer
 * (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
{
	int r;

	mutex_lock(ring->ring_lock);
	r = amdgpu_ring_alloc(ring, ndw);
	if (r) {
		mutex_unlock(ring->ring_lock);
		return r;
	}
	return 0;
}

/**
 * amdgpu_ring_insert_nop - insert NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @count: the number of NOP packets to insert
 *
 * This is the generic insert_nop function for rings except SDMA
 */
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, ring->nop);
}

/**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
	uint32_t count;

	/* We pad to match fetch size */
	count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
	count %= ring->align_mask + 1;
	ring->funcs->insert_nop(ring, count);

	mb();
	amdgpu_ring_set_wptr(ring);
}

/**
 * amdgpu_ring_unlock_commit - tell the GPU to execute the new
 * commands on the ring buffer and unlock it
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_commit() then unlock the ring (all asics).
 */
void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
{
	amdgpu_ring_commit(ring);
	mutex_unlock(ring->ring_lock);
}

/**
 * amdgpu_ring_undo - reset the wptr
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void amdgpu_ring_undo(struct amdgpu_ring *ring)
{
	ring->wptr = ring->wptr_old;
}
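
/*
 * Illustrative usage sketch (an assumption for documentation, not lifted
 * from a real caller): the helpers above are meant to be used in pairs,
 * with amdgpu_ring_unlock_undo() on the error path so a failed submission
 * does not leave half-written commands visible to the GPU:
 *
 *	r = amdgpu_ring_lock(ring, ndw);
 *	if (r)
 *		return r;
 *	amdgpu_ring_write(ring, packet);	 (hypothetical packet)
 *	...
 *	if (something_failed) {
 *		amdgpu_ring_unlock_undo(ring);	 (drop the partial writes)
 *		return -EINVAL;
 *	}
 *	amdgpu_ring_unlock_commit(ring);	 (bump wptr, kick the GPU)
 */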

/**
 * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_undo() then unlock the ring (all asics).
 */
void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
{
	amdgpu_ring_undo(ring);
	mutex_unlock(ring->ring_lock);
}

/**
 * amdgpu_ring_backup - Back up the content of a ring
 *
 * @ring: the ring we want to back up
 * @data: pointer used to return the saved dwords, NULL if nothing was saved
 *
 * Saves all unprocessed commits from a ring, returns the number of dwords saved.
 */
unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
			    uint32_t **data)
{
	unsigned size, ptr, i;

	/* just in case lock the ring */
	mutex_lock(ring->ring_lock);
	*data = NULL;

	if (ring->ring_obj == NULL) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	/* it doesn't make sense to save anything if all fences are signaled */
	if (!amdgpu_fence_count_emitted(ring)) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);

	size = ring->wptr + (ring->ring_size / 4);
	size -= ptr;
	size &= ring->ptr_mask;
	if (size == 0) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	/* and then save the content of the ring */
	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
	if (!*data) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}
	for (i = 0; i < size; ++i) {
		(*data)[i] = ring->ring[ptr++];
		ptr &= ring->ptr_mask;
	}

	mutex_unlock(ring->ring_lock);
	return size;
}

/**
 * amdgpu_ring_restore - append saved commands to the ring again
 *
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved commands.
 */
int amdgpu_ring_restore(struct amdgpu_ring *ring,
			unsigned size, uint32_t *data)
{
	int i, r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = amdgpu_ring_lock(ring, size);
	if (r)
		return r;

	for (i = 0; i < size; ++i)
		amdgpu_ring_write(ring, data[i]);

	amdgpu_ring_unlock_commit(ring);
	kfree(data);
	return 0;
}
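
/*
 * Illustrative sketch (assumption, not copied from the reset path): the
 * backup/restore pair is intended to bracket a ring re-initialization,
 * e.g. during a GPU reset:
 *
 *	unsigned size;
 *	uint32_t *data;
 *
 *	size = amdgpu_ring_backup(ring, &data);
 *	... reset the ASIC and re-init the ring ...
 *	r = amdgpu_ring_restore(ring, size, data);
 *
 * On success amdgpu_ring_restore() kfree()s @data, so the caller must not
 * free it again; if the backup saved nothing, size is 0 and data is NULL,
 * which restore treats as a no-op.
 */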

/**
 * amdgpu_ring_init - init driver ring struct.
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring structure holding ring information
 * @ring_size: size of the ring
 * @nop: nop packet for this ring
 * @align_mask: alignment mask used when padding the ring
 * @irq_src: interrupt source used by the fence driver
 * @irq_type: interrupt type used by the fence driver
 * @ring_type: type of the ring (GFX, compute, SDMA, ...)
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned ring_size, u32 nop, u32 align_mask,
		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
		     enum amdgpu_ring_type ring_type)
{
	u32 rb_bufsz;
	int r;

	if (ring->adev == NULL) {
		if (adev->num_rings >= AMDGPU_MAX_RINGS)
			return -EINVAL;

		ring->adev = adev;
		ring->idx = adev->num_rings++;
		adev->rings[ring->idx] = ring;
		r = amdgpu_fence_driver_init_ring(ring);
		if (r)
			return r;
	}

	r = amdgpu_wb_get(adev, &ring->rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->wptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->fence_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
		return r;
	}
	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4);
	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
	spin_lock_init(&ring->fence_lock);
	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
	if (r) {
		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}

	ring->ring_lock = &adev->ring_lock;
	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	ring->ring_size = ring_size;
	ring->align_mask = align_mask;
	ring->nop = nop;
	ring->type = ring_type;

	/* Allocate ring buffer */
	if (ring->ring_obj == NULL) {
		r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0,
				     NULL, NULL, &ring->ring_obj);
		if (r) {
			dev_err(adev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		r = amdgpu_bo_reserve(ring->ring_obj, false);
		if (unlikely(r != 0))
			return r;
		r = amdgpu_bo_pin(ring->ring_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &ring->gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(ring->ring_obj);
			dev_err(adev->dev, "(%d) ring pin failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(ring->ring_obj,
				   (void **)&ring->ring);
		amdgpu_bo_unreserve(ring->ring_obj);
		if (r) {
			dev_err(adev->dev, "(%d) ring map failed\n", r);
			return r;
		}
	}
	ring->ptr_mask = (ring->ring_size / 4) - 1;
	ring->ring_free_dw = ring->ring_size / 4;

	if (amdgpu_debugfs_ring_init(adev, ring)) {
		DRM_ERROR("Failed to register debugfs file for rings !\n");
	}
	return 0;
}
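
/*
 * Illustrative call sketch with hypothetical values (each IP block picks
 * its own nop packet, align mask, irq source and ring size):
 *
 *	r = amdgpu_ring_init(adev, ring, 1024 * 1024,
 *			     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
 *			     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
 *			     AMDGPU_RING_TYPE_GFX);
 *
 * Note that the requested ring_size is only a hint; it is rounded to a
 * power of two above so that the ptr_mask arithmetic works.
 */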

/**
 * amdgpu_ring_fini - tear down the driver ring struct.
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
	int r;
	struct amdgpu_bo *ring_obj;

	if (ring->ring_lock == NULL)
		return;

	mutex_lock(ring->ring_lock);
	ring_obj = ring->ring_obj;
	ring->ready = false;
	ring->ring = NULL;
	ring->ring_obj = NULL;
	mutex_unlock(ring->ring_lock);

	amdgpu_wb_free(ring->adev, ring->fence_offs);
	amdgpu_wb_free(ring->adev, ring->rptr_offs);
	amdgpu_wb_free(ring->adev, ring->wptr_offs);
	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);

	if (ring_obj) {
		r = amdgpu_bo_reserve(ring_obj, false);
		if (likely(r == 0)) {
			amdgpu_bo_kunmap(ring_obj);
			amdgpu_bo_unpin(ring_obj);
			amdgpu_bo_unreserve(ring_obj);
		}
		amdgpu_bo_unref(&ring_obj);
	}
}

/**
 * amdgpu_ring_from_fence - get ring from fence
 *
 * @f: fence structure
 *
 * Extract the ring a fence belongs to. Handles both scheduler as
 * well as hardware fences.
 */
struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
{
	struct amdgpu_fence *a_fence;
	struct amd_sched_fence *s_fence;

	s_fence = to_amd_sched_fence(f);
	if (s_fence)
		return container_of(s_fence->sched, struct amdgpu_ring, sched);

	a_fence = to_amdgpu_fence(f);
	if (a_fence)
		return a_fence->ring;

	return NULL;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int roffset = *(int *)node->info_ent->data;
	struct amdgpu_ring *ring = (void *)(((uint8_t *)adev) + roffset);

	uint32_t rptr, wptr, rptr_next;
	unsigned count, i, j;

	amdgpu_ring_free_size(ring);
	count = (ring->ring_size / 4) - ring->ring_free_dw;

	wptr = amdgpu_ring_get_wptr(ring);
	seq_printf(m, "wptr: 0x%08x [%5d]\n",
		   wptr, wptr);

	rptr = amdgpu_ring_get_rptr(ring);
	seq_printf(m, "rptr: 0x%08x [%5d]\n",
		   rptr, rptr);

	rptr_next = ~0;

	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
		   ring->wptr, ring->wptr);
	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
		   ring->last_semaphore_signal_addr);
	seq_printf(m, "last semaphore wait addr   : 0x%016llx\n",
		   ring->last_semaphore_wait_addr);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);

	if (!ring->ready)
		return 0;

	/* print 32 dw before current rptr as often it's the last executed
	 * packet that is the root issue
	 */
	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
	for (j = 0; j <= (count + 32); j++) {
		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
		if (rptr == i)
			seq_puts(m, " *");
		if (rptr_next == i)
			seq_puts(m, " #");
		seq_puts(m, "\n");
		i = (i + 1) & ring->ptr_mask;
	}
	return 0;
}
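
/*
 * The drm_info_list entries below are static, so they cannot hold
 * per-device ring pointers.  Instead each entry stores the byte offset
 * of its ring inside struct amdgpu_device, and the callback above
 * recovers the ring from the device pointer at read time.  Simplified
 * sketch of the lookup (types elided):
 *
 *	int roffset = offsetof(struct amdgpu_device, gfx.gfx_ring[0]);
 *	struct amdgpu_ring *ring =
 *		(struct amdgpu_ring *)((uint8_t *)adev + roffset);
 */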
/* TODO: clean this up! */
static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]);
static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]);
static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]);
static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma.instance[0].ring);
static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma.instance[1].ring);
static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring);
static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]);
static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]);

static struct drm_info_list amdgpu_debugfs_ring_info_list[] = {
	{"amdgpu_ring_gfx", amdgpu_debugfs_ring_info, 0, &amdgpu_gfx_index},
	{"amdgpu_ring_cp1", amdgpu_debugfs_ring_info, 0, &cayman_cp1_index},
	{"amdgpu_ring_cp2", amdgpu_debugfs_ring_info, 0, &cayman_cp2_index},
	{"amdgpu_ring_dma1", amdgpu_debugfs_ring_info, 0, &amdgpu_dma1_index},
	{"amdgpu_ring_dma2", amdgpu_debugfs_ring_info, 0, &amdgpu_dma2_index},
	{"amdgpu_ring_uvd", amdgpu_debugfs_ring_info, 0, &r600_uvd_index},
	{"amdgpu_ring_vce1", amdgpu_debugfs_ring_info, 0, &si_vce1_index},
	{"amdgpu_ring_vce2", amdgpu_debugfs_ring_info, 0, &si_vce2_index},
};

#endif

static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
		struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i];
		int roffset = *(int *)amdgpu_debugfs_ring_info_list[i].data;
		struct amdgpu_ring *other = (void *)(((uint8_t *)adev) + roffset);
		int r;

		if (other != ring)
			continue;

		r = amdgpu_debugfs_add_files(adev, info, 1);
		if (r)
			return r;
	}
#endif
	return 0;
}
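
/*
 * Example (illustrative): with debugfs mounted, the files registered
 * above can be dumped from user space to inspect a hung ring, e.g.
 *
 *	cat /sys/kernel/debug/dri/0/amdgpu_ring_gfx
 *
 * which prints the rptr/wptr pair and a window of ring contents around
 * the current rptr (marked with a '*').
 */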