/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
				    struct amdgpu_ring *ring);

/**
 * amdgpu_ring_free_size - update the free size
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the number of free dwords in the ring buffer (all asics).
 */
void amdgpu_ring_free_size(struct amdgpu_ring *ring)
{
	uint32_t rptr = amdgpu_ring_get_rptr(ring);

	/* This works because ring_size is a power of 2 */
	ring->ring_free_dw = rptr + (ring->ring_size / 4);
	ring->ring_free_dw -= ring->wptr;
	ring->ring_free_dw &= ring->ptr_mask;
	if (!ring->ring_free_dw) {
		/* this is an empty ring */
		ring->ring_free_dw = ring->ring_size / 4;
		/* update lockup info to avoid false positive */
		amdgpu_ring_lockup_update(ring);
	}
}
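
/*
 * A worked example of the arithmetic above (a sketch; the concrete
 * numbers are illustrative only).  With a 4 KB ring, ring_size / 4 is
 * 1024 dwords and ptr_mask is 0x3ff.  If rptr == 100 and wptr == 900:
 *
 *	free = (100 + 1024 - 900) & 0x3ff = 224 dwords
 *
 * If wptr has wrapped around to 4 while rptr is still at 1000:
 *
 *	free = (1000 + 1024 - 4) & 0x3ff = 996 dwords
 *
 * The power-of-two mask makes the wrap-around case fall out of the
 * same expression, and free == 0 can only mean "completely empty"
 * because amdgpu_ring_alloc() below never hands out the last free
 * dword, so wptr can never catch up with rptr on a full ring.
 */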

/**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
	int r;

	/* make sure we aren't trying to allocate more space
	 * than there is on the ring */
	if (ndw > (ring->ring_size / 4))
		return -ENOMEM;
	/* Align requested size with padding so unlock_commit can
	 * pad safely */
	amdgpu_ring_free_size(ring);
	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
	while (ndw > (ring->ring_free_dw - 1)) {
		amdgpu_ring_free_size(ring);
		if (ndw < ring->ring_free_dw)
			break;
		r = amdgpu_fence_wait_next(ring);
		if (r)
			return r;
	}
	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;
	return 0;
}

/**
 * amdgpu_ring_lock - lock the ring and allocate space on it
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Lock the ring and allocate @ndw dwords in the ring buffer
 * (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
{
	int r;

	mutex_lock(ring->ring_lock);
	r = amdgpu_ring_alloc(ring, ndw);
	if (r) {
		mutex_unlock(ring->ring_lock);
		return r;
	}
	return 0;
}

/**
 * amdgpu_ring_insert_nop - insert NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @count: the number of NOP packets to insert
 *
 * This is the generic insert_nop function for rings except SDMA
 */
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, ring->nop);
}

/**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
	uint32_t count;

	/* We pad to match fetch size */
	count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
	count %= ring->align_mask + 1;
	ring->funcs->insert_nop(ring, count);

	mb();
	amdgpu_ring_set_wptr(ring);
}

/**
 * amdgpu_ring_unlock_commit - tell the GPU to execute the new
 * commands on the ring buffer and unlock it
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_commit() then unlock the ring (all asics).
 */
void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
{
	amdgpu_ring_commit(ring);
	mutex_unlock(ring->ring_lock);
}

/**
 * amdgpu_ring_undo - reset the wptr
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void amdgpu_ring_undo(struct amdgpu_ring *ring)
{
	ring->wptr = ring->wptr_old;
}
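
/*
 * Typical submission flow, sketched from the helpers above (error
 * handling abbreviated; packet0/packet1 stand in for real command
 * dwords and are illustrative only):
 *
 *	r = amdgpu_ring_lock(ring, 16);
 *	if (r)
 *		return r;			// ring is not locked on failure
 *	amdgpu_ring_write(ring, packet0);
 *	amdgpu_ring_write(ring, packet1);
 *	...
 *	amdgpu_ring_unlock_commit(ring);	// pad, bump wptr, unlock
 *
 * If building the commands fails midway, back out with
 * amdgpu_ring_unlock_undo() (defined below) instead, which restores
 * wptr_old so the partially written dwords are never made visible to
 * the GPU.
 */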

/**
 * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Call amdgpu_ring_undo() then unlock the ring (all asics).
 */
void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
{
	amdgpu_ring_undo(ring);
	mutex_unlock(ring->ring_lock);
}

/**
 * amdgpu_ring_lockup_update - update lockup variables
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the last rptr value and timestamp (all asics).
 */
void amdgpu_ring_lockup_update(struct amdgpu_ring *ring)
{
	atomic_set(&ring->last_rptr, amdgpu_ring_get_rptr(ring));
	atomic64_set(&ring->last_activity, jiffies_64);
}

/**
 * amdgpu_ring_test_lockup - check if the ring appears to be locked up
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Check whether the rptr has moved since the last call and, if not,
 * for how long the ring has been stalled.
 * Returns true if the ring stalled for more than
 * amdgpu_lockup_timeout msec, false otherwise.
 */
bool amdgpu_ring_test_lockup(struct amdgpu_ring *ring)
{
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	uint64_t last = atomic64_read(&ring->last_activity);
	uint64_t elapsed;

	if (rptr != atomic_read(&ring->last_rptr)) {
		/* ring is still working, no lockup */
		amdgpu_ring_lockup_update(ring);
		return false;
	}

	elapsed = jiffies_to_msecs(jiffies_64 - last);
	if (amdgpu_lockup_timeout && elapsed >= amdgpu_lockup_timeout) {
		dev_err(ring->adev->dev, "ring %d stalled for more than %llumsec\n",
			ring->idx, elapsed);
		return true;
	}
	/* give a chance to the GPU ... */
	return false;
}

/**
 * amdgpu_ring_backup - Back up the content of a ring
 *
 * @ring: the ring we want to back up
 * @data: place to store the pointer to the saved dwords
 *
 * Saves all unprocessed commits from a ring, returns the number of dwords saved.
 */
unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
			    uint32_t **data)
{
	unsigned size, ptr, i;

	/* just in case lock the ring */
	mutex_lock(ring->ring_lock);
	*data = NULL;

	if (ring->ring_obj == NULL) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	/* it doesn't make sense to save anything if all fences are signaled */
	if (!amdgpu_fence_count_emitted(ring)) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);

	size = ring->wptr + (ring->ring_size / 4);
	size -= ptr;
	size &= ring->ptr_mask;
	if (size == 0) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}

	/* and then save the content of the ring */
	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
	if (!*data) {
		mutex_unlock(ring->ring_lock);
		return 0;
	}
	for (i = 0; i < size; ++i) {
		(*data)[i] = ring->ring[ptr++];
		ptr &= ring->ptr_mask;
	}

	mutex_unlock(ring->ring_lock);
	return size;
}

/**
 * amdgpu_ring_restore - append saved commands to the ring again
 *
 * @ring: ring to append commands to
 * @size: number of dwords we want to write
 * @data: saved commands
 *
 * Allocates space on the ring and restores the previously saved commands.
 */
int amdgpu_ring_restore(struct amdgpu_ring *ring,
			unsigned size, uint32_t *data)
{
	int i, r;

	if (!size || !data)
		return 0;

	/* restore the saved ring content */
	r = amdgpu_ring_lock(ring, size);
	if (r)
		return r;

	for (i = 0; i < size; ++i)
		amdgpu_ring_write(ring, data[i]);

	amdgpu_ring_unlock_commit(ring);
	kfree(data);
	return 0;
}
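
/*
 * amdgpu_ring_backup() and amdgpu_ring_restore() are meant to be used
 * as a pair around a GPU reset.  A rough sketch of that flow (error
 * handling and the reset itself elided; amdgpu_gpu_reset_hw() is a
 * hypothetical stand-in for the asic specific reset code):
 *
 *	uint32_t *data;
 *	unsigned size;
 *
 *	size = amdgpu_ring_backup(ring, &data);	// save unprocessed dwords
 *	amdgpu_gpu_reset_hw(adev);		// reset and reinit the engine
 *	amdgpu_ring_restore(ring, size, data);	// replay them
 *
 * Note that amdgpu_ring_restore() consumes @data with kfree() when it
 * succeeds; if it fails to lock the ring the caller still owns the
 * buffer and must free it itself.
 */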

/**
 * amdgpu_ring_init - init driver ring struct.
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring structure holding ring information
 * @ring_size: size of the ring
 * @nop: nop packet for this ring
 * @align_mask: alignment mask for command submissions
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 * @ring_type: type of the ring (GFX, COMPUTE, SDMA, ...)
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned ring_size, u32 nop, u32 align_mask,
		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
		     enum amdgpu_ring_type ring_type)
{
	u32 rb_bufsz;
	int r;

	if (ring->adev == NULL) {
		if (adev->num_rings >= AMDGPU_MAX_RINGS)
			return -EINVAL;

		ring->adev = adev;
		ring->idx = adev->num_rings++;
		adev->rings[ring->idx] = ring;
		r = amdgpu_fence_driver_init_ring(ring);
		if (r)
			return r;
	}

	r = amdgpu_wb_get(adev, &ring->rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->wptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->fence_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
		return r;
	}

	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
	if (r) {
		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
		return r;
	}
	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4);
	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
	spin_lock_init(&ring->fence_lock);
	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
	if (r) {
		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}

	ring->ring_lock = &adev->ring_lock;
	/* Align ring size */
	rb_bufsz = order_base_2(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	ring->ring_size = ring_size;
	ring->align_mask = align_mask;
	ring->nop = nop;
	ring->type = ring_type;

	/* Allocate ring buffer */
	if (ring->ring_obj == NULL) {
		r = amdgpu_bo_create(adev, ring->ring_size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0,
				     NULL, NULL, &ring->ring_obj);
		if (r) {
			dev_err(adev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		r = amdgpu_bo_reserve(ring->ring_obj, false);
		if (unlikely(r != 0))
			return r;
		r = amdgpu_bo_pin(ring->ring_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &ring->gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(ring->ring_obj);
			dev_err(adev->dev, "(%d) ring pin failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(ring->ring_obj,
				   (void **)&ring->ring);
		amdgpu_bo_unreserve(ring->ring_obj);
		if (r) {
			dev_err(adev->dev, "(%d) ring map failed\n", r);
			return r;
		}
	}
	ring->ptr_mask = (ring->ring_size / 4) - 1;
	ring->ring_free_dw = ring->ring_size / 4;

	if (amdgpu_debugfs_ring_init(adev, ring)) {
		DRM_ERROR("Failed to register debugfs file for rings!\n");
	}
	amdgpu_ring_lockup_update(ring);
	return 0;
}
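
/*
 * A quick check of the "Align ring size" arithmetic above (the
 * numbers are illustrative): for a requested ring_size of 1 MB,
 *
 *	rb_bufsz  = order_base_2(1048576 / 8) = 17
 *	ring_size = (1 << 18) * 4 = 1048576 bytes
 *
 * so a power-of-two request passes through unchanged, while anything
 * in between is rounded up to the next power of two (e.g. 96 KB
 * becomes 128 KB).  The power-of-two size is what makes the ptr_mask
 * wrap-around trick in amdgpu_ring_free_size() work.
 */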

/**
 * amdgpu_ring_fini - tear down the driver ring struct.
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
	int r;
	struct amdgpu_bo *ring_obj;

	if (ring->ring_lock == NULL)
		return;

	mutex_lock(ring->ring_lock);
	ring_obj = ring->ring_obj;
	ring->ready = false;
	ring->ring = NULL;
	ring->ring_obj = NULL;
	mutex_unlock(ring->ring_lock);

	amdgpu_wb_free(ring->adev, ring->fence_offs);
	amdgpu_wb_free(ring->adev, ring->rptr_offs);
	amdgpu_wb_free(ring->adev, ring->wptr_offs);
	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);

	if (ring_obj) {
		r = amdgpu_bo_reserve(ring_obj, false);
		if (likely(r == 0)) {
			amdgpu_bo_kunmap(ring_obj);
			amdgpu_bo_unpin(ring_obj);
			amdgpu_bo_unreserve(ring_obj);
		}
		amdgpu_bo_unref(&ring_obj);
	}
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int roffset = *(int *)node->info_ent->data;
	struct amdgpu_ring *ring = (void *)(((uint8_t *)adev) + roffset);

	uint32_t rptr, wptr, rptr_next;
	unsigned count, i, j;

	amdgpu_ring_free_size(ring);
	count = (ring->ring_size / 4) - ring->ring_free_dw;

	wptr = amdgpu_ring_get_wptr(ring);
	seq_printf(m, "wptr: 0x%08x [%5d]\n",
		   wptr, wptr);

	rptr = amdgpu_ring_get_rptr(ring);
	seq_printf(m, "rptr: 0x%08x [%5d]\n",
		   rptr, rptr);

	rptr_next = ~0;

	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
		   ring->wptr, ring->wptr);
	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
		   ring->last_semaphore_signal_addr);
	seq_printf(m, "last semaphore wait addr : 0x%016llx\n",
		   ring->last_semaphore_wait_addr);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);

	if (!ring->ready)
		return 0;

	/* print 32 dw before current rptr as often it's the last executed
	 * packet that is the root issue
	 */
	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
	for (j = 0; j <= (count + 32); j++) {
		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
		if (rptr == i)
			seq_puts(m, " *");
		if (rptr_next == i)
			seq_puts(m, " #");
		seq_puts(m, "\n");
		i = (i + 1) & ring->ptr_mask;
	}
	return 0;
}
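
/*
 * In the dump produced above, each "r[index]=value" line is one ring
 * dword; a trailing " *" marks the dword the GPU rptr currently
 * points at, and " #" would mark rptr_next (left at ~0 here, so that
 * marker never fires).  On a hung ring the rptr marker therefore
 * usually sits on or just after the packet that stalled the engine,
 * which is why 32 dwords of history before rptr are printed as well.
 */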
{"amdgpu_ring_dma2", amdgpu_debugfs_ring_info, 0, &amdgpu_dma2_index}, 555 {"amdgpu_ring_uvd", amdgpu_debugfs_ring_info, 0, &r600_uvd_index}, 556 {"amdgpu_ring_vce1", amdgpu_debugfs_ring_info, 0, &si_vce1_index}, 557 {"amdgpu_ring_vce2", amdgpu_debugfs_ring_info, 0, &si_vce2_index}, 558 }; 559 560 #endif 561 562 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) 563 { 564 #if defined(CONFIG_DEBUG_FS) 565 unsigned i; 566 for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) { 567 struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i]; 568 int roffset = *(int*)amdgpu_debugfs_ring_info_list[i].data; 569 struct amdgpu_ring *other = (void *)(((uint8_t*)adev) + roffset); 570 unsigned r; 571 572 if (other != ring) 573 continue; 574 575 r = amdgpu_debugfs_add_files(adev, info, 1); 576 if (r) 577 return r; 578 } 579 #endif 580 return 0; 581 } 582