1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/power_supply.h> 29 #include <linux/kthread.h> 30 #include <linux/module.h> 31 #include <linux/console.h> 32 #include <linux/slab.h> 33 34 #include <drm/drm_atomic_helper.h> 35 #include <drm/drm_probe_helper.h> 36 #include <drm/amdgpu_drm.h> 37 #include <linux/vgaarb.h> 38 #include <linux/vga_switcheroo.h> 39 #include <linux/efi.h> 40 #include "amdgpu.h" 41 #include "amdgpu_trace.h" 42 #include "amdgpu_i2c.h" 43 #include "atom.h" 44 #include "amdgpu_atombios.h" 45 #include "amdgpu_atomfirmware.h" 46 #include "amd_pcie.h" 47 #ifdef CONFIG_DRM_AMDGPU_SI 48 #include "si.h" 49 #endif 50 #ifdef CONFIG_DRM_AMDGPU_CIK 51 #include "cik.h" 52 #endif 53 #include "vi.h" 54 #include "soc15.h" 55 #include "nv.h" 56 #include "bif/bif_4_1_d.h" 57 #include <linux/pci.h> 58 #include <linux/firmware.h> 59 #include "amdgpu_vf_error.h" 60 61 #include "amdgpu_amdkfd.h" 62 #include "amdgpu_pm.h" 63 64 #include "amdgpu_xgmi.h" 65 #include "amdgpu_ras.h" 66 #include "amdgpu_pmu.h" 67 #include "amdgpu_fru_eeprom.h" 68 #include "amdgpu_reset.h" 69 70 #include <linux/suspend.h> 71 #include <drm/task_barrier.h> 72 #include <linux/pm_runtime.h> 73 74 #include <drm/drm_drv.h> 75 76 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 77 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); 78 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 79 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); 80 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); 81 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); 82 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); 83 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); 84 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); 85 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); 86 MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); 87 MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin"); 88 89 #define AMDGPU_RESUME_MS 2000 90 91 const char *amdgpu_asic_name[] = { 92 "TAHITI", 93 "PITCAIRN", 94 "VERDE", 95 "OLAND", 96 "HAINAN", 97 "BONAIRE", 98 "KAVERI", 99 "KABINI", 100 "HAWAII", 101 "MULLINS", 102 "TOPAZ", 103 "TONGA", 104 "FIJI", 105 "CARRIZO", 106 "STONEY", 107 "POLARIS10", 108 "POLARIS11", 109 "POLARIS12", 110 "VEGAM", 111 "VEGA10", 112 "VEGA12", 113 "VEGA20", 114 "RAVEN", 115 "ARCTURUS", 116 "RENOIR", 117 "ALDEBARAN", 118 "NAVI10", 119 "CYAN_SKILLFISH", 120 "NAVI14", 121 "NAVI12", 122 "SIENNA_CICHLID", 123 "NAVY_FLOUNDER", 124 "VANGOGH", 125 "DIMGREY_CAVEFISH", 126 "BEIGE_GOBY", 127 "YELLOW_CARP", 128 "LAST", 129 }; 130 131 /** 132 * DOC: pcie_replay_count 133 * 134 * The amdgpu driver provides a sysfs API for reporting the total number 135 * of PCIe replays (NAKs) 136 * The file pcie_replay_count is used for this and returns the total 137 * number of replays as a sum of the NAKs generated and NAKs received 138 */ 139 140 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, 141 struct device_attribute *attr, char *buf) 142 { 143 struct drm_device *ddev = dev_get_drvdata(dev); 144 struct amdgpu_device *adev = drm_to_adev(ddev); 145 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); 146 147 return sysfs_emit(buf, "%llu\n", cnt); 148 } 149 150 static DEVICE_ATTR(pcie_replay_count, S_IRUGO, 151 amdgpu_device_get_pcie_replay_count, NULL); 152 153 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); 154 155 /** 156 * DOC: product_name 157 * 158 * The amdgpu driver provides a sysfs API for reporting the product name 159 * for the device 160 * The file serial_number is used for this and returns the product name 161 * as returned from the FRU. 162 * NOTE: This is only available for certain server cards 163 */ 164 165 static ssize_t amdgpu_device_get_product_name(struct device *dev, 166 struct device_attribute *attr, char *buf) 167 { 168 struct drm_device *ddev = dev_get_drvdata(dev); 169 struct amdgpu_device *adev = drm_to_adev(ddev); 170 171 return sysfs_emit(buf, "%s\n", adev->product_name); 172 } 173 174 static DEVICE_ATTR(product_name, S_IRUGO, 175 amdgpu_device_get_product_name, NULL); 176 177 /** 178 * DOC: product_number 179 * 180 * The amdgpu driver provides a sysfs API for reporting the part number 181 * for the device 182 * The file serial_number is used for this and returns the part number 183 * as returned from the FRU. 184 * NOTE: This is only available for certain server cards 185 */ 186 187 static ssize_t amdgpu_device_get_product_number(struct device *dev, 188 struct device_attribute *attr, char *buf) 189 { 190 struct drm_device *ddev = dev_get_drvdata(dev); 191 struct amdgpu_device *adev = drm_to_adev(ddev); 192 193 return sysfs_emit(buf, "%s\n", adev->product_number); 194 } 195 196 static DEVICE_ATTR(product_number, S_IRUGO, 197 amdgpu_device_get_product_number, NULL); 198 199 /** 200 * DOC: serial_number 201 * 202 * The amdgpu driver provides a sysfs API for reporting the serial number 203 * for the device 204 * The file serial_number is used for this and returns the serial number 205 * as returned from the FRU. 206 * NOTE: This is only available for certain server cards 207 */ 208 209 static ssize_t amdgpu_device_get_serial_number(struct device *dev, 210 struct device_attribute *attr, char *buf) 211 { 212 struct drm_device *ddev = dev_get_drvdata(dev); 213 struct amdgpu_device *adev = drm_to_adev(ddev); 214 215 return sysfs_emit(buf, "%s\n", adev->serial); 216 } 217 218 static DEVICE_ATTR(serial_number, S_IRUGO, 219 amdgpu_device_get_serial_number, NULL); 220 221 /** 222 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control 223 * 224 * @dev: drm_device pointer 225 * 226 * Returns true if the device is a dGPU with ATPX power control, 227 * otherwise return false. 228 */ 229 bool amdgpu_device_supports_px(struct drm_device *dev) 230 { 231 struct amdgpu_device *adev = drm_to_adev(dev); 232 233 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid()) 234 return true; 235 return false; 236 } 237 238 /** 239 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources 240 * 241 * @dev: drm_device pointer 242 * 243 * Returns true if the device is a dGPU with ACPI power control, 244 * otherwise return false. 245 */ 246 bool amdgpu_device_supports_boco(struct drm_device *dev) 247 { 248 struct amdgpu_device *adev = drm_to_adev(dev); 249 250 if (adev->has_pr3 || 251 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid())) 252 return true; 253 return false; 254 } 255 256 /** 257 * amdgpu_device_supports_baco - Does the device support BACO 258 * 259 * @dev: drm_device pointer 260 * 261 * Returns true if the device supporte BACO, 262 * otherwise return false. 263 */ 264 bool amdgpu_device_supports_baco(struct drm_device *dev) 265 { 266 struct amdgpu_device *adev = drm_to_adev(dev); 267 268 return amdgpu_asic_supports_baco(adev); 269 } 270 271 /** 272 * amdgpu_device_supports_smart_shift - Is the device dGPU with 273 * smart shift support 274 * 275 * @dev: drm_device pointer 276 * 277 * Returns true if the device is a dGPU with Smart Shift support, 278 * otherwise returns false. 279 */ 280 bool amdgpu_device_supports_smart_shift(struct drm_device *dev) 281 { 282 return (amdgpu_device_supports_boco(dev) && 283 amdgpu_acpi_is_power_shift_control_supported()); 284 } 285 286 /* 287 * VRAM access helper functions 288 */ 289 290 /** 291 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA 292 * 293 * @adev: amdgpu_device pointer 294 * @pos: offset of the buffer in vram 295 * @buf: virtual address of the buffer in system memory 296 * @size: read/write size, sizeof(@buf) must > @size 297 * @write: true - write to vram, otherwise - read from vram 298 */ 299 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, 300 void *buf, size_t size, bool write) 301 { 302 unsigned long flags; 303 uint32_t hi = ~0, tmp = 0; 304 uint32_t *data = buf; 305 uint64_t last; 306 int idx; 307 308 if (!drm_dev_enter(&adev->ddev, &idx)) 309 return; 310 311 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)); 312 313 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 314 for (last = pos + size; pos < last; pos += 4) { 315 tmp = pos >> 31; 316 317 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); 318 if (tmp != hi) { 319 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); 320 hi = tmp; 321 } 322 if (write) 323 WREG32_NO_KIQ(mmMM_DATA, *data++); 324 else 325 *data++ = RREG32_NO_KIQ(mmMM_DATA); 326 } 327 328 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 329 drm_dev_exit(idx); 330 } 331 332 /** 333 * amdgpu_device_vram_access - access vram by vram aperature 334 * 335 * @adev: amdgpu_device pointer 336 * @pos: offset of the buffer in vram 337 * @buf: virtual address of the buffer in system memory 338 * @size: read/write size, sizeof(@buf) must > @size 339 * @write: true - write to vram, otherwise - read from vram 340 * 341 * The return value means how many bytes have been transferred. 342 */ 343 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, 344 void *buf, size_t size, bool write) 345 { 346 #ifdef CONFIG_64BIT 347 void __iomem *addr; 348 size_t count = 0; 349 uint64_t last; 350 351 if (!adev->mman.aper_base_kaddr) 352 return 0; 353 354 last = min(pos + size, adev->gmc.visible_vram_size); 355 if (last > pos) { 356 addr = adev->mman.aper_base_kaddr + pos; 357 count = last - pos; 358 359 if (write) { 360 memcpy_toio(addr, buf, count); 361 mb(); 362 amdgpu_device_flush_hdp(adev, NULL); 363 } else { 364 amdgpu_device_invalidate_hdp(adev, NULL); 365 mb(); 366 memcpy_fromio(buf, addr, count); 367 } 368 369 } 370 371 return count; 372 #else 373 return 0; 374 #endif 375 } 376 377 /** 378 * amdgpu_device_vram_access - read/write a buffer in vram 379 * 380 * @adev: amdgpu_device pointer 381 * @pos: offset of the buffer in vram 382 * @buf: virtual address of the buffer in system memory 383 * @size: read/write size, sizeof(@buf) must > @size 384 * @write: true - write to vram, otherwise - read from vram 385 */ 386 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, 387 void *buf, size_t size, bool write) 388 { 389 size_t count; 390 391 /* try to using vram apreature to access vram first */ 392 count = amdgpu_device_aper_access(adev, pos, buf, size, write); 393 size -= count; 394 if (size) { 395 /* using MM to access rest vram */ 396 pos += count; 397 buf += count; 398 amdgpu_device_mm_access(adev, pos, buf, size, write); 399 } 400 } 401 402 /* 403 * register access helper functions. 404 */ 405 406 /* Check if hw access should be skipped because of hotplug or device error */ 407 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) 408 { 409 if (adev->no_hw_access) 410 return true; 411 412 #ifdef CONFIG_LOCKDEP 413 /* 414 * This is a bit complicated to understand, so worth a comment. What we assert 415 * here is that the GPU reset is not running on another thread in parallel. 416 * 417 * For this we trylock the read side of the reset semaphore, if that succeeds 418 * we know that the reset is not running in paralell. 419 * 420 * If the trylock fails we assert that we are either already holding the read 421 * side of the lock or are the reset thread itself and hold the write side of 422 * the lock. 423 */ 424 if (in_task()) { 425 if (down_read_trylock(&adev->reset_sem)) 426 up_read(&adev->reset_sem); 427 else 428 lockdep_assert_held(&adev->reset_sem); 429 } 430 #endif 431 return false; 432 } 433 434 /** 435 * amdgpu_device_rreg - read a memory mapped IO or indirect register 436 * 437 * @adev: amdgpu_device pointer 438 * @reg: dword aligned register offset 439 * @acc_flags: access flags which require special behavior 440 * 441 * Returns the 32 bit value from the offset specified. 442 */ 443 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, 444 uint32_t reg, uint32_t acc_flags) 445 { 446 uint32_t ret; 447 448 if (amdgpu_device_skip_hw_access(adev)) 449 return 0; 450 451 if ((reg * 4) < adev->rmmio_size) { 452 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && 453 amdgpu_sriov_runtime(adev) && 454 down_read_trylock(&adev->reset_sem)) { 455 ret = amdgpu_kiq_rreg(adev, reg); 456 up_read(&adev->reset_sem); 457 } else { 458 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); 459 } 460 } else { 461 ret = adev->pcie_rreg(adev, reg * 4); 462 } 463 464 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); 465 466 return ret; 467 } 468 469 /* 470 * MMIO register read with bytes helper functions 471 * @offset:bytes offset from MMIO start 472 * 473 */ 474 475 /** 476 * amdgpu_mm_rreg8 - read a memory mapped IO register 477 * 478 * @adev: amdgpu_device pointer 479 * @offset: byte aligned register offset 480 * 481 * Returns the 8 bit value from the offset specified. 482 */ 483 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) 484 { 485 if (amdgpu_device_skip_hw_access(adev)) 486 return 0; 487 488 if (offset < adev->rmmio_size) 489 return (readb(adev->rmmio + offset)); 490 BUG(); 491 } 492 493 /* 494 * MMIO register write with bytes helper functions 495 * @offset:bytes offset from MMIO start 496 * @value: the value want to be written to the register 497 * 498 */ 499 /** 500 * amdgpu_mm_wreg8 - read a memory mapped IO register 501 * 502 * @adev: amdgpu_device pointer 503 * @offset: byte aligned register offset 504 * @value: 8 bit value to write 505 * 506 * Writes the value specified to the offset specified. 507 */ 508 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) 509 { 510 if (amdgpu_device_skip_hw_access(adev)) 511 return; 512 513 if (offset < adev->rmmio_size) 514 writeb(value, adev->rmmio + offset); 515 else 516 BUG(); 517 } 518 519 /** 520 * amdgpu_device_wreg - write to a memory mapped IO or indirect register 521 * 522 * @adev: amdgpu_device pointer 523 * @reg: dword aligned register offset 524 * @v: 32 bit value to write to the register 525 * @acc_flags: access flags which require special behavior 526 * 527 * Writes the value specified to the offset specified. 528 */ 529 void amdgpu_device_wreg(struct amdgpu_device *adev, 530 uint32_t reg, uint32_t v, 531 uint32_t acc_flags) 532 { 533 if (amdgpu_device_skip_hw_access(adev)) 534 return; 535 536 if ((reg * 4) < adev->rmmio_size) { 537 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && 538 amdgpu_sriov_runtime(adev) && 539 down_read_trylock(&adev->reset_sem)) { 540 amdgpu_kiq_wreg(adev, reg, v); 541 up_read(&adev->reset_sem); 542 } else { 543 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 544 } 545 } else { 546 adev->pcie_wreg(adev, reg * 4, v); 547 } 548 549 trace_amdgpu_device_wreg(adev->pdev->device, reg, v); 550 } 551 552 /* 553 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range 554 * 555 * this function is invoked only the debugfs register access 556 * */ 557 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, 558 uint32_t reg, uint32_t v) 559 { 560 if (amdgpu_device_skip_hw_access(adev)) 561 return; 562 563 if (amdgpu_sriov_fullaccess(adev) && 564 adev->gfx.rlc.funcs && 565 adev->gfx.rlc.funcs->is_rlcg_access_range) { 566 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) 567 return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0); 568 } else { 569 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 570 } 571 } 572 573 /** 574 * amdgpu_mm_rdoorbell - read a doorbell dword 575 * 576 * @adev: amdgpu_device pointer 577 * @index: doorbell index 578 * 579 * Returns the value in the doorbell aperture at the 580 * requested doorbell index (CIK). 581 */ 582 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) 583 { 584 if (amdgpu_device_skip_hw_access(adev)) 585 return 0; 586 587 if (index < adev->doorbell.num_doorbells) { 588 return readl(adev->doorbell.ptr + index); 589 } else { 590 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 591 return 0; 592 } 593 } 594 595 /** 596 * amdgpu_mm_wdoorbell - write a doorbell dword 597 * 598 * @adev: amdgpu_device pointer 599 * @index: doorbell index 600 * @v: value to write 601 * 602 * Writes @v to the doorbell aperture at the 603 * requested doorbell index (CIK). 604 */ 605 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) 606 { 607 if (amdgpu_device_skip_hw_access(adev)) 608 return; 609 610 if (index < adev->doorbell.num_doorbells) { 611 writel(v, adev->doorbell.ptr + index); 612 } else { 613 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 614 } 615 } 616 617 /** 618 * amdgpu_mm_rdoorbell64 - read a doorbell Qword 619 * 620 * @adev: amdgpu_device pointer 621 * @index: doorbell index 622 * 623 * Returns the value in the doorbell aperture at the 624 * requested doorbell index (VEGA10+). 625 */ 626 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) 627 { 628 if (amdgpu_device_skip_hw_access(adev)) 629 return 0; 630 631 if (index < adev->doorbell.num_doorbells) { 632 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); 633 } else { 634 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); 635 return 0; 636 } 637 } 638 639 /** 640 * amdgpu_mm_wdoorbell64 - write a doorbell Qword 641 * 642 * @adev: amdgpu_device pointer 643 * @index: doorbell index 644 * @v: value to write 645 * 646 * Writes @v to the doorbell aperture at the 647 * requested doorbell index (VEGA10+). 648 */ 649 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) 650 { 651 if (amdgpu_device_skip_hw_access(adev)) 652 return; 653 654 if (index < adev->doorbell.num_doorbells) { 655 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); 656 } else { 657 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); 658 } 659 } 660 661 /** 662 * amdgpu_device_indirect_rreg - read an indirect register 663 * 664 * @adev: amdgpu_device pointer 665 * @pcie_index: mmio register offset 666 * @pcie_data: mmio register offset 667 * @reg_addr: indirect register address to read from 668 * 669 * Returns the value of indirect register @reg_addr 670 */ 671 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, 672 u32 pcie_index, u32 pcie_data, 673 u32 reg_addr) 674 { 675 unsigned long flags; 676 u32 r; 677 void __iomem *pcie_index_offset; 678 void __iomem *pcie_data_offset; 679 680 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 681 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 682 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 683 684 writel(reg_addr, pcie_index_offset); 685 readl(pcie_index_offset); 686 r = readl(pcie_data_offset); 687 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 688 689 return r; 690 } 691 692 /** 693 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register 694 * 695 * @adev: amdgpu_device pointer 696 * @pcie_index: mmio register offset 697 * @pcie_data: mmio register offset 698 * @reg_addr: indirect register address to read from 699 * 700 * Returns the value of indirect register @reg_addr 701 */ 702 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, 703 u32 pcie_index, u32 pcie_data, 704 u32 reg_addr) 705 { 706 unsigned long flags; 707 u64 r; 708 void __iomem *pcie_index_offset; 709 void __iomem *pcie_data_offset; 710 711 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 712 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 713 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 714 715 /* read low 32 bits */ 716 writel(reg_addr, pcie_index_offset); 717 readl(pcie_index_offset); 718 r = readl(pcie_data_offset); 719 /* read high 32 bits */ 720 writel(reg_addr + 4, pcie_index_offset); 721 readl(pcie_index_offset); 722 r |= ((u64)readl(pcie_data_offset) << 32); 723 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 724 725 return r; 726 } 727 728 /** 729 * amdgpu_device_indirect_wreg - write an indirect register address 730 * 731 * @adev: amdgpu_device pointer 732 * @pcie_index: mmio register offset 733 * @pcie_data: mmio register offset 734 * @reg_addr: indirect register offset 735 * @reg_data: indirect register data 736 * 737 */ 738 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, 739 u32 pcie_index, u32 pcie_data, 740 u32 reg_addr, u32 reg_data) 741 { 742 unsigned long flags; 743 void __iomem *pcie_index_offset; 744 void __iomem *pcie_data_offset; 745 746 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 747 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 748 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 749 750 writel(reg_addr, pcie_index_offset); 751 readl(pcie_index_offset); 752 writel(reg_data, pcie_data_offset); 753 readl(pcie_data_offset); 754 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 755 } 756 757 /** 758 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address 759 * 760 * @adev: amdgpu_device pointer 761 * @pcie_index: mmio register offset 762 * @pcie_data: mmio register offset 763 * @reg_addr: indirect register offset 764 * @reg_data: indirect register data 765 * 766 */ 767 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, 768 u32 pcie_index, u32 pcie_data, 769 u32 reg_addr, u64 reg_data) 770 { 771 unsigned long flags; 772 void __iomem *pcie_index_offset; 773 void __iomem *pcie_data_offset; 774 775 spin_lock_irqsave(&adev->pcie_idx_lock, flags); 776 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 777 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 778 779 /* write low 32 bits */ 780 writel(reg_addr, pcie_index_offset); 781 readl(pcie_index_offset); 782 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); 783 readl(pcie_data_offset); 784 /* write high 32 bits */ 785 writel(reg_addr + 4, pcie_index_offset); 786 readl(pcie_index_offset); 787 writel((u32)(reg_data >> 32), pcie_data_offset); 788 readl(pcie_data_offset); 789 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 790 } 791 792 /** 793 * amdgpu_invalid_rreg - dummy reg read function 794 * 795 * @adev: amdgpu_device pointer 796 * @reg: offset of register 797 * 798 * Dummy register read function. Used for register blocks 799 * that certain asics don't have (all asics). 800 * Returns the value in the register. 801 */ 802 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) 803 { 804 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); 805 BUG(); 806 return 0; 807 } 808 809 /** 810 * amdgpu_invalid_wreg - dummy reg write function 811 * 812 * @adev: amdgpu_device pointer 813 * @reg: offset of register 814 * @v: value to write to the register 815 * 816 * Dummy register read function. Used for register blocks 817 * that certain asics don't have (all asics). 818 */ 819 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) 820 { 821 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", 822 reg, v); 823 BUG(); 824 } 825 826 /** 827 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function 828 * 829 * @adev: amdgpu_device pointer 830 * @reg: offset of register 831 * 832 * Dummy register read function. Used for register blocks 833 * that certain asics don't have (all asics). 834 * Returns the value in the register. 835 */ 836 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) 837 { 838 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); 839 BUG(); 840 return 0; 841 } 842 843 /** 844 * amdgpu_invalid_wreg64 - dummy reg write function 845 * 846 * @adev: amdgpu_device pointer 847 * @reg: offset of register 848 * @v: value to write to the register 849 * 850 * Dummy register read function. Used for register blocks 851 * that certain asics don't have (all asics). 852 */ 853 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) 854 { 855 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", 856 reg, v); 857 BUG(); 858 } 859 860 /** 861 * amdgpu_block_invalid_rreg - dummy reg read function 862 * 863 * @adev: amdgpu_device pointer 864 * @block: offset of instance 865 * @reg: offset of register 866 * 867 * Dummy register read function. Used for register blocks 868 * that certain asics don't have (all asics). 869 * Returns the value in the register. 870 */ 871 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, 872 uint32_t block, uint32_t reg) 873 { 874 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n", 875 reg, block); 876 BUG(); 877 return 0; 878 } 879 880 /** 881 * amdgpu_block_invalid_wreg - dummy reg write function 882 * 883 * @adev: amdgpu_device pointer 884 * @block: offset of instance 885 * @reg: offset of register 886 * @v: value to write to the register 887 * 888 * Dummy register read function. Used for register blocks 889 * that certain asics don't have (all asics). 890 */ 891 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, 892 uint32_t block, 893 uint32_t reg, uint32_t v) 894 { 895 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", 896 reg, block, v); 897 BUG(); 898 } 899 900 /** 901 * amdgpu_device_asic_init - Wrapper for atom asic_init 902 * 903 * @adev: amdgpu_device pointer 904 * 905 * Does any asic specific work and then calls atom asic init. 906 */ 907 static int amdgpu_device_asic_init(struct amdgpu_device *adev) 908 { 909 amdgpu_asic_pre_asic_init(adev); 910 911 return amdgpu_atom_asic_init(adev->mode_info.atom_context); 912 } 913 914 /** 915 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page 916 * 917 * @adev: amdgpu_device pointer 918 * 919 * Allocates a scratch page of VRAM for use by various things in the 920 * driver. 921 */ 922 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev) 923 { 924 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, 925 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 926 &adev->vram_scratch.robj, 927 &adev->vram_scratch.gpu_addr, 928 (void **)&adev->vram_scratch.ptr); 929 } 930 931 /** 932 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page 933 * 934 * @adev: amdgpu_device pointer 935 * 936 * Frees the VRAM scratch page. 937 */ 938 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev) 939 { 940 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); 941 } 942 943 /** 944 * amdgpu_device_program_register_sequence - program an array of registers. 945 * 946 * @adev: amdgpu_device pointer 947 * @registers: pointer to the register array 948 * @array_size: size of the register array 949 * 950 * Programs an array or registers with and and or masks. 951 * This is a helper for setting golden registers. 952 */ 953 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, 954 const u32 *registers, 955 const u32 array_size) 956 { 957 u32 tmp, reg, and_mask, or_mask; 958 int i; 959 960 if (array_size % 3) 961 return; 962 963 for (i = 0; i < array_size; i +=3) { 964 reg = registers[i + 0]; 965 and_mask = registers[i + 1]; 966 or_mask = registers[i + 2]; 967 968 if (and_mask == 0xffffffff) { 969 tmp = or_mask; 970 } else { 971 tmp = RREG32(reg); 972 tmp &= ~and_mask; 973 if (adev->family >= AMDGPU_FAMILY_AI) 974 tmp |= (or_mask & and_mask); 975 else 976 tmp |= or_mask; 977 } 978 WREG32(reg, tmp); 979 } 980 } 981 982 /** 983 * amdgpu_device_pci_config_reset - reset the GPU 984 * 985 * @adev: amdgpu_device pointer 986 * 987 * Resets the GPU using the pci config reset sequence. 988 * Only applicable to asics prior to vega10. 989 */ 990 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) 991 { 992 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); 993 } 994 995 /** 996 * amdgpu_device_pci_reset - reset the GPU using generic PCI means 997 * 998 * @adev: amdgpu_device pointer 999 * 1000 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.). 1001 */ 1002 int amdgpu_device_pci_reset(struct amdgpu_device *adev) 1003 { 1004 return pci_reset_function(adev->pdev); 1005 } 1006 1007 /* 1008 * GPU doorbell aperture helpers function. 1009 */ 1010 /** 1011 * amdgpu_device_doorbell_init - Init doorbell driver information. 1012 * 1013 * @adev: amdgpu_device pointer 1014 * 1015 * Init doorbell driver information (CIK) 1016 * Returns 0 on success, error on failure. 1017 */ 1018 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) 1019 { 1020 1021 /* No doorbell on SI hardware generation */ 1022 if (adev->asic_type < CHIP_BONAIRE) { 1023 adev->doorbell.base = 0; 1024 adev->doorbell.size = 0; 1025 adev->doorbell.num_doorbells = 0; 1026 adev->doorbell.ptr = NULL; 1027 return 0; 1028 } 1029 1030 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) 1031 return -EINVAL; 1032 1033 amdgpu_asic_init_doorbell_index(adev); 1034 1035 /* doorbell bar mapping */ 1036 adev->doorbell.base = pci_resource_start(adev->pdev, 2); 1037 adev->doorbell.size = pci_resource_len(adev->pdev, 2); 1038 1039 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), 1040 adev->doorbell_index.max_assignment+1); 1041 if (adev->doorbell.num_doorbells == 0) 1042 return -EINVAL; 1043 1044 /* For Vega, reserve and map two pages on doorbell BAR since SDMA 1045 * paging queue doorbell use the second page. The 1046 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the 1047 * doorbells are in the first page. So with paging queue enabled, 1048 * the max num_doorbells should + 1 page (0x400 in dword) 1049 */ 1050 if (adev->asic_type >= CHIP_VEGA10) 1051 adev->doorbell.num_doorbells += 0x400; 1052 1053 adev->doorbell.ptr = ioremap(adev->doorbell.base, 1054 adev->doorbell.num_doorbells * 1055 sizeof(u32)); 1056 if (adev->doorbell.ptr == NULL) 1057 return -ENOMEM; 1058 1059 return 0; 1060 } 1061 1062 /** 1063 * amdgpu_device_doorbell_fini - Tear down doorbell driver information. 1064 * 1065 * @adev: amdgpu_device pointer 1066 * 1067 * Tear down doorbell driver information (CIK) 1068 */ 1069 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) 1070 { 1071 iounmap(adev->doorbell.ptr); 1072 adev->doorbell.ptr = NULL; 1073 } 1074 1075 1076 1077 /* 1078 * amdgpu_device_wb_*() 1079 * Writeback is the method by which the GPU updates special pages in memory 1080 * with the status of certain GPU events (fences, ring pointers,etc.). 1081 */ 1082 1083 /** 1084 * amdgpu_device_wb_fini - Disable Writeback and free memory 1085 * 1086 * @adev: amdgpu_device pointer 1087 * 1088 * Disables Writeback and frees the Writeback memory (all asics). 1089 * Used at driver shutdown. 1090 */ 1091 static void amdgpu_device_wb_fini(struct amdgpu_device *adev) 1092 { 1093 if (adev->wb.wb_obj) { 1094 amdgpu_bo_free_kernel(&adev->wb.wb_obj, 1095 &adev->wb.gpu_addr, 1096 (void **)&adev->wb.wb); 1097 adev->wb.wb_obj = NULL; 1098 } 1099 } 1100 1101 /** 1102 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory 1103 * 1104 * @adev: amdgpu_device pointer 1105 * 1106 * Initializes writeback and allocates writeback memory (all asics). 1107 * Used at driver startup. 1108 * Returns 0 on success or an -error on failure. 1109 */ 1110 static int amdgpu_device_wb_init(struct amdgpu_device *adev) 1111 { 1112 int r; 1113 1114 if (adev->wb.wb_obj == NULL) { 1115 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ 1116 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, 1117 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1118 &adev->wb.wb_obj, &adev->wb.gpu_addr, 1119 (void **)&adev->wb.wb); 1120 if (r) { 1121 dev_warn(adev->dev, "(%d) create WB bo failed\n", r); 1122 return r; 1123 } 1124 1125 adev->wb.num_wb = AMDGPU_MAX_WB; 1126 memset(&adev->wb.used, 0, sizeof(adev->wb.used)); 1127 1128 /* clear wb memory */ 1129 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); 1130 } 1131 1132 return 0; 1133 } 1134 1135 /** 1136 * amdgpu_device_wb_get - Allocate a wb entry 1137 * 1138 * @adev: amdgpu_device pointer 1139 * @wb: wb index 1140 * 1141 * Allocate a wb slot for use by the driver (all asics). 1142 * Returns 0 on success or -EINVAL on failure. 1143 */ 1144 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) 1145 { 1146 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); 1147 1148 if (offset < adev->wb.num_wb) { 1149 __set_bit(offset, adev->wb.used); 1150 *wb = offset << 3; /* convert to dw offset */ 1151 return 0; 1152 } else { 1153 return -EINVAL; 1154 } 1155 } 1156 1157 /** 1158 * amdgpu_device_wb_free - Free a wb entry 1159 * 1160 * @adev: amdgpu_device pointer 1161 * @wb: wb index 1162 * 1163 * Free a wb slot allocated for use by the driver (all asics) 1164 */ 1165 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) 1166 { 1167 wb >>= 3; 1168 if (wb < adev->wb.num_wb) 1169 __clear_bit(wb, adev->wb.used); 1170 } 1171 1172 /** 1173 * amdgpu_device_resize_fb_bar - try to resize FB BAR 1174 * 1175 * @adev: amdgpu_device pointer 1176 * 1177 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not 1178 * to fail, but if any of the BARs is not accessible after the size we abort 1179 * driver loading by returning -ENODEV. 1180 */ 1181 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) 1182 { 1183 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size); 1184 struct pci_bus *root; 1185 struct resource *res; 1186 unsigned i; 1187 u16 cmd; 1188 int r; 1189 1190 /* Bypass for VF */ 1191 if (amdgpu_sriov_vf(adev)) 1192 return 0; 1193 1194 /* skip if the bios has already enabled large BAR */ 1195 if (adev->gmc.real_vram_size && 1196 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) 1197 return 0; 1198 1199 /* Check if the root BUS has 64bit memory resources */ 1200 root = adev->pdev->bus; 1201 while (root->parent) 1202 root = root->parent; 1203 1204 pci_bus_for_each_resource(root, res, i) { 1205 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 1206 res->start > 0x100000000ull) 1207 break; 1208 } 1209 1210 /* Trying to resize is pointless without a root hub window above 4GB */ 1211 if (!res) 1212 return 0; 1213 1214 /* Limit the BAR size to what is available */ 1215 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1, 1216 rbar_size); 1217 1218 /* Disable memory decoding while we change the BAR addresses and size */ 1219 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd); 1220 pci_write_config_word(adev->pdev, PCI_COMMAND, 1221 cmd & ~PCI_COMMAND_MEMORY); 1222 1223 /* Free the VRAM and doorbell BAR, we most likely need to move both. */ 1224 amdgpu_device_doorbell_fini(adev); 1225 if (adev->asic_type >= CHIP_BONAIRE) 1226 pci_release_resource(adev->pdev, 2); 1227 1228 pci_release_resource(adev->pdev, 0); 1229 1230 r = pci_resize_resource(adev->pdev, 0, rbar_size); 1231 if (r == -ENOSPC) 1232 DRM_INFO("Not enough PCI address space for a large BAR."); 1233 else if (r && r != -ENOTSUPP) 1234 DRM_ERROR("Problem resizing BAR0 (%d).", r); 1235 1236 pci_assign_unassigned_bus_resources(adev->pdev->bus); 1237 1238 /* When the doorbell or fb BAR isn't available we have no chance of 1239 * using the device. 1240 */ 1241 r = amdgpu_device_doorbell_init(adev); 1242 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) 1243 return -ENODEV; 1244 1245 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd); 1246 1247 return 0; 1248 } 1249 1250 /* 1251 * GPU helpers function. 1252 */ 1253 /** 1254 * amdgpu_device_need_post - check if the hw need post or not 1255 * 1256 * @adev: amdgpu_device pointer 1257 * 1258 * Check if the asic has been initialized (all asics) at driver startup 1259 * or post is needed if hw reset is performed. 1260 * Returns true if need or false if not. 1261 */ 1262 bool amdgpu_device_need_post(struct amdgpu_device *adev) 1263 { 1264 uint32_t reg; 1265 1266 if (amdgpu_sriov_vf(adev)) 1267 return false; 1268 1269 if (amdgpu_passthrough(adev)) { 1270 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot 1271 * some old smc fw still need driver do vPost otherwise gpu hang, while 1272 * those smc fw version above 22.15 doesn't have this flaw, so we force 1273 * vpost executed for smc version below 22.15 1274 */ 1275 if (adev->asic_type == CHIP_FIJI) { 1276 int err; 1277 uint32_t fw_ver; 1278 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); 1279 /* force vPost if error occured */ 1280 if (err) 1281 return true; 1282 1283 fw_ver = *((uint32_t *)adev->pm.fw->data + 69); 1284 if (fw_ver < 0x00160e00) 1285 return true; 1286 } 1287 } 1288 1289 /* Don't post if we need to reset whole hive on init */ 1290 if (adev->gmc.xgmi.pending_reset) 1291 return false; 1292 1293 if (adev->has_hw_reset) { 1294 adev->has_hw_reset = false; 1295 return true; 1296 } 1297 1298 /* bios scratch used on CIK+ */ 1299 if (adev->asic_type >= CHIP_BONAIRE) 1300 return amdgpu_atombios_scratch_need_asic_init(adev); 1301 1302 /* check MEM_SIZE for older asics */ 1303 reg = amdgpu_asic_get_config_memsize(adev); 1304 1305 if ((reg != 0) && (reg != 0xffffffff)) 1306 return false; 1307 1308 return true; 1309 } 1310 1311 /* if we get transitioned to only one device, take VGA back */ 1312 /** 1313 * amdgpu_device_vga_set_decode - enable/disable vga decode 1314 * 1315 * @cookie: amdgpu_device pointer 1316 * @state: enable/disable vga decode 1317 * 1318 * Enable/disable vga decode (all asics). 1319 * Returns VGA resource flags. 1320 */ 1321 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) 1322 { 1323 struct amdgpu_device *adev = cookie; 1324 amdgpu_asic_set_vga_state(adev, state); 1325 if (state) 1326 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | 1327 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1328 else 1329 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1330 } 1331 1332 /** 1333 * amdgpu_device_check_block_size - validate the vm block size 1334 * 1335 * @adev: amdgpu_device pointer 1336 * 1337 * Validates the vm block size specified via module parameter. 1338 * The vm block size defines number of bits in page table versus page directory, 1339 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1340 * page table and the remaining bits are in the page directory. 1341 */ 1342 static void amdgpu_device_check_block_size(struct amdgpu_device *adev) 1343 { 1344 /* defines number of bits in page table versus page directory, 1345 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1346 * page table and the remaining bits are in the page directory */ 1347 if (amdgpu_vm_block_size == -1) 1348 return; 1349 1350 if (amdgpu_vm_block_size < 9) { 1351 dev_warn(adev->dev, "VM page table size (%d) too small\n", 1352 amdgpu_vm_block_size); 1353 amdgpu_vm_block_size = -1; 1354 } 1355 } 1356 1357 /** 1358 * amdgpu_device_check_vm_size - validate the vm size 1359 * 1360 * @adev: amdgpu_device pointer 1361 * 1362 * Validates the vm size in GB specified via module parameter. 1363 * The VM size is the size of the GPU virtual memory space in GB. 1364 */ 1365 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) 1366 { 1367 /* no need to check the default value */ 1368 if (amdgpu_vm_size == -1) 1369 return; 1370 1371 if (amdgpu_vm_size < 1) { 1372 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", 1373 amdgpu_vm_size); 1374 amdgpu_vm_size = -1; 1375 } 1376 } 1377 1378 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) 1379 { 1380 struct sysinfo si; 1381 bool is_os_64 = (sizeof(void *) == 8); 1382 uint64_t total_memory; 1383 uint64_t dram_size_seven_GB = 0x1B8000000; 1384 uint64_t dram_size_three_GB = 0xB8000000; 1385 1386 if (amdgpu_smu_memory_pool_size == 0) 1387 return; 1388 1389 if (!is_os_64) { 1390 DRM_WARN("Not 64-bit OS, feature not supported\n"); 1391 goto def_value; 1392 } 1393 si_meminfo(&si); 1394 total_memory = (uint64_t)si.totalram * si.mem_unit; 1395 1396 if ((amdgpu_smu_memory_pool_size == 1) || 1397 (amdgpu_smu_memory_pool_size == 2)) { 1398 if (total_memory < dram_size_three_GB) 1399 goto def_value1; 1400 } else if ((amdgpu_smu_memory_pool_size == 4) || 1401 (amdgpu_smu_memory_pool_size == 8)) { 1402 if (total_memory < dram_size_seven_GB) 1403 goto def_value1; 1404 } else { 1405 DRM_WARN("Smu memory pool size not supported\n"); 1406 goto def_value; 1407 } 1408 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; 1409 1410 return; 1411 1412 def_value1: 1413 DRM_WARN("No enough system memory\n"); 1414 def_value: 1415 adev->pm.smu_prv_buffer_size = 0; 1416 } 1417 1418 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) 1419 { 1420 if (!(adev->flags & AMD_IS_APU) || 1421 adev->asic_type < CHIP_RAVEN) 1422 return 0; 1423 1424 switch (adev->asic_type) { 1425 case CHIP_RAVEN: 1426 if (adev->pdev->device == 0x15dd) 1427 adev->apu_flags |= AMD_APU_IS_RAVEN; 1428 if (adev->pdev->device == 0x15d8) 1429 adev->apu_flags |= AMD_APU_IS_PICASSO; 1430 break; 1431 case CHIP_RENOIR: 1432 if ((adev->pdev->device == 0x1636) || 1433 (adev->pdev->device == 0x164c)) 1434 adev->apu_flags |= AMD_APU_IS_RENOIR; 1435 else 1436 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; 1437 break; 1438 case CHIP_VANGOGH: 1439 adev->apu_flags |= AMD_APU_IS_VANGOGH; 1440 break; 1441 case CHIP_YELLOW_CARP: 1442 break; 1443 case CHIP_CYAN_SKILLFISH: 1444 if (adev->pdev->device == 0x13FE) 1445 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; 1446 break; 1447 default: 1448 return -EINVAL; 1449 } 1450 1451 return 0; 1452 } 1453 1454 /** 1455 * amdgpu_device_check_arguments - validate module params 1456 * 1457 * @adev: amdgpu_device pointer 1458 * 1459 * Validates certain module parameters and updates 1460 * the associated values used by the driver (all asics). 1461 */ 1462 static int amdgpu_device_check_arguments(struct amdgpu_device *adev) 1463 { 1464 if (amdgpu_sched_jobs < 4) { 1465 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", 1466 amdgpu_sched_jobs); 1467 amdgpu_sched_jobs = 4; 1468 } else if (!is_power_of_2(amdgpu_sched_jobs)){ 1469 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", 1470 amdgpu_sched_jobs); 1471 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); 1472 } 1473 1474 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { 1475 /* gart size must be greater or equal to 32M */ 1476 dev_warn(adev->dev, "gart size (%d) too small\n", 1477 amdgpu_gart_size); 1478 amdgpu_gart_size = -1; 1479 } 1480 1481 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { 1482 /* gtt size must be greater or equal to 32M */ 1483 dev_warn(adev->dev, "gtt size (%d) too small\n", 1484 amdgpu_gtt_size); 1485 amdgpu_gtt_size = -1; 1486 } 1487 1488 /* valid range is between 4 and 9 inclusive */ 1489 if (amdgpu_vm_fragment_size != -1 && 1490 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { 1491 dev_warn(adev->dev, "valid range is between 4 and 9\n"); 1492 amdgpu_vm_fragment_size = -1; 1493 } 1494 1495 if (amdgpu_sched_hw_submission < 2) { 1496 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n", 1497 amdgpu_sched_hw_submission); 1498 amdgpu_sched_hw_submission = 2; 1499 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) { 1500 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n", 1501 amdgpu_sched_hw_submission); 1502 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission); 1503 } 1504 1505 amdgpu_device_check_smu_prv_buffer_size(adev); 1506 1507 amdgpu_device_check_vm_size(adev); 1508 1509 amdgpu_device_check_block_size(adev); 1510 1511 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); 1512 1513 amdgpu_gmc_tmz_set(adev); 1514 1515 amdgpu_gmc_noretry_set(adev); 1516 1517 return 0; 1518 } 1519 1520 /** 1521 * amdgpu_switcheroo_set_state - set switcheroo state 1522 * 1523 * @pdev: pci dev pointer 1524 * @state: vga_switcheroo state 1525 * 1526 * Callback for the switcheroo driver. Suspends or resumes the 1527 * the asics before or after it is powered up using ACPI methods. 1528 */ 1529 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, 1530 enum vga_switcheroo_state state) 1531 { 1532 struct drm_device *dev = pci_get_drvdata(pdev); 1533 int r; 1534 1535 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) 1536 return; 1537 1538 if (state == VGA_SWITCHEROO_ON) { 1539 pr_info("switched on\n"); 1540 /* don't suspend or resume card normally */ 1541 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1542 1543 pci_set_power_state(pdev, PCI_D0); 1544 amdgpu_device_load_pci_state(pdev); 1545 r = pci_enable_device(pdev); 1546 if (r) 1547 DRM_WARN("pci_enable_device failed (%d)\n", r); 1548 amdgpu_device_resume(dev, true); 1549 1550 dev->switch_power_state = DRM_SWITCH_POWER_ON; 1551 } else { 1552 pr_info("switched off\n"); 1553 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1554 amdgpu_device_suspend(dev, true); 1555 amdgpu_device_cache_pci_state(pdev); 1556 /* Shut down the device */ 1557 pci_disable_device(pdev); 1558 pci_set_power_state(pdev, PCI_D3cold); 1559 dev->switch_power_state = DRM_SWITCH_POWER_OFF; 1560 } 1561 } 1562 1563 /** 1564 * amdgpu_switcheroo_can_switch - see if switcheroo state can change 1565 * 1566 * @pdev: pci dev pointer 1567 * 1568 * Callback for the switcheroo driver. Check of the switcheroo 1569 * state can be changed. 1570 * Returns true if the state can be changed, false if not. 1571 */ 1572 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) 1573 { 1574 struct drm_device *dev = pci_get_drvdata(pdev); 1575 1576 /* 1577 * FIXME: open_count is protected by drm_global_mutex but that would lead to 1578 * locking inversion with the driver load path. And the access here is 1579 * completely racy anyway. So don't bother with locking for now. 1580 */ 1581 return atomic_read(&dev->open_count) == 0; 1582 } 1583 1584 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { 1585 .set_gpu_state = amdgpu_switcheroo_set_state, 1586 .reprobe = NULL, 1587 .can_switch = amdgpu_switcheroo_can_switch, 1588 }; 1589 1590 /** 1591 * amdgpu_device_ip_set_clockgating_state - set the CG state 1592 * 1593 * @dev: amdgpu_device pointer 1594 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1595 * @state: clockgating state (gate or ungate) 1596 * 1597 * Sets the requested clockgating state for all instances of 1598 * the hardware IP specified. 1599 * Returns the error code from the last instance. 1600 */ 1601 int amdgpu_device_ip_set_clockgating_state(void *dev, 1602 enum amd_ip_block_type block_type, 1603 enum amd_clockgating_state state) 1604 { 1605 struct amdgpu_device *adev = dev; 1606 int i, r = 0; 1607 1608 for (i = 0; i < adev->num_ip_blocks; i++) { 1609 if (!adev->ip_blocks[i].status.valid) 1610 continue; 1611 if (adev->ip_blocks[i].version->type != block_type) 1612 continue; 1613 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) 1614 continue; 1615 r = adev->ip_blocks[i].version->funcs->set_clockgating_state( 1616 (void *)adev, state); 1617 if (r) 1618 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", 1619 adev->ip_blocks[i].version->funcs->name, r); 1620 } 1621 return r; 1622 } 1623 1624 /** 1625 * amdgpu_device_ip_set_powergating_state - set the PG state 1626 * 1627 * @dev: amdgpu_device pointer 1628 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1629 * @state: powergating state (gate or ungate) 1630 * 1631 * Sets the requested powergating state for all instances of 1632 * the hardware IP specified. 1633 * Returns the error code from the last instance. 1634 */ 1635 int amdgpu_device_ip_set_powergating_state(void *dev, 1636 enum amd_ip_block_type block_type, 1637 enum amd_powergating_state state) 1638 { 1639 struct amdgpu_device *adev = dev; 1640 int i, r = 0; 1641 1642 for (i = 0; i < adev->num_ip_blocks; i++) { 1643 if (!adev->ip_blocks[i].status.valid) 1644 continue; 1645 if (adev->ip_blocks[i].version->type != block_type) 1646 continue; 1647 if (!adev->ip_blocks[i].version->funcs->set_powergating_state) 1648 continue; 1649 r = adev->ip_blocks[i].version->funcs->set_powergating_state( 1650 (void *)adev, state); 1651 if (r) 1652 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", 1653 adev->ip_blocks[i].version->funcs->name, r); 1654 } 1655 return r; 1656 } 1657 1658 /** 1659 * amdgpu_device_ip_get_clockgating_state - get the CG state 1660 * 1661 * @adev: amdgpu_device pointer 1662 * @flags: clockgating feature flags 1663 * 1664 * Walks the list of IPs on the device and updates the clockgating 1665 * flags for each IP. 1666 * Updates @flags with the feature flags for each hardware IP where 1667 * clockgating is enabled. 1668 */ 1669 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, 1670 u32 *flags) 1671 { 1672 int i; 1673 1674 for (i = 0; i < adev->num_ip_blocks; i++) { 1675 if (!adev->ip_blocks[i].status.valid) 1676 continue; 1677 if (adev->ip_blocks[i].version->funcs->get_clockgating_state) 1678 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags); 1679 } 1680 } 1681 1682 /** 1683 * amdgpu_device_ip_wait_for_idle - wait for idle 1684 * 1685 * @adev: amdgpu_device pointer 1686 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1687 * 1688 * Waits for the request hardware IP to be idle. 1689 * Returns 0 for success or a negative error code on failure. 1690 */ 1691 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, 1692 enum amd_ip_block_type block_type) 1693 { 1694 int i, r; 1695 1696 for (i = 0; i < adev->num_ip_blocks; i++) { 1697 if (!adev->ip_blocks[i].status.valid) 1698 continue; 1699 if (adev->ip_blocks[i].version->type == block_type) { 1700 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev); 1701 if (r) 1702 return r; 1703 break; 1704 } 1705 } 1706 return 0; 1707 1708 } 1709 1710 /** 1711 * amdgpu_device_ip_is_idle - is the hardware IP idle 1712 * 1713 * @adev: amdgpu_device pointer 1714 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1715 * 1716 * Check if the hardware IP is idle or not. 1717 * Returns true if it the IP is idle, false if not. 1718 */ 1719 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, 1720 enum amd_ip_block_type block_type) 1721 { 1722 int i; 1723 1724 for (i = 0; i < adev->num_ip_blocks; i++) { 1725 if (!adev->ip_blocks[i].status.valid) 1726 continue; 1727 if (adev->ip_blocks[i].version->type == block_type) 1728 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev); 1729 } 1730 return true; 1731 1732 } 1733 1734 /** 1735 * amdgpu_device_ip_get_ip_block - get a hw IP pointer 1736 * 1737 * @adev: amdgpu_device pointer 1738 * @type: Type of hardware IP (SMU, GFX, UVD, etc.) 1739 * 1740 * Returns a pointer to the hardware IP block structure 1741 * if it exists for the asic, otherwise NULL. 1742 */ 1743 struct amdgpu_ip_block * 1744 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, 1745 enum amd_ip_block_type type) 1746 { 1747 int i; 1748 1749 for (i = 0; i < adev->num_ip_blocks; i++) 1750 if (adev->ip_blocks[i].version->type == type) 1751 return &adev->ip_blocks[i]; 1752 1753 return NULL; 1754 } 1755 1756 /** 1757 * amdgpu_device_ip_block_version_cmp 1758 * 1759 * @adev: amdgpu_device pointer 1760 * @type: enum amd_ip_block_type 1761 * @major: major version 1762 * @minor: minor version 1763 * 1764 * return 0 if equal or greater 1765 * return 1 if smaller or the ip_block doesn't exist 1766 */ 1767 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, 1768 enum amd_ip_block_type type, 1769 u32 major, u32 minor) 1770 { 1771 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); 1772 1773 if (ip_block && ((ip_block->version->major > major) || 1774 ((ip_block->version->major == major) && 1775 (ip_block->version->minor >= minor)))) 1776 return 0; 1777 1778 return 1; 1779 } 1780 1781 /** 1782 * amdgpu_device_ip_block_add 1783 * 1784 * @adev: amdgpu_device pointer 1785 * @ip_block_version: pointer to the IP to add 1786 * 1787 * Adds the IP block driver information to the collection of IPs 1788 * on the asic. 1789 */ 1790 int amdgpu_device_ip_block_add(struct amdgpu_device *adev, 1791 const struct amdgpu_ip_block_version *ip_block_version) 1792 { 1793 if (!ip_block_version) 1794 return -EINVAL; 1795 1796 switch (ip_block_version->type) { 1797 case AMD_IP_BLOCK_TYPE_VCN: 1798 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) 1799 return 0; 1800 break; 1801 case AMD_IP_BLOCK_TYPE_JPEG: 1802 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) 1803 return 0; 1804 break; 1805 default: 1806 break; 1807 } 1808 1809 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, 1810 ip_block_version->funcs->name); 1811 1812 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; 1813 1814 return 0; 1815 } 1816 1817 /** 1818 * amdgpu_device_enable_virtual_display - enable virtual display feature 1819 * 1820 * @adev: amdgpu_device pointer 1821 * 1822 * Enabled the virtual display feature if the user has enabled it via 1823 * the module parameter virtual_display. This feature provides a virtual 1824 * display hardware on headless boards or in virtualized environments. 1825 * This function parses and validates the configuration string specified by 1826 * the user and configues the virtual display configuration (number of 1827 * virtual connectors, crtcs, etc.) specified. 1828 */ 1829 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) 1830 { 1831 adev->enable_virtual_display = false; 1832 1833 if (amdgpu_virtual_display) { 1834 const char *pci_address_name = pci_name(adev->pdev); 1835 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname; 1836 1837 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL); 1838 pciaddstr_tmp = pciaddstr; 1839 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) { 1840 pciaddname = strsep(&pciaddname_tmp, ","); 1841 if (!strcmp("all", pciaddname) 1842 || !strcmp(pci_address_name, pciaddname)) { 1843 long num_crtc; 1844 int res = -1; 1845 1846 adev->enable_virtual_display = true; 1847 1848 if (pciaddname_tmp) 1849 res = kstrtol(pciaddname_tmp, 10, 1850 &num_crtc); 1851 1852 if (!res) { 1853 if (num_crtc < 1) 1854 num_crtc = 1; 1855 if (num_crtc > 6) 1856 num_crtc = 6; 1857 adev->mode_info.num_crtc = num_crtc; 1858 } else { 1859 adev->mode_info.num_crtc = 1; 1860 } 1861 break; 1862 } 1863 } 1864 1865 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", 1866 amdgpu_virtual_display, pci_address_name, 1867 adev->enable_virtual_display, adev->mode_info.num_crtc); 1868 1869 kfree(pciaddstr); 1870 } 1871 } 1872 1873 /** 1874 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware 1875 * 1876 * @adev: amdgpu_device pointer 1877 * 1878 * Parses the asic configuration parameters specified in the gpu info 1879 * firmware and makes them availale to the driver for use in configuring 1880 * the asic. 1881 * Returns 0 on success, -EINVAL on failure. 1882 */ 1883 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) 1884 { 1885 const char *chip_name; 1886 char fw_name[40]; 1887 int err; 1888 const struct gpu_info_firmware_header_v1_0 *hdr; 1889 1890 adev->firmware.gpu_info_fw = NULL; 1891 1892 if (adev->mman.discovery_bin) { 1893 amdgpu_discovery_get_gfx_info(adev); 1894 1895 /* 1896 * FIXME: The bounding box is still needed by Navi12, so 1897 * temporarily read it from gpu_info firmware. Should be droped 1898 * when DAL no longer needs it. 1899 */ 1900 if (adev->asic_type != CHIP_NAVI12) 1901 return 0; 1902 } 1903 1904 switch (adev->asic_type) { 1905 #ifdef CONFIG_DRM_AMDGPU_SI 1906 case CHIP_VERDE: 1907 case CHIP_TAHITI: 1908 case CHIP_PITCAIRN: 1909 case CHIP_OLAND: 1910 case CHIP_HAINAN: 1911 #endif 1912 #ifdef CONFIG_DRM_AMDGPU_CIK 1913 case CHIP_BONAIRE: 1914 case CHIP_HAWAII: 1915 case CHIP_KAVERI: 1916 case CHIP_KABINI: 1917 case CHIP_MULLINS: 1918 #endif 1919 case CHIP_TOPAZ: 1920 case CHIP_TONGA: 1921 case CHIP_FIJI: 1922 case CHIP_POLARIS10: 1923 case CHIP_POLARIS11: 1924 case CHIP_POLARIS12: 1925 case CHIP_VEGAM: 1926 case CHIP_CARRIZO: 1927 case CHIP_STONEY: 1928 case CHIP_VEGA20: 1929 case CHIP_ALDEBARAN: 1930 case CHIP_SIENNA_CICHLID: 1931 case CHIP_NAVY_FLOUNDER: 1932 case CHIP_DIMGREY_CAVEFISH: 1933 case CHIP_BEIGE_GOBY: 1934 default: 1935 return 0; 1936 case CHIP_VEGA10: 1937 chip_name = "vega10"; 1938 break; 1939 case CHIP_VEGA12: 1940 chip_name = "vega12"; 1941 break; 1942 case CHIP_RAVEN: 1943 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1944 chip_name = "raven2"; 1945 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1946 chip_name = "picasso"; 1947 else 1948 chip_name = "raven"; 1949 break; 1950 case CHIP_ARCTURUS: 1951 chip_name = "arcturus"; 1952 break; 1953 case CHIP_RENOIR: 1954 if (adev->apu_flags & AMD_APU_IS_RENOIR) 1955 chip_name = "renoir"; 1956 else 1957 chip_name = "green_sardine"; 1958 break; 1959 case CHIP_NAVI10: 1960 chip_name = "navi10"; 1961 break; 1962 case CHIP_NAVI14: 1963 chip_name = "navi14"; 1964 break; 1965 case CHIP_NAVI12: 1966 chip_name = "navi12"; 1967 break; 1968 case CHIP_VANGOGH: 1969 chip_name = "vangogh"; 1970 break; 1971 case CHIP_YELLOW_CARP: 1972 chip_name = "yellow_carp"; 1973 break; 1974 } 1975 1976 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); 1977 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev); 1978 if (err) { 1979 dev_err(adev->dev, 1980 "Failed to load gpu_info firmware \"%s\"\n", 1981 fw_name); 1982 goto out; 1983 } 1984 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); 1985 if (err) { 1986 dev_err(adev->dev, 1987 "Failed to validate gpu_info firmware \"%s\"\n", 1988 fw_name); 1989 goto out; 1990 } 1991 1992 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; 1993 amdgpu_ucode_print_gpu_info_hdr(&hdr->header); 1994 1995 switch (hdr->version_major) { 1996 case 1: 1997 { 1998 const struct gpu_info_firmware_v1_0 *gpu_info_fw = 1999 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + 2000 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2001 2002 /* 2003 * Should be droped when DAL no longer needs it. 2004 */ 2005 if (adev->asic_type == CHIP_NAVI12) 2006 goto parse_soc_bounding_box; 2007 2008 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); 2009 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); 2010 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); 2011 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); 2012 adev->gfx.config.max_texture_channel_caches = 2013 le32_to_cpu(gpu_info_fw->gc_num_tccs); 2014 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); 2015 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); 2016 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); 2017 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); 2018 adev->gfx.config.double_offchip_lds_buf = 2019 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); 2020 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); 2021 adev->gfx.cu_info.max_waves_per_simd = 2022 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); 2023 adev->gfx.cu_info.max_scratch_slots_per_cu = 2024 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); 2025 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); 2026 if (hdr->version_minor >= 1) { 2027 const struct gpu_info_firmware_v1_1 *gpu_info_fw = 2028 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + 2029 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2030 adev->gfx.config.num_sc_per_sh = 2031 le32_to_cpu(gpu_info_fw->num_sc_per_sh); 2032 adev->gfx.config.num_packer_per_sc = 2033 le32_to_cpu(gpu_info_fw->num_packer_per_sc); 2034 } 2035 2036 parse_soc_bounding_box: 2037 /* 2038 * soc bounding box info is not integrated in disocovery table, 2039 * we always need to parse it from gpu info firmware if needed. 2040 */ 2041 if (hdr->version_minor == 2) { 2042 const struct gpu_info_firmware_v1_2 *gpu_info_fw = 2043 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + 2044 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2045 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; 2046 } 2047 break; 2048 } 2049 default: 2050 dev_err(adev->dev, 2051 "Unsupported gpu_info table %d\n", hdr->header.ucode_version); 2052 err = -EINVAL; 2053 goto out; 2054 } 2055 out: 2056 return err; 2057 } 2058 2059 /** 2060 * amdgpu_device_ip_early_init - run early init for hardware IPs 2061 * 2062 * @adev: amdgpu_device pointer 2063 * 2064 * Early initialization pass for hardware IPs. The hardware IPs that make 2065 * up each asic are discovered each IP's early_init callback is run. This 2066 * is the first stage in initializing the asic. 2067 * Returns 0 on success, negative error code on failure. 2068 */ 2069 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 2070 { 2071 int i, r; 2072 2073 amdgpu_device_enable_virtual_display(adev); 2074 2075 if (amdgpu_sriov_vf(adev)) { 2076 r = amdgpu_virt_request_full_gpu(adev, true); 2077 if (r) 2078 return r; 2079 } 2080 2081 switch (adev->asic_type) { 2082 #ifdef CONFIG_DRM_AMDGPU_SI 2083 case CHIP_VERDE: 2084 case CHIP_TAHITI: 2085 case CHIP_PITCAIRN: 2086 case CHIP_OLAND: 2087 case CHIP_HAINAN: 2088 adev->family = AMDGPU_FAMILY_SI; 2089 r = si_set_ip_blocks(adev); 2090 if (r) 2091 return r; 2092 break; 2093 #endif 2094 #ifdef CONFIG_DRM_AMDGPU_CIK 2095 case CHIP_BONAIRE: 2096 case CHIP_HAWAII: 2097 case CHIP_KAVERI: 2098 case CHIP_KABINI: 2099 case CHIP_MULLINS: 2100 if (adev->flags & AMD_IS_APU) 2101 adev->family = AMDGPU_FAMILY_KV; 2102 else 2103 adev->family = AMDGPU_FAMILY_CI; 2104 2105 r = cik_set_ip_blocks(adev); 2106 if (r) 2107 return r; 2108 break; 2109 #endif 2110 case CHIP_TOPAZ: 2111 case CHIP_TONGA: 2112 case CHIP_FIJI: 2113 case CHIP_POLARIS10: 2114 case CHIP_POLARIS11: 2115 case CHIP_POLARIS12: 2116 case CHIP_VEGAM: 2117 case CHIP_CARRIZO: 2118 case CHIP_STONEY: 2119 if (adev->flags & AMD_IS_APU) 2120 adev->family = AMDGPU_FAMILY_CZ; 2121 else 2122 adev->family = AMDGPU_FAMILY_VI; 2123 2124 r = vi_set_ip_blocks(adev); 2125 if (r) 2126 return r; 2127 break; 2128 case CHIP_VEGA10: 2129 case CHIP_VEGA12: 2130 case CHIP_VEGA20: 2131 case CHIP_RAVEN: 2132 case CHIP_ARCTURUS: 2133 case CHIP_RENOIR: 2134 case CHIP_ALDEBARAN: 2135 if (adev->flags & AMD_IS_APU) 2136 adev->family = AMDGPU_FAMILY_RV; 2137 else 2138 adev->family = AMDGPU_FAMILY_AI; 2139 2140 r = soc15_set_ip_blocks(adev); 2141 if (r) 2142 return r; 2143 break; 2144 case CHIP_NAVI10: 2145 case CHIP_NAVI14: 2146 case CHIP_NAVI12: 2147 case CHIP_SIENNA_CICHLID: 2148 case CHIP_NAVY_FLOUNDER: 2149 case CHIP_DIMGREY_CAVEFISH: 2150 case CHIP_BEIGE_GOBY: 2151 case CHIP_VANGOGH: 2152 case CHIP_YELLOW_CARP: 2153 case CHIP_CYAN_SKILLFISH: 2154 if (adev->asic_type == CHIP_VANGOGH) 2155 adev->family = AMDGPU_FAMILY_VGH; 2156 else if (adev->asic_type == CHIP_YELLOW_CARP) 2157 adev->family = AMDGPU_FAMILY_YC; 2158 else 2159 adev->family = AMDGPU_FAMILY_NV; 2160 2161 r = nv_set_ip_blocks(adev); 2162 if (r) 2163 return r; 2164 break; 2165 default: 2166 /* FIXME: not supported yet */ 2167 return -EINVAL; 2168 } 2169 2170 amdgpu_amdkfd_device_probe(adev); 2171 2172 adev->pm.pp_feature = amdgpu_pp_feature_mask; 2173 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) 2174 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 2175 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) 2176 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; 2177 2178 for (i = 0; i < adev->num_ip_blocks; i++) { 2179 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 2180 DRM_ERROR("disabled ip block: %d <%s>\n", 2181 i, adev->ip_blocks[i].version->funcs->name); 2182 adev->ip_blocks[i].status.valid = false; 2183 } else { 2184 if (adev->ip_blocks[i].version->funcs->early_init) { 2185 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); 2186 if (r == -ENOENT) { 2187 adev->ip_blocks[i].status.valid = false; 2188 } else if (r) { 2189 DRM_ERROR("early_init of IP block <%s> failed %d\n", 2190 adev->ip_blocks[i].version->funcs->name, r); 2191 return r; 2192 } else { 2193 adev->ip_blocks[i].status.valid = true; 2194 } 2195 } else { 2196 adev->ip_blocks[i].status.valid = true; 2197 } 2198 } 2199 /* get the vbios after the asic_funcs are set up */ 2200 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2201 r = amdgpu_device_parse_gpu_info_fw(adev); 2202 if (r) 2203 return r; 2204 2205 /* Read BIOS */ 2206 if (!amdgpu_get_bios(adev)) 2207 return -EINVAL; 2208 2209 r = amdgpu_atombios_init(adev); 2210 if (r) { 2211 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2212 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2213 return r; 2214 } 2215 2216 /*get pf2vf msg info at it's earliest time*/ 2217 if (amdgpu_sriov_vf(adev)) 2218 amdgpu_virt_init_data_exchange(adev); 2219 2220 } 2221 } 2222 2223 adev->cg_flags &= amdgpu_cg_mask; 2224 adev->pg_flags &= amdgpu_pg_mask; 2225 2226 return 0; 2227 } 2228 2229 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) 2230 { 2231 int i, r; 2232 2233 for (i = 0; i < adev->num_ip_blocks; i++) { 2234 if (!adev->ip_blocks[i].status.sw) 2235 continue; 2236 if (adev->ip_blocks[i].status.hw) 2237 continue; 2238 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2239 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || 2240 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2241 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2242 if (r) { 2243 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2244 adev->ip_blocks[i].version->funcs->name, r); 2245 return r; 2246 } 2247 adev->ip_blocks[i].status.hw = true; 2248 } 2249 } 2250 2251 return 0; 2252 } 2253 2254 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) 2255 { 2256 int i, r; 2257 2258 for (i = 0; i < adev->num_ip_blocks; i++) { 2259 if (!adev->ip_blocks[i].status.sw) 2260 continue; 2261 if (adev->ip_blocks[i].status.hw) 2262 continue; 2263 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2264 if (r) { 2265 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2266 adev->ip_blocks[i].version->funcs->name, r); 2267 return r; 2268 } 2269 adev->ip_blocks[i].status.hw = true; 2270 } 2271 2272 return 0; 2273 } 2274 2275 static int amdgpu_device_fw_loading(struct amdgpu_device *adev) 2276 { 2277 int r = 0; 2278 int i; 2279 uint32_t smu_version; 2280 2281 if (adev->asic_type >= CHIP_VEGA10) { 2282 for (i = 0; i < adev->num_ip_blocks; i++) { 2283 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) 2284 continue; 2285 2286 if (!adev->ip_blocks[i].status.sw) 2287 continue; 2288 2289 /* no need to do the fw loading again if already done*/ 2290 if (adev->ip_blocks[i].status.hw == true) 2291 break; 2292 2293 if (amdgpu_in_reset(adev) || adev->in_suspend) { 2294 r = adev->ip_blocks[i].version->funcs->resume(adev); 2295 if (r) { 2296 DRM_ERROR("resume of IP block <%s> failed %d\n", 2297 adev->ip_blocks[i].version->funcs->name, r); 2298 return r; 2299 } 2300 } else { 2301 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2302 if (r) { 2303 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2304 adev->ip_blocks[i].version->funcs->name, r); 2305 return r; 2306 } 2307 } 2308 2309 adev->ip_blocks[i].status.hw = true; 2310 break; 2311 } 2312 } 2313 2314 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) 2315 r = amdgpu_pm_load_smu_firmware(adev, &smu_version); 2316 2317 return r; 2318 } 2319 2320 /** 2321 * amdgpu_device_ip_init - run init for hardware IPs 2322 * 2323 * @adev: amdgpu_device pointer 2324 * 2325 * Main initialization pass for hardware IPs. The list of all the hardware 2326 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 2327 * are run. sw_init initializes the software state associated with each IP 2328 * and hw_init initializes the hardware associated with each IP. 2329 * Returns 0 on success, negative error code on failure. 2330 */ 2331 static int amdgpu_device_ip_init(struct amdgpu_device *adev) 2332 { 2333 int i, r; 2334 2335 r = amdgpu_ras_init(adev); 2336 if (r) 2337 return r; 2338 2339 for (i = 0; i < adev->num_ip_blocks; i++) { 2340 if (!adev->ip_blocks[i].status.valid) 2341 continue; 2342 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 2343 if (r) { 2344 DRM_ERROR("sw_init of IP block <%s> failed %d\n", 2345 adev->ip_blocks[i].version->funcs->name, r); 2346 goto init_failed; 2347 } 2348 adev->ip_blocks[i].status.sw = true; 2349 2350 /* need to do gmc hw init early so we can allocate gpu mem */ 2351 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2352 r = amdgpu_device_vram_scratch_init(adev); 2353 if (r) { 2354 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); 2355 goto init_failed; 2356 } 2357 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2358 if (r) { 2359 DRM_ERROR("hw_init %d failed %d\n", i, r); 2360 goto init_failed; 2361 } 2362 r = amdgpu_device_wb_init(adev); 2363 if (r) { 2364 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 2365 goto init_failed; 2366 } 2367 adev->ip_blocks[i].status.hw = true; 2368 2369 /* right after GMC hw init, we create CSA */ 2370 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { 2371 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 2372 AMDGPU_GEM_DOMAIN_VRAM, 2373 AMDGPU_CSA_SIZE); 2374 if (r) { 2375 DRM_ERROR("allocate CSA failed %d\n", r); 2376 goto init_failed; 2377 } 2378 } 2379 } 2380 } 2381 2382 if (amdgpu_sriov_vf(adev)) 2383 amdgpu_virt_init_data_exchange(adev); 2384 2385 r = amdgpu_ib_pool_init(adev); 2386 if (r) { 2387 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2388 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2389 goto init_failed; 2390 } 2391 2392 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2393 if (r) 2394 goto init_failed; 2395 2396 r = amdgpu_device_ip_hw_init_phase1(adev); 2397 if (r) 2398 goto init_failed; 2399 2400 r = amdgpu_device_fw_loading(adev); 2401 if (r) 2402 goto init_failed; 2403 2404 r = amdgpu_device_ip_hw_init_phase2(adev); 2405 if (r) 2406 goto init_failed; 2407 2408 /* 2409 * retired pages will be loaded from eeprom and reserved here, 2410 * it should be called after amdgpu_device_ip_hw_init_phase2 since 2411 * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2412 * for I2C communication which only true at this point. 2413 * 2414 * amdgpu_ras_recovery_init may fail, but the upper only cares the 2415 * failure from bad gpu situation and stop amdgpu init process 2416 * accordingly. For other failed cases, it will still release all 2417 * the resource and print error message, rather than returning one 2418 * negative value to upper level. 2419 * 2420 * Note: theoretically, this should be called before all vram allocations 2421 * to protect retired page from abusing 2422 */ 2423 r = amdgpu_ras_recovery_init(adev); 2424 if (r) 2425 goto init_failed; 2426 2427 if (adev->gmc.xgmi.num_physical_nodes > 1) 2428 amdgpu_xgmi_add_device(adev); 2429 2430 /* Don't init kfd if whole hive need to be reset during init */ 2431 if (!adev->gmc.xgmi.pending_reset) 2432 amdgpu_amdkfd_device_init(adev); 2433 2434 amdgpu_fru_get_product_info(adev); 2435 2436 init_failed: 2437 if (amdgpu_sriov_vf(adev)) 2438 amdgpu_virt_release_full_gpu(adev, true); 2439 2440 return r; 2441 } 2442 2443 /** 2444 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer 2445 * 2446 * @adev: amdgpu_device pointer 2447 * 2448 * Writes a reset magic value to the gart pointer in VRAM. The driver calls 2449 * this function before a GPU reset. If the value is retained after a 2450 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. 2451 */ 2452 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 2453 { 2454 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 2455 } 2456 2457 /** 2458 * amdgpu_device_check_vram_lost - check if vram is valid 2459 * 2460 * @adev: amdgpu_device pointer 2461 * 2462 * Checks the reset magic value written to the gart pointer in VRAM. 2463 * The driver calls this after a GPU reset to see if the contents of 2464 * VRAM is lost or now. 2465 * returns true if vram is lost, false if not. 2466 */ 2467 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 2468 { 2469 if (memcmp(adev->gart.ptr, adev->reset_magic, 2470 AMDGPU_RESET_MAGIC_NUM)) 2471 return true; 2472 2473 if (!amdgpu_in_reset(adev)) 2474 return false; 2475 2476 /* 2477 * For all ASICs with baco/mode1 reset, the VRAM is 2478 * always assumed to be lost. 2479 */ 2480 switch (amdgpu_asic_reset_method(adev)) { 2481 case AMD_RESET_METHOD_BACO: 2482 case AMD_RESET_METHOD_MODE1: 2483 return true; 2484 default: 2485 return false; 2486 } 2487 } 2488 2489 /** 2490 * amdgpu_device_set_cg_state - set clockgating for amdgpu device 2491 * 2492 * @adev: amdgpu_device pointer 2493 * @state: clockgating state (gate or ungate) 2494 * 2495 * The list of all the hardware IPs that make up the asic is walked and the 2496 * set_clockgating_state callbacks are run. 2497 * Late initialization pass enabling clockgating for hardware IPs. 2498 * Fini or suspend, pass disabling clockgating for hardware IPs. 2499 * Returns 0 on success, negative error code on failure. 2500 */ 2501 2502 int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2503 enum amd_clockgating_state state) 2504 { 2505 int i, j, r; 2506 2507 if (amdgpu_emu_mode == 1) 2508 return 0; 2509 2510 for (j = 0; j < adev->num_ip_blocks; j++) { 2511 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2512 if (!adev->ip_blocks[i].status.late_initialized) 2513 continue; 2514 /* skip CG for GFX on S0ix */ 2515 if (adev->in_s0ix && 2516 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX) 2517 continue; 2518 /* skip CG for VCE/UVD, it's handled specially */ 2519 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2520 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2521 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2522 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2523 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2524 /* enable clockgating to save power */ 2525 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2526 state); 2527 if (r) { 2528 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2529 adev->ip_blocks[i].version->funcs->name, r); 2530 return r; 2531 } 2532 } 2533 } 2534 2535 return 0; 2536 } 2537 2538 int amdgpu_device_set_pg_state(struct amdgpu_device *adev, 2539 enum amd_powergating_state state) 2540 { 2541 int i, j, r; 2542 2543 if (amdgpu_emu_mode == 1) 2544 return 0; 2545 2546 for (j = 0; j < adev->num_ip_blocks; j++) { 2547 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2548 if (!adev->ip_blocks[i].status.late_initialized) 2549 continue; 2550 /* skip PG for GFX on S0ix */ 2551 if (adev->in_s0ix && 2552 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX) 2553 continue; 2554 /* skip CG for VCE/UVD, it's handled specially */ 2555 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2556 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2557 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2558 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2559 adev->ip_blocks[i].version->funcs->set_powergating_state) { 2560 /* enable powergating to save power */ 2561 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2562 state); 2563 if (r) { 2564 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2565 adev->ip_blocks[i].version->funcs->name, r); 2566 return r; 2567 } 2568 } 2569 } 2570 return 0; 2571 } 2572 2573 static int amdgpu_device_enable_mgpu_fan_boost(void) 2574 { 2575 struct amdgpu_gpu_instance *gpu_ins; 2576 struct amdgpu_device *adev; 2577 int i, ret = 0; 2578 2579 mutex_lock(&mgpu_info.mutex); 2580 2581 /* 2582 * MGPU fan boost feature should be enabled 2583 * only when there are two or more dGPUs in 2584 * the system 2585 */ 2586 if (mgpu_info.num_dgpu < 2) 2587 goto out; 2588 2589 for (i = 0; i < mgpu_info.num_dgpu; i++) { 2590 gpu_ins = &(mgpu_info.gpu_ins[i]); 2591 adev = gpu_ins->adev; 2592 if (!(adev->flags & AMD_IS_APU) && 2593 !gpu_ins->mgpu_fan_enabled) { 2594 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2595 if (ret) 2596 break; 2597 2598 gpu_ins->mgpu_fan_enabled = 1; 2599 } 2600 } 2601 2602 out: 2603 mutex_unlock(&mgpu_info.mutex); 2604 2605 return ret; 2606 } 2607 2608 /** 2609 * amdgpu_device_ip_late_init - run late init for hardware IPs 2610 * 2611 * @adev: amdgpu_device pointer 2612 * 2613 * Late initialization pass for hardware IPs. The list of all the hardware 2614 * IPs that make up the asic is walked and the late_init callbacks are run. 2615 * late_init covers any special initialization that an IP requires 2616 * after all of the have been initialized or something that needs to happen 2617 * late in the init process. 2618 * Returns 0 on success, negative error code on failure. 2619 */ 2620 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2621 { 2622 struct amdgpu_gpu_instance *gpu_instance; 2623 int i = 0, r; 2624 2625 for (i = 0; i < adev->num_ip_blocks; i++) { 2626 if (!adev->ip_blocks[i].status.hw) 2627 continue; 2628 if (adev->ip_blocks[i].version->funcs->late_init) { 2629 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 2630 if (r) { 2631 DRM_ERROR("late_init of IP block <%s> failed %d\n", 2632 adev->ip_blocks[i].version->funcs->name, r); 2633 return r; 2634 } 2635 } 2636 adev->ip_blocks[i].status.late_initialized = true; 2637 } 2638 2639 amdgpu_ras_set_error_query_ready(adev, true); 2640 2641 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 2642 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 2643 2644 amdgpu_device_fill_reset_magic(adev); 2645 2646 r = amdgpu_device_enable_mgpu_fan_boost(); 2647 if (r) 2648 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2649 2650 /* For XGMI + passthrough configuration on arcturus, enable light SBR */ 2651 if (adev->asic_type == CHIP_ARCTURUS && 2652 amdgpu_passthrough(adev) && 2653 adev->gmc.xgmi.num_physical_nodes > 1) 2654 smu_set_light_sbr(&adev->smu, true); 2655 2656 if (adev->gmc.xgmi.num_physical_nodes > 1) { 2657 mutex_lock(&mgpu_info.mutex); 2658 2659 /* 2660 * Reset device p-state to low as this was booted with high. 2661 * 2662 * This should be performed only after all devices from the same 2663 * hive get initialized. 2664 * 2665 * However, it's unknown how many device in the hive in advance. 2666 * As this is counted one by one during devices initializations. 2667 * 2668 * So, we wait for all XGMI interlinked devices initialized. 2669 * This may bring some delays as those devices may come from 2670 * different hives. But that should be OK. 2671 */ 2672 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2673 for (i = 0; i < mgpu_info.num_gpu; i++) { 2674 gpu_instance = &(mgpu_info.gpu_ins[i]); 2675 if (gpu_instance->adev->flags & AMD_IS_APU) 2676 continue; 2677 2678 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 2679 AMDGPU_XGMI_PSTATE_MIN); 2680 if (r) { 2681 DRM_ERROR("pstate setting failed (%d).\n", r); 2682 break; 2683 } 2684 } 2685 } 2686 2687 mutex_unlock(&mgpu_info.mutex); 2688 } 2689 2690 return 0; 2691 } 2692 2693 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) 2694 { 2695 int i, r; 2696 2697 for (i = 0; i < adev->num_ip_blocks; i++) { 2698 if (!adev->ip_blocks[i].version->funcs->early_fini) 2699 continue; 2700 2701 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); 2702 if (r) { 2703 DRM_DEBUG("early_fini of IP block <%s> failed %d\n", 2704 adev->ip_blocks[i].version->funcs->name, r); 2705 } 2706 } 2707 2708 amdgpu_amdkfd_suspend(adev, false); 2709 2710 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2711 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2712 2713 /* need to disable SMC first */ 2714 for (i = 0; i < adev->num_ip_blocks; i++) { 2715 if (!adev->ip_blocks[i].status.hw) 2716 continue; 2717 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2718 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2719 /* XXX handle errors */ 2720 if (r) { 2721 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2722 adev->ip_blocks[i].version->funcs->name, r); 2723 } 2724 adev->ip_blocks[i].status.hw = false; 2725 break; 2726 } 2727 } 2728 2729 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2730 if (!adev->ip_blocks[i].status.hw) 2731 continue; 2732 2733 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2734 /* XXX handle errors */ 2735 if (r) { 2736 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2737 adev->ip_blocks[i].version->funcs->name, r); 2738 } 2739 2740 adev->ip_blocks[i].status.hw = false; 2741 } 2742 2743 return 0; 2744 } 2745 2746 /** 2747 * amdgpu_device_ip_fini - run fini for hardware IPs 2748 * 2749 * @adev: amdgpu_device pointer 2750 * 2751 * Main teardown pass for hardware IPs. The list of all the hardware 2752 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 2753 * are run. hw_fini tears down the hardware associated with each IP 2754 * and sw_fini tears down any software state associated with each IP. 2755 * Returns 0 on success, negative error code on failure. 2756 */ 2757 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 2758 { 2759 int i, r; 2760 2761 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) 2762 amdgpu_virt_release_ras_err_handler_data(adev); 2763 2764 amdgpu_ras_pre_fini(adev); 2765 2766 if (adev->gmc.xgmi.num_physical_nodes > 1) 2767 amdgpu_xgmi_remove_device(adev); 2768 2769 amdgpu_amdkfd_device_fini_sw(adev); 2770 2771 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2772 if (!adev->ip_blocks[i].status.sw) 2773 continue; 2774 2775 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2776 amdgpu_ucode_free_bo(adev); 2777 amdgpu_free_static_csa(&adev->virt.csa_obj); 2778 amdgpu_device_wb_fini(adev); 2779 amdgpu_device_vram_scratch_fini(adev); 2780 amdgpu_ib_pool_fini(adev); 2781 } 2782 2783 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2784 /* XXX handle errors */ 2785 if (r) { 2786 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2787 adev->ip_blocks[i].version->funcs->name, r); 2788 } 2789 adev->ip_blocks[i].status.sw = false; 2790 adev->ip_blocks[i].status.valid = false; 2791 } 2792 2793 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2794 if (!adev->ip_blocks[i].status.late_initialized) 2795 continue; 2796 if (adev->ip_blocks[i].version->funcs->late_fini) 2797 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2798 adev->ip_blocks[i].status.late_initialized = false; 2799 } 2800 2801 amdgpu_ras_fini(adev); 2802 2803 if (amdgpu_sriov_vf(adev)) 2804 if (amdgpu_virt_release_full_gpu(adev, false)) 2805 DRM_ERROR("failed to release exclusive mode on fini\n"); 2806 2807 return 0; 2808 } 2809 2810 /** 2811 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2812 * 2813 * @work: work_struct. 2814 */ 2815 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2816 { 2817 struct amdgpu_device *adev = 2818 container_of(work, struct amdgpu_device, delayed_init_work.work); 2819 int r; 2820 2821 r = amdgpu_ib_ring_tests(adev); 2822 if (r) 2823 DRM_ERROR("ib ring test failed (%d).\n", r); 2824 } 2825 2826 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2827 { 2828 struct amdgpu_device *adev = 2829 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2830 2831 mutex_lock(&adev->gfx.gfx_off_mutex); 2832 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 2833 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2834 adev->gfx.gfx_off_state = true; 2835 } 2836 mutex_unlock(&adev->gfx.gfx_off_mutex); 2837 } 2838 2839 /** 2840 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2841 * 2842 * @adev: amdgpu_device pointer 2843 * 2844 * Main suspend function for hardware IPs. The list of all the hardware 2845 * IPs that make up the asic is walked, clockgating is disabled and the 2846 * suspend callbacks are run. suspend puts the hardware and software state 2847 * in each IP into a state suitable for suspend. 2848 * Returns 0 on success, negative error code on failure. 2849 */ 2850 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2851 { 2852 int i, r; 2853 2854 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2855 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2856 2857 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2858 if (!adev->ip_blocks[i].status.valid) 2859 continue; 2860 2861 /* displays are handled separately */ 2862 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) 2863 continue; 2864 2865 /* XXX handle errors */ 2866 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2867 /* XXX handle errors */ 2868 if (r) { 2869 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2870 adev->ip_blocks[i].version->funcs->name, r); 2871 return r; 2872 } 2873 2874 adev->ip_blocks[i].status.hw = false; 2875 } 2876 2877 return 0; 2878 } 2879 2880 /** 2881 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2882 * 2883 * @adev: amdgpu_device pointer 2884 * 2885 * Main suspend function for hardware IPs. The list of all the hardware 2886 * IPs that make up the asic is walked, clockgating is disabled and the 2887 * suspend callbacks are run. suspend puts the hardware and software state 2888 * in each IP into a state suitable for suspend. 2889 * Returns 0 on success, negative error code on failure. 2890 */ 2891 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2892 { 2893 int i, r; 2894 2895 if (adev->in_s0ix) 2896 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry); 2897 2898 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2899 if (!adev->ip_blocks[i].status.valid) 2900 continue; 2901 /* displays are handled in phase1 */ 2902 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2903 continue; 2904 /* PSP lost connection when err_event_athub occurs */ 2905 if (amdgpu_ras_intr_triggered() && 2906 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2907 adev->ip_blocks[i].status.hw = false; 2908 continue; 2909 } 2910 2911 /* skip unnecessary suspend if we do not initialize them yet */ 2912 if (adev->gmc.xgmi.pending_reset && 2913 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2914 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC || 2915 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2916 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) { 2917 adev->ip_blocks[i].status.hw = false; 2918 continue; 2919 } 2920 2921 /* skip suspend of gfx and psp for S0ix 2922 * gfx is in gfxoff state, so on resume it will exit gfxoff just 2923 * like at runtime. PSP is also part of the always on hardware 2924 * so no need to suspend it. 2925 */ 2926 if (adev->in_s0ix && 2927 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || 2928 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)) 2929 continue; 2930 2931 /* XXX handle errors */ 2932 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2933 /* XXX handle errors */ 2934 if (r) { 2935 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2936 adev->ip_blocks[i].version->funcs->name, r); 2937 } 2938 adev->ip_blocks[i].status.hw = false; 2939 /* handle putting the SMC in the appropriate state */ 2940 if(!amdgpu_sriov_vf(adev)){ 2941 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2942 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 2943 if (r) { 2944 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 2945 adev->mp1_state, r); 2946 return r; 2947 } 2948 } 2949 } 2950 } 2951 2952 return 0; 2953 } 2954 2955 /** 2956 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2957 * 2958 * @adev: amdgpu_device pointer 2959 * 2960 * Main suspend function for hardware IPs. The list of all the hardware 2961 * IPs that make up the asic is walked, clockgating is disabled and the 2962 * suspend callbacks are run. suspend puts the hardware and software state 2963 * in each IP into a state suitable for suspend. 2964 * Returns 0 on success, negative error code on failure. 2965 */ 2966 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2967 { 2968 int r; 2969 2970 if (amdgpu_sriov_vf(adev)) { 2971 amdgpu_virt_fini_data_exchange(adev); 2972 amdgpu_virt_request_full_gpu(adev, false); 2973 } 2974 2975 r = amdgpu_device_ip_suspend_phase1(adev); 2976 if (r) 2977 return r; 2978 r = amdgpu_device_ip_suspend_phase2(adev); 2979 2980 if (amdgpu_sriov_vf(adev)) 2981 amdgpu_virt_release_full_gpu(adev, false); 2982 2983 return r; 2984 } 2985 2986 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2987 { 2988 int i, r; 2989 2990 static enum amd_ip_block_type ip_order[] = { 2991 AMD_IP_BLOCK_TYPE_GMC, 2992 AMD_IP_BLOCK_TYPE_COMMON, 2993 AMD_IP_BLOCK_TYPE_PSP, 2994 AMD_IP_BLOCK_TYPE_IH, 2995 }; 2996 2997 for (i = 0; i < adev->num_ip_blocks; i++) { 2998 int j; 2999 struct amdgpu_ip_block *block; 3000 3001 block = &adev->ip_blocks[i]; 3002 block->status.hw = false; 3003 3004 for (j = 0; j < ARRAY_SIZE(ip_order); j++) { 3005 3006 if (block->version->type != ip_order[j] || 3007 !block->status.valid) 3008 continue; 3009 3010 r = block->version->funcs->hw_init(adev); 3011 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3012 if (r) 3013 return r; 3014 block->status.hw = true; 3015 } 3016 } 3017 3018 return 0; 3019 } 3020 3021 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 3022 { 3023 int i, r; 3024 3025 static enum amd_ip_block_type ip_order[] = { 3026 AMD_IP_BLOCK_TYPE_SMC, 3027 AMD_IP_BLOCK_TYPE_DCE, 3028 AMD_IP_BLOCK_TYPE_GFX, 3029 AMD_IP_BLOCK_TYPE_SDMA, 3030 AMD_IP_BLOCK_TYPE_UVD, 3031 AMD_IP_BLOCK_TYPE_VCE, 3032 AMD_IP_BLOCK_TYPE_VCN 3033 }; 3034 3035 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 3036 int j; 3037 struct amdgpu_ip_block *block; 3038 3039 for (j = 0; j < adev->num_ip_blocks; j++) { 3040 block = &adev->ip_blocks[j]; 3041 3042 if (block->version->type != ip_order[i] || 3043 !block->status.valid || 3044 block->status.hw) 3045 continue; 3046 3047 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 3048 r = block->version->funcs->resume(adev); 3049 else 3050 r = block->version->funcs->hw_init(adev); 3051 3052 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3053 if (r) 3054 return r; 3055 block->status.hw = true; 3056 } 3057 } 3058 3059 return 0; 3060 } 3061 3062 /** 3063 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 3064 * 3065 * @adev: amdgpu_device pointer 3066 * 3067 * First resume function for hardware IPs. The list of all the hardware 3068 * IPs that make up the asic is walked and the resume callbacks are run for 3069 * COMMON, GMC, and IH. resume puts the hardware into a functional state 3070 * after a suspend and updates the software state as necessary. This 3071 * function is also used for restoring the GPU after a GPU reset. 3072 * Returns 0 on success, negative error code on failure. 3073 */ 3074 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 3075 { 3076 int i, r; 3077 3078 for (i = 0; i < adev->num_ip_blocks; i++) { 3079 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3080 continue; 3081 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3082 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3083 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 3084 3085 r = adev->ip_blocks[i].version->funcs->resume(adev); 3086 if (r) { 3087 DRM_ERROR("resume of IP block <%s> failed %d\n", 3088 adev->ip_blocks[i].version->funcs->name, r); 3089 return r; 3090 } 3091 adev->ip_blocks[i].status.hw = true; 3092 } 3093 } 3094 3095 return 0; 3096 } 3097 3098 /** 3099 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 3100 * 3101 * @adev: amdgpu_device pointer 3102 * 3103 * First resume function for hardware IPs. The list of all the hardware 3104 * IPs that make up the asic is walked and the resume callbacks are run for 3105 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 3106 * functional state after a suspend and updates the software state as 3107 * necessary. This function is also used for restoring the GPU after a GPU 3108 * reset. 3109 * Returns 0 on success, negative error code on failure. 3110 */ 3111 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 3112 { 3113 int i, r; 3114 3115 for (i = 0; i < adev->num_ip_blocks; i++) { 3116 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3117 continue; 3118 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3119 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3120 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3121 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3122 continue; 3123 r = adev->ip_blocks[i].version->funcs->resume(adev); 3124 if (r) { 3125 DRM_ERROR("resume of IP block <%s> failed %d\n", 3126 adev->ip_blocks[i].version->funcs->name, r); 3127 return r; 3128 } 3129 adev->ip_blocks[i].status.hw = true; 3130 } 3131 3132 return 0; 3133 } 3134 3135 /** 3136 * amdgpu_device_ip_resume - run resume for hardware IPs 3137 * 3138 * @adev: amdgpu_device pointer 3139 * 3140 * Main resume function for hardware IPs. The hardware IPs 3141 * are split into two resume functions because they are 3142 * are also used in in recovering from a GPU reset and some additional 3143 * steps need to be take between them. In this case (S3/S4) they are 3144 * run sequentially. 3145 * Returns 0 on success, negative error code on failure. 3146 */ 3147 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 3148 { 3149 int r; 3150 3151 r = amdgpu_device_ip_resume_phase1(adev); 3152 if (r) 3153 return r; 3154 3155 r = amdgpu_device_fw_loading(adev); 3156 if (r) 3157 return r; 3158 3159 r = amdgpu_device_ip_resume_phase2(adev); 3160 3161 return r; 3162 } 3163 3164 /** 3165 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 3166 * 3167 * @adev: amdgpu_device pointer 3168 * 3169 * Query the VBIOS data tables to determine if the board supports SR-IOV. 3170 */ 3171 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 3172 { 3173 if (amdgpu_sriov_vf(adev)) { 3174 if (adev->is_atom_fw) { 3175 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev)) 3176 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3177 } else { 3178 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 3179 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3180 } 3181 3182 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 3183 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 3184 } 3185 } 3186 3187 /** 3188 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 3189 * 3190 * @asic_type: AMD asic type 3191 * 3192 * Check if there is DC (new modesetting infrastructre) support for an asic. 3193 * returns true if DC has support, false if not. 3194 */ 3195 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 3196 { 3197 switch (asic_type) { 3198 #if defined(CONFIG_DRM_AMD_DC) 3199 #if defined(CONFIG_DRM_AMD_DC_SI) 3200 case CHIP_TAHITI: 3201 case CHIP_PITCAIRN: 3202 case CHIP_VERDE: 3203 case CHIP_OLAND: 3204 #endif 3205 case CHIP_BONAIRE: 3206 case CHIP_KAVERI: 3207 case CHIP_KABINI: 3208 case CHIP_MULLINS: 3209 /* 3210 * We have systems in the wild with these ASICs that require 3211 * LVDS and VGA support which is not supported with DC. 3212 * 3213 * Fallback to the non-DC driver here by default so as not to 3214 * cause regressions. 3215 */ 3216 return amdgpu_dc > 0; 3217 case CHIP_HAWAII: 3218 case CHIP_CARRIZO: 3219 case CHIP_STONEY: 3220 case CHIP_POLARIS10: 3221 case CHIP_POLARIS11: 3222 case CHIP_POLARIS12: 3223 case CHIP_VEGAM: 3224 case CHIP_TONGA: 3225 case CHIP_FIJI: 3226 case CHIP_VEGA10: 3227 case CHIP_VEGA12: 3228 case CHIP_VEGA20: 3229 #if defined(CONFIG_DRM_AMD_DC_DCN) 3230 case CHIP_RAVEN: 3231 case CHIP_NAVI10: 3232 case CHIP_NAVI14: 3233 case CHIP_NAVI12: 3234 case CHIP_RENOIR: 3235 case CHIP_SIENNA_CICHLID: 3236 case CHIP_NAVY_FLOUNDER: 3237 case CHIP_DIMGREY_CAVEFISH: 3238 case CHIP_BEIGE_GOBY: 3239 case CHIP_VANGOGH: 3240 case CHIP_YELLOW_CARP: 3241 #endif 3242 return amdgpu_dc != 0; 3243 #endif 3244 default: 3245 if (amdgpu_dc > 0) 3246 DRM_INFO_ONCE("Display Core has been requested via kernel parameter " 3247 "but isn't supported by ASIC, ignoring\n"); 3248 return false; 3249 } 3250 } 3251 3252 /** 3253 * amdgpu_device_has_dc_support - check if dc is supported 3254 * 3255 * @adev: amdgpu_device pointer 3256 * 3257 * Returns true for supported, false for not supported 3258 */ 3259 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 3260 { 3261 if (amdgpu_sriov_vf(adev) || 3262 adev->enable_virtual_display || 3263 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) 3264 return false; 3265 3266 return amdgpu_device_asic_has_dc_support(adev->asic_type); 3267 } 3268 3269 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 3270 { 3271 struct amdgpu_device *adev = 3272 container_of(__work, struct amdgpu_device, xgmi_reset_work); 3273 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 3274 3275 /* It's a bug to not have a hive within this function */ 3276 if (WARN_ON(!hive)) 3277 return; 3278 3279 /* 3280 * Use task barrier to synchronize all xgmi reset works across the 3281 * hive. task_barrier_enter and task_barrier_exit will block 3282 * until all the threads running the xgmi reset works reach 3283 * those points. task_barrier_full will do both blocks. 3284 */ 3285 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 3286 3287 task_barrier_enter(&hive->tb); 3288 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); 3289 3290 if (adev->asic_reset_res) 3291 goto fail; 3292 3293 task_barrier_exit(&hive->tb); 3294 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); 3295 3296 if (adev->asic_reset_res) 3297 goto fail; 3298 3299 if (adev->mmhub.ras_funcs && 3300 adev->mmhub.ras_funcs->reset_ras_error_count) 3301 adev->mmhub.ras_funcs->reset_ras_error_count(adev); 3302 } else { 3303 3304 task_barrier_full(&hive->tb); 3305 adev->asic_reset_res = amdgpu_asic_reset(adev); 3306 } 3307 3308 fail: 3309 if (adev->asic_reset_res) 3310 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 3311 adev->asic_reset_res, adev_to_drm(adev)->unique); 3312 amdgpu_put_xgmi_hive(hive); 3313 } 3314 3315 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 3316 { 3317 char *input = amdgpu_lockup_timeout; 3318 char *timeout_setting = NULL; 3319 int index = 0; 3320 long timeout; 3321 int ret = 0; 3322 3323 /* 3324 * By default timeout for non compute jobs is 10000 3325 * and 60000 for compute jobs. 3326 * In SR-IOV or passthrough mode, timeout for compute 3327 * jobs are 60000 by default. 3328 */ 3329 adev->gfx_timeout = msecs_to_jiffies(10000); 3330 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3331 if (amdgpu_sriov_vf(adev)) 3332 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? 3333 msecs_to_jiffies(60000) : msecs_to_jiffies(10000); 3334 else 3335 adev->compute_timeout = msecs_to_jiffies(60000); 3336 3337 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3338 while ((timeout_setting = strsep(&input, ",")) && 3339 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3340 ret = kstrtol(timeout_setting, 0, &timeout); 3341 if (ret) 3342 return ret; 3343 3344 if (timeout == 0) { 3345 index++; 3346 continue; 3347 } else if (timeout < 0) { 3348 timeout = MAX_SCHEDULE_TIMEOUT; 3349 } else { 3350 timeout = msecs_to_jiffies(timeout); 3351 } 3352 3353 switch (index++) { 3354 case 0: 3355 adev->gfx_timeout = timeout; 3356 break; 3357 case 1: 3358 adev->compute_timeout = timeout; 3359 break; 3360 case 2: 3361 adev->sdma_timeout = timeout; 3362 break; 3363 case 3: 3364 adev->video_timeout = timeout; 3365 break; 3366 default: 3367 break; 3368 } 3369 } 3370 /* 3371 * There is only one value specified and 3372 * it should apply to all non-compute jobs. 3373 */ 3374 if (index == 1) { 3375 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3376 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 3377 adev->compute_timeout = adev->gfx_timeout; 3378 } 3379 } 3380 3381 return ret; 3382 } 3383 3384 static const struct attribute *amdgpu_dev_attributes[] = { 3385 &dev_attr_product_name.attr, 3386 &dev_attr_product_number.attr, 3387 &dev_attr_serial_number.attr, 3388 &dev_attr_pcie_replay_count.attr, 3389 NULL 3390 }; 3391 3392 /** 3393 * amdgpu_device_init - initialize the driver 3394 * 3395 * @adev: amdgpu_device pointer 3396 * @flags: driver flags 3397 * 3398 * Initializes the driver info and hw (all asics). 3399 * Returns 0 for success or an error on failure. 3400 * Called at driver startup. 3401 */ 3402 int amdgpu_device_init(struct amdgpu_device *adev, 3403 uint32_t flags) 3404 { 3405 struct drm_device *ddev = adev_to_drm(adev); 3406 struct pci_dev *pdev = adev->pdev; 3407 int r, i; 3408 bool px = false; 3409 u32 max_MBps; 3410 3411 adev->shutdown = false; 3412 adev->flags = flags; 3413 3414 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 3415 adev->asic_type = amdgpu_force_asic_type; 3416 else 3417 adev->asic_type = flags & AMD_ASIC_MASK; 3418 3419 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 3420 if (amdgpu_emu_mode == 1) 3421 adev->usec_timeout *= 10; 3422 adev->gmc.gart_size = 512 * 1024 * 1024; 3423 adev->accel_working = false; 3424 adev->num_rings = 0; 3425 adev->mman.buffer_funcs = NULL; 3426 adev->mman.buffer_funcs_ring = NULL; 3427 adev->vm_manager.vm_pte_funcs = NULL; 3428 adev->vm_manager.vm_pte_num_scheds = 0; 3429 adev->gmc.gmc_funcs = NULL; 3430 adev->harvest_ip_mask = 0x0; 3431 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 3432 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 3433 3434 adev->smc_rreg = &amdgpu_invalid_rreg; 3435 adev->smc_wreg = &amdgpu_invalid_wreg; 3436 adev->pcie_rreg = &amdgpu_invalid_rreg; 3437 adev->pcie_wreg = &amdgpu_invalid_wreg; 3438 adev->pciep_rreg = &amdgpu_invalid_rreg; 3439 adev->pciep_wreg = &amdgpu_invalid_wreg; 3440 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 3441 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 3442 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 3443 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 3444 adev->didt_rreg = &amdgpu_invalid_rreg; 3445 adev->didt_wreg = &amdgpu_invalid_wreg; 3446 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 3447 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 3448 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 3449 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 3450 3451 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 3452 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 3453 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 3454 3455 /* mutex initialization are all done here so we 3456 * can recall function without having locking issues */ 3457 mutex_init(&adev->firmware.mutex); 3458 mutex_init(&adev->pm.mutex); 3459 mutex_init(&adev->gfx.gpu_clock_mutex); 3460 mutex_init(&adev->srbm_mutex); 3461 mutex_init(&adev->gfx.pipe_reserve_mutex); 3462 mutex_init(&adev->gfx.gfx_off_mutex); 3463 mutex_init(&adev->grbm_idx_mutex); 3464 mutex_init(&adev->mn_lock); 3465 mutex_init(&adev->virt.vf_errors.lock); 3466 hash_init(adev->mn_hash); 3467 atomic_set(&adev->in_gpu_reset, 0); 3468 init_rwsem(&adev->reset_sem); 3469 mutex_init(&adev->psp.mutex); 3470 mutex_init(&adev->notifier_lock); 3471 3472 r = amdgpu_device_init_apu_flags(adev); 3473 if (r) 3474 return r; 3475 3476 r = amdgpu_device_check_arguments(adev); 3477 if (r) 3478 return r; 3479 3480 spin_lock_init(&adev->mmio_idx_lock); 3481 spin_lock_init(&adev->smc_idx_lock); 3482 spin_lock_init(&adev->pcie_idx_lock); 3483 spin_lock_init(&adev->uvd_ctx_idx_lock); 3484 spin_lock_init(&adev->didt_idx_lock); 3485 spin_lock_init(&adev->gc_cac_idx_lock); 3486 spin_lock_init(&adev->se_cac_idx_lock); 3487 spin_lock_init(&adev->audio_endpt_idx_lock); 3488 spin_lock_init(&adev->mm_stats.lock); 3489 3490 INIT_LIST_HEAD(&adev->shadow_list); 3491 mutex_init(&adev->shadow_list_lock); 3492 3493 INIT_LIST_HEAD(&adev->reset_list); 3494 3495 INIT_DELAYED_WORK(&adev->delayed_init_work, 3496 amdgpu_device_delayed_init_work_handler); 3497 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3498 amdgpu_device_delay_enable_gfx_off); 3499 3500 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3501 3502 adev->gfx.gfx_off_req_count = 1; 3503 adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3504 3505 atomic_set(&adev->throttling_logging_enabled, 1); 3506 /* 3507 * If throttling continues, logging will be performed every minute 3508 * to avoid log flooding. "-1" is subtracted since the thermal 3509 * throttling interrupt comes every second. Thus, the total logging 3510 * interval is 59 seconds(retelimited printk interval) + 1(waiting 3511 * for throttling interrupt) = 60 seconds. 3512 */ 3513 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); 3514 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); 3515 3516 /* Registers mapping */ 3517 /* TODO: block userspace mapping of io register */ 3518 if (adev->asic_type >= CHIP_BONAIRE) { 3519 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3520 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 3521 } else { 3522 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3523 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3524 } 3525 3526 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3527 if (adev->rmmio == NULL) { 3528 return -ENOMEM; 3529 } 3530 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3531 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 3532 3533 /* enable PCIE atomic ops */ 3534 r = pci_enable_atomic_ops_to_root(adev->pdev, 3535 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3536 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3537 if (r) { 3538 adev->have_atomics_support = false; 3539 DRM_INFO("PCIE atomic ops is not supported\n"); 3540 } else { 3541 adev->have_atomics_support = true; 3542 } 3543 3544 amdgpu_device_get_pcie_info(adev); 3545 3546 if (amdgpu_mcbp) 3547 DRM_INFO("MCBP is enabled\n"); 3548 3549 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) 3550 adev->enable_mes = true; 3551 3552 /* detect hw virtualization here */ 3553 amdgpu_detect_virtualization(adev); 3554 3555 r = amdgpu_device_get_job_timeout_settings(adev); 3556 if (r) { 3557 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3558 return r; 3559 } 3560 3561 /* early init functions */ 3562 r = amdgpu_device_ip_early_init(adev); 3563 if (r) 3564 return r; 3565 3566 /* doorbell bar mapping and doorbell index init*/ 3567 amdgpu_device_doorbell_init(adev); 3568 3569 if (amdgpu_emu_mode == 1) { 3570 /* post the asic on emulation mode */ 3571 emu_soc_asic_init(adev); 3572 goto fence_driver_init; 3573 } 3574 3575 amdgpu_reset_init(adev); 3576 3577 /* detect if we are with an SRIOV vbios */ 3578 amdgpu_device_detect_sriov_bios(adev); 3579 3580 /* check if we need to reset the asic 3581 * E.g., driver was not cleanly unloaded previously, etc. 3582 */ 3583 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3584 if (adev->gmc.xgmi.num_physical_nodes) { 3585 dev_info(adev->dev, "Pending hive reset.\n"); 3586 adev->gmc.xgmi.pending_reset = true; 3587 /* Only need to init necessary block for SMU to handle the reset */ 3588 for (i = 0; i < adev->num_ip_blocks; i++) { 3589 if (!adev->ip_blocks[i].status.valid) 3590 continue; 3591 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3592 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3593 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3594 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) { 3595 DRM_DEBUG("IP %s disabled for hw_init.\n", 3596 adev->ip_blocks[i].version->funcs->name); 3597 adev->ip_blocks[i].status.hw = true; 3598 } 3599 } 3600 } else { 3601 r = amdgpu_asic_reset(adev); 3602 if (r) { 3603 dev_err(adev->dev, "asic reset on init failed\n"); 3604 goto failed; 3605 } 3606 } 3607 } 3608 3609 pci_enable_pcie_error_reporting(adev->pdev); 3610 3611 /* Post card if necessary */ 3612 if (amdgpu_device_need_post(adev)) { 3613 if (!adev->bios) { 3614 dev_err(adev->dev, "no vBIOS found\n"); 3615 r = -EINVAL; 3616 goto failed; 3617 } 3618 DRM_INFO("GPU posting now...\n"); 3619 r = amdgpu_device_asic_init(adev); 3620 if (r) { 3621 dev_err(adev->dev, "gpu post error!\n"); 3622 goto failed; 3623 } 3624 } 3625 3626 if (adev->is_atom_fw) { 3627 /* Initialize clocks */ 3628 r = amdgpu_atomfirmware_get_clock_info(adev); 3629 if (r) { 3630 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3631 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3632 goto failed; 3633 } 3634 } else { 3635 /* Initialize clocks */ 3636 r = amdgpu_atombios_get_clock_info(adev); 3637 if (r) { 3638 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3639 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3640 goto failed; 3641 } 3642 /* init i2c buses */ 3643 if (!amdgpu_device_has_dc_support(adev)) 3644 amdgpu_atombios_i2c_init(adev); 3645 } 3646 3647 fence_driver_init: 3648 /* Fence driver */ 3649 r = amdgpu_fence_driver_init(adev); 3650 if (r) { 3651 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 3652 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3653 goto failed; 3654 } 3655 3656 /* init the mode config */ 3657 drm_mode_config_init(adev_to_drm(adev)); 3658 3659 r = amdgpu_device_ip_init(adev); 3660 if (r) { 3661 /* failed in exclusive mode due to timeout */ 3662 if (amdgpu_sriov_vf(adev) && 3663 !amdgpu_sriov_runtime(adev) && 3664 amdgpu_virt_mmio_blocked(adev) && 3665 !amdgpu_virt_wait_reset(adev)) { 3666 dev_err(adev->dev, "VF exclusive mode timeout\n"); 3667 /* Don't send request since VF is inactive. */ 3668 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 3669 adev->virt.ops = NULL; 3670 r = -EAGAIN; 3671 goto release_ras_con; 3672 } 3673 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3674 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3675 goto release_ras_con; 3676 } 3677 3678 dev_info(adev->dev, 3679 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3680 adev->gfx.config.max_shader_engines, 3681 adev->gfx.config.max_sh_per_se, 3682 adev->gfx.config.max_cu_per_sh, 3683 adev->gfx.cu_info.number); 3684 3685 adev->accel_working = true; 3686 3687 amdgpu_vm_check_compute_bug(adev); 3688 3689 /* Initialize the buffer migration limit. */ 3690 if (amdgpu_moverate >= 0) 3691 max_MBps = amdgpu_moverate; 3692 else 3693 max_MBps = 8; /* Allow 8 MB/s. */ 3694 /* Get a log2 for easy divisions. */ 3695 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3696 3697 amdgpu_fbdev_init(adev); 3698 3699 r = amdgpu_pm_sysfs_init(adev); 3700 if (r) { 3701 adev->pm_sysfs_en = false; 3702 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 3703 } else 3704 adev->pm_sysfs_en = true; 3705 3706 r = amdgpu_ucode_sysfs_init(adev); 3707 if (r) { 3708 adev->ucode_sysfs_en = false; 3709 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3710 } else 3711 adev->ucode_sysfs_en = true; 3712 3713 if ((amdgpu_testing & 1)) { 3714 if (adev->accel_working) 3715 amdgpu_test_moves(adev); 3716 else 3717 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 3718 } 3719 if (amdgpu_benchmarking) { 3720 if (adev->accel_working) 3721 amdgpu_benchmark(adev, amdgpu_benchmarking); 3722 else 3723 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 3724 } 3725 3726 /* 3727 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3728 * Otherwise the mgpu fan boost feature will be skipped due to the 3729 * gpu instance is counted less. 3730 */ 3731 amdgpu_register_gpu_instance(adev); 3732 3733 /* enable clockgating, etc. after ib tests, etc. since some blocks require 3734 * explicit gating rather than handling it automatically. 3735 */ 3736 if (!adev->gmc.xgmi.pending_reset) { 3737 r = amdgpu_device_ip_late_init(adev); 3738 if (r) { 3739 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3740 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 3741 goto release_ras_con; 3742 } 3743 /* must succeed. */ 3744 amdgpu_ras_resume(adev); 3745 queue_delayed_work(system_wq, &adev->delayed_init_work, 3746 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3747 } 3748 3749 if (amdgpu_sriov_vf(adev)) 3750 flush_delayed_work(&adev->delayed_init_work); 3751 3752 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 3753 if (r) 3754 dev_err(adev->dev, "Could not create amdgpu device attr\n"); 3755 3756 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3757 r = amdgpu_pmu_init(adev); 3758 if (r) 3759 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 3760 3761 /* Have stored pci confspace at hand for restore in sudden PCI error */ 3762 if (amdgpu_device_cache_pci_state(adev->pdev)) 3763 pci_restore_state(pdev); 3764 3765 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 3766 /* this will fail for cards that aren't VGA class devices, just 3767 * ignore it */ 3768 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 3769 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 3770 3771 if (amdgpu_device_supports_px(ddev)) { 3772 px = true; 3773 vga_switcheroo_register_client(adev->pdev, 3774 &amdgpu_switcheroo_ops, px); 3775 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 3776 } 3777 3778 if (adev->gmc.xgmi.pending_reset) 3779 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, 3780 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3781 3782 return 0; 3783 3784 release_ras_con: 3785 amdgpu_release_ras_context(adev); 3786 3787 failed: 3788 amdgpu_vf_error_trans_all(adev); 3789 3790 return r; 3791 } 3792 3793 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) 3794 { 3795 /* Clear all CPU mappings pointing to this device */ 3796 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); 3797 3798 /* Unmap all mapped bars - Doorbell, registers and VRAM */ 3799 amdgpu_device_doorbell_fini(adev); 3800 3801 iounmap(adev->rmmio); 3802 adev->rmmio = NULL; 3803 if (adev->mman.aper_base_kaddr) 3804 iounmap(adev->mman.aper_base_kaddr); 3805 adev->mman.aper_base_kaddr = NULL; 3806 3807 /* Memory manager related */ 3808 if (!adev->gmc.xgmi.connected_to_cpu) { 3809 arch_phys_wc_del(adev->gmc.vram_mtrr); 3810 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); 3811 } 3812 } 3813 3814 /** 3815 * amdgpu_device_fini - tear down the driver 3816 * 3817 * @adev: amdgpu_device pointer 3818 * 3819 * Tear down the driver info (all asics). 3820 * Called at driver shutdown. 3821 */ 3822 void amdgpu_device_fini_hw(struct amdgpu_device *adev) 3823 { 3824 dev_info(adev->dev, "amdgpu: finishing device.\n"); 3825 flush_delayed_work(&adev->delayed_init_work); 3826 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); 3827 adev->shutdown = true; 3828 3829 /* make sure IB test finished before entering exclusive mode 3830 * to avoid preemption on IB test 3831 * */ 3832 if (amdgpu_sriov_vf(adev)) { 3833 amdgpu_virt_request_full_gpu(adev, false); 3834 amdgpu_virt_fini_data_exchange(adev); 3835 } 3836 3837 /* disable all interrupts */ 3838 amdgpu_irq_disable_all(adev); 3839 if (adev->mode_info.mode_config_initialized){ 3840 if (!amdgpu_device_has_dc_support(adev)) 3841 drm_helper_force_disable_all(adev_to_drm(adev)); 3842 else 3843 drm_atomic_helper_shutdown(adev_to_drm(adev)); 3844 } 3845 amdgpu_fence_driver_fini_hw(adev); 3846 3847 if (adev->pm_sysfs_en) 3848 amdgpu_pm_sysfs_fini(adev); 3849 if (adev->ucode_sysfs_en) 3850 amdgpu_ucode_sysfs_fini(adev); 3851 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 3852 3853 amdgpu_fbdev_fini(adev); 3854 3855 amdgpu_irq_fini_hw(adev); 3856 3857 amdgpu_device_ip_fini_early(adev); 3858 3859 amdgpu_gart_dummy_page_fini(adev); 3860 3861 amdgpu_device_unmap_mmio(adev); 3862 } 3863 3864 void amdgpu_device_fini_sw(struct amdgpu_device *adev) 3865 { 3866 amdgpu_device_ip_fini(adev); 3867 amdgpu_fence_driver_fini_sw(adev); 3868 release_firmware(adev->firmware.gpu_info_fw); 3869 adev->firmware.gpu_info_fw = NULL; 3870 adev->accel_working = false; 3871 3872 amdgpu_reset_fini(adev); 3873 3874 /* free i2c buses */ 3875 if (!amdgpu_device_has_dc_support(adev)) 3876 amdgpu_i2c_fini(adev); 3877 3878 if (amdgpu_emu_mode != 1) 3879 amdgpu_atombios_fini(adev); 3880 3881 kfree(adev->bios); 3882 adev->bios = NULL; 3883 if (amdgpu_device_supports_px(adev_to_drm(adev))) { 3884 vga_switcheroo_unregister_client(adev->pdev); 3885 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3886 } 3887 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 3888 vga_client_register(adev->pdev, NULL, NULL, NULL); 3889 3890 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3891 amdgpu_pmu_fini(adev); 3892 if (adev->mman.discovery_bin) 3893 amdgpu_discovery_fini(adev); 3894 3895 kfree(adev->pci_state); 3896 3897 } 3898 3899 3900 /* 3901 * Suspend & resume. 3902 */ 3903 /** 3904 * amdgpu_device_suspend - initiate device suspend 3905 * 3906 * @dev: drm dev pointer 3907 * @fbcon : notify the fbdev of suspend 3908 * 3909 * Puts the hw in the suspend state (all asics). 3910 * Returns 0 for success or an error on failure. 3911 * Called at driver suspend. 3912 */ 3913 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 3914 { 3915 struct amdgpu_device *adev = drm_to_adev(dev); 3916 3917 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3918 return 0; 3919 3920 adev->in_suspend = true; 3921 3922 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) 3923 DRM_WARN("smart shift update failed\n"); 3924 3925 drm_kms_helper_poll_disable(dev); 3926 3927 if (fbcon) 3928 amdgpu_fbdev_set_suspend(adev, 1); 3929 3930 cancel_delayed_work_sync(&adev->delayed_init_work); 3931 3932 amdgpu_ras_suspend(adev); 3933 3934 amdgpu_device_ip_suspend_phase1(adev); 3935 3936 if (!adev->in_s0ix) 3937 amdgpu_amdkfd_suspend(adev, adev->in_runpm); 3938 3939 /* evict vram memory */ 3940 amdgpu_bo_evict_vram(adev); 3941 3942 amdgpu_fence_driver_suspend(adev); 3943 3944 amdgpu_device_ip_suspend_phase2(adev); 3945 /* evict remaining vram memory 3946 * This second call to evict vram is to evict the gart page table 3947 * using the CPU. 3948 */ 3949 amdgpu_bo_evict_vram(adev); 3950 3951 return 0; 3952 } 3953 3954 /** 3955 * amdgpu_device_resume - initiate device resume 3956 * 3957 * @dev: drm dev pointer 3958 * @fbcon : notify the fbdev of resume 3959 * 3960 * Bring the hw back to operating state (all asics). 3961 * Returns 0 for success or an error on failure. 3962 * Called at driver resume. 3963 */ 3964 int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 3965 { 3966 struct amdgpu_device *adev = drm_to_adev(dev); 3967 int r = 0; 3968 3969 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3970 return 0; 3971 3972 if (adev->in_s0ix) 3973 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry); 3974 3975 /* post card */ 3976 if (amdgpu_device_need_post(adev)) { 3977 r = amdgpu_device_asic_init(adev); 3978 if (r) 3979 dev_err(adev->dev, "amdgpu asic init failed\n"); 3980 } 3981 3982 r = amdgpu_device_ip_resume(adev); 3983 if (r) { 3984 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); 3985 return r; 3986 } 3987 amdgpu_fence_driver_resume(adev); 3988 3989 3990 r = amdgpu_device_ip_late_init(adev); 3991 if (r) 3992 return r; 3993 3994 queue_delayed_work(system_wq, &adev->delayed_init_work, 3995 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3996 3997 if (!adev->in_s0ix) { 3998 r = amdgpu_amdkfd_resume(adev, adev->in_runpm); 3999 if (r) 4000 return r; 4001 } 4002 4003 /* Make sure IB tests flushed */ 4004 flush_delayed_work(&adev->delayed_init_work); 4005 4006 if (fbcon) 4007 amdgpu_fbdev_set_suspend(adev, 0); 4008 4009 drm_kms_helper_poll_enable(dev); 4010 4011 amdgpu_ras_resume(adev); 4012 4013 /* 4014 * Most of the connector probing functions try to acquire runtime pm 4015 * refs to ensure that the GPU is powered on when connector polling is 4016 * performed. Since we're calling this from a runtime PM callback, 4017 * trying to acquire rpm refs will cause us to deadlock. 4018 * 4019 * Since we're guaranteed to be holding the rpm lock, it's safe to 4020 * temporarily disable the rpm helpers so this doesn't deadlock us. 4021 */ 4022 #ifdef CONFIG_PM 4023 dev->dev->power.disable_depth++; 4024 #endif 4025 if (!amdgpu_device_has_dc_support(adev)) 4026 drm_helper_hpd_irq_event(dev); 4027 else 4028 drm_kms_helper_hotplug_event(dev); 4029 #ifdef CONFIG_PM 4030 dev->dev->power.disable_depth--; 4031 #endif 4032 adev->in_suspend = false; 4033 4034 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) 4035 DRM_WARN("smart shift update failed\n"); 4036 4037 return 0; 4038 } 4039 4040 /** 4041 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 4042 * 4043 * @adev: amdgpu_device pointer 4044 * 4045 * The list of all the hardware IPs that make up the asic is walked and 4046 * the check_soft_reset callbacks are run. check_soft_reset determines 4047 * if the asic is still hung or not. 4048 * Returns true if any of the IPs are still in a hung state, false if not. 4049 */ 4050 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 4051 { 4052 int i; 4053 bool asic_hang = false; 4054 4055 if (amdgpu_sriov_vf(adev)) 4056 return true; 4057 4058 if (amdgpu_asic_need_full_reset(adev)) 4059 return true; 4060 4061 for (i = 0; i < adev->num_ip_blocks; i++) { 4062 if (!adev->ip_blocks[i].status.valid) 4063 continue; 4064 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 4065 adev->ip_blocks[i].status.hang = 4066 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 4067 if (adev->ip_blocks[i].status.hang) { 4068 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 4069 asic_hang = true; 4070 } 4071 } 4072 return asic_hang; 4073 } 4074 4075 /** 4076 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 4077 * 4078 * @adev: amdgpu_device pointer 4079 * 4080 * The list of all the hardware IPs that make up the asic is walked and the 4081 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 4082 * handles any IP specific hardware or software state changes that are 4083 * necessary for a soft reset to succeed. 4084 * Returns 0 on success, negative error code on failure. 4085 */ 4086 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 4087 { 4088 int i, r = 0; 4089 4090 for (i = 0; i < adev->num_ip_blocks; i++) { 4091 if (!adev->ip_blocks[i].status.valid) 4092 continue; 4093 if (adev->ip_blocks[i].status.hang && 4094 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 4095 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 4096 if (r) 4097 return r; 4098 } 4099 } 4100 4101 return 0; 4102 } 4103 4104 /** 4105 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 4106 * 4107 * @adev: amdgpu_device pointer 4108 * 4109 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 4110 * reset is necessary to recover. 4111 * Returns true if a full asic reset is required, false if not. 4112 */ 4113 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 4114 { 4115 int i; 4116 4117 if (amdgpu_asic_need_full_reset(adev)) 4118 return true; 4119 4120 for (i = 0; i < adev->num_ip_blocks; i++) { 4121 if (!adev->ip_blocks[i].status.valid) 4122 continue; 4123 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 4124 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 4125 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 4126 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 4127 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 4128 if (adev->ip_blocks[i].status.hang) { 4129 dev_info(adev->dev, "Some block need full reset!\n"); 4130 return true; 4131 } 4132 } 4133 } 4134 return false; 4135 } 4136 4137 /** 4138 * amdgpu_device_ip_soft_reset - do a soft reset 4139 * 4140 * @adev: amdgpu_device pointer 4141 * 4142 * The list of all the hardware IPs that make up the asic is walked and the 4143 * soft_reset callbacks are run if the block is hung. soft_reset handles any 4144 * IP specific hardware or software state changes that are necessary to soft 4145 * reset the IP. 4146 * Returns 0 on success, negative error code on failure. 4147 */ 4148 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 4149 { 4150 int i, r = 0; 4151 4152 for (i = 0; i < adev->num_ip_blocks; i++) { 4153 if (!adev->ip_blocks[i].status.valid) 4154 continue; 4155 if (adev->ip_blocks[i].status.hang && 4156 adev->ip_blocks[i].version->funcs->soft_reset) { 4157 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 4158 if (r) 4159 return r; 4160 } 4161 } 4162 4163 return 0; 4164 } 4165 4166 /** 4167 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 4168 * 4169 * @adev: amdgpu_device pointer 4170 * 4171 * The list of all the hardware IPs that make up the asic is walked and the 4172 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 4173 * handles any IP specific hardware or software state changes that are 4174 * necessary after the IP has been soft reset. 4175 * Returns 0 on success, negative error code on failure. 4176 */ 4177 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 4178 { 4179 int i, r = 0; 4180 4181 for (i = 0; i < adev->num_ip_blocks; i++) { 4182 if (!adev->ip_blocks[i].status.valid) 4183 continue; 4184 if (adev->ip_blocks[i].status.hang && 4185 adev->ip_blocks[i].version->funcs->post_soft_reset) 4186 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 4187 if (r) 4188 return r; 4189 } 4190 4191 return 0; 4192 } 4193 4194 /** 4195 * amdgpu_device_recover_vram - Recover some VRAM contents 4196 * 4197 * @adev: amdgpu_device pointer 4198 * 4199 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 4200 * restore things like GPUVM page tables after a GPU reset where 4201 * the contents of VRAM might be lost. 4202 * 4203 * Returns: 4204 * 0 on success, negative error code on failure. 4205 */ 4206 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 4207 { 4208 struct dma_fence *fence = NULL, *next = NULL; 4209 struct amdgpu_bo *shadow; 4210 struct amdgpu_bo_vm *vmbo; 4211 long r = 1, tmo; 4212 4213 if (amdgpu_sriov_runtime(adev)) 4214 tmo = msecs_to_jiffies(8000); 4215 else 4216 tmo = msecs_to_jiffies(100); 4217 4218 dev_info(adev->dev, "recover vram bo from shadow start\n"); 4219 mutex_lock(&adev->shadow_list_lock); 4220 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { 4221 shadow = &vmbo->bo; 4222 /* No need to recover an evicted BO */ 4223 if (shadow->tbo.resource->mem_type != TTM_PL_TT || 4224 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || 4225 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) 4226 continue; 4227 4228 r = amdgpu_bo_restore_shadow(shadow, &next); 4229 if (r) 4230 break; 4231 4232 if (fence) { 4233 tmo = dma_fence_wait_timeout(fence, false, tmo); 4234 dma_fence_put(fence); 4235 fence = next; 4236 if (tmo == 0) { 4237 r = -ETIMEDOUT; 4238 break; 4239 } else if (tmo < 0) { 4240 r = tmo; 4241 break; 4242 } 4243 } else { 4244 fence = next; 4245 } 4246 } 4247 mutex_unlock(&adev->shadow_list_lock); 4248 4249 if (fence) 4250 tmo = dma_fence_wait_timeout(fence, false, tmo); 4251 dma_fence_put(fence); 4252 4253 if (r < 0 || tmo <= 0) { 4254 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 4255 return -EIO; 4256 } 4257 4258 dev_info(adev->dev, "recover vram bo from shadow done\n"); 4259 return 0; 4260 } 4261 4262 4263 /** 4264 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 4265 * 4266 * @adev: amdgpu_device pointer 4267 * @from_hypervisor: request from hypervisor 4268 * 4269 * do VF FLR and reinitialize Asic 4270 * return 0 means succeeded otherwise failed 4271 */ 4272 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 4273 bool from_hypervisor) 4274 { 4275 int r; 4276 4277 if (from_hypervisor) 4278 r = amdgpu_virt_request_full_gpu(adev, true); 4279 else 4280 r = amdgpu_virt_reset_gpu(adev); 4281 if (r) 4282 return r; 4283 4284 amdgpu_amdkfd_pre_reset(adev); 4285 4286 /* Resume IP prior to SMC */ 4287 r = amdgpu_device_ip_reinit_early_sriov(adev); 4288 if (r) 4289 goto error; 4290 4291 amdgpu_virt_init_data_exchange(adev); 4292 /* we need recover gart prior to run SMC/CP/SDMA resume */ 4293 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)); 4294 4295 r = amdgpu_device_fw_loading(adev); 4296 if (r) 4297 return r; 4298 4299 /* now we are okay to resume SMC/CP/SDMA */ 4300 r = amdgpu_device_ip_reinit_late_sriov(adev); 4301 if (r) 4302 goto error; 4303 4304 amdgpu_irq_gpu_reset_resume_helper(adev); 4305 r = amdgpu_ib_ring_tests(adev); 4306 amdgpu_amdkfd_post_reset(adev); 4307 4308 error: 4309 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 4310 amdgpu_inc_vram_lost(adev); 4311 r = amdgpu_device_recover_vram(adev); 4312 } 4313 amdgpu_virt_release_full_gpu(adev, true); 4314 4315 return r; 4316 } 4317 4318 /** 4319 * amdgpu_device_has_job_running - check if there is any job in mirror list 4320 * 4321 * @adev: amdgpu_device pointer 4322 * 4323 * check if there is any job in mirror list 4324 */ 4325 bool amdgpu_device_has_job_running(struct amdgpu_device *adev) 4326 { 4327 int i; 4328 struct drm_sched_job *job; 4329 4330 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4331 struct amdgpu_ring *ring = adev->rings[i]; 4332 4333 if (!ring || !ring->sched.thread) 4334 continue; 4335 4336 spin_lock(&ring->sched.job_list_lock); 4337 job = list_first_entry_or_null(&ring->sched.pending_list, 4338 struct drm_sched_job, list); 4339 spin_unlock(&ring->sched.job_list_lock); 4340 if (job) 4341 return true; 4342 } 4343 return false; 4344 } 4345 4346 /** 4347 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 4348 * 4349 * @adev: amdgpu_device pointer 4350 * 4351 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 4352 * a hung GPU. 4353 */ 4354 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 4355 { 4356 if (!amdgpu_device_ip_check_soft_reset(adev)) { 4357 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n"); 4358 return false; 4359 } 4360 4361 if (amdgpu_gpu_recovery == 0) 4362 goto disabled; 4363 4364 if (amdgpu_sriov_vf(adev)) 4365 return true; 4366 4367 if (amdgpu_gpu_recovery == -1) { 4368 switch (adev->asic_type) { 4369 case CHIP_BONAIRE: 4370 case CHIP_HAWAII: 4371 case CHIP_TOPAZ: 4372 case CHIP_TONGA: 4373 case CHIP_FIJI: 4374 case CHIP_POLARIS10: 4375 case CHIP_POLARIS11: 4376 case CHIP_POLARIS12: 4377 case CHIP_VEGAM: 4378 case CHIP_VEGA20: 4379 case CHIP_VEGA10: 4380 case CHIP_VEGA12: 4381 case CHIP_RAVEN: 4382 case CHIP_ARCTURUS: 4383 case CHIP_RENOIR: 4384 case CHIP_NAVI10: 4385 case CHIP_NAVI14: 4386 case CHIP_NAVI12: 4387 case CHIP_SIENNA_CICHLID: 4388 case CHIP_NAVY_FLOUNDER: 4389 case CHIP_DIMGREY_CAVEFISH: 4390 case CHIP_BEIGE_GOBY: 4391 case CHIP_VANGOGH: 4392 case CHIP_ALDEBARAN: 4393 break; 4394 default: 4395 goto disabled; 4396 } 4397 } 4398 4399 return true; 4400 4401 disabled: 4402 dev_info(adev->dev, "GPU recovery disabled.\n"); 4403 return false; 4404 } 4405 4406 int amdgpu_device_mode1_reset(struct amdgpu_device *adev) 4407 { 4408 u32 i; 4409 int ret = 0; 4410 4411 amdgpu_atombios_scratch_regs_engine_hung(adev, true); 4412 4413 dev_info(adev->dev, "GPU mode1 reset\n"); 4414 4415 /* disable BM */ 4416 pci_clear_master(adev->pdev); 4417 4418 amdgpu_device_cache_pci_state(adev->pdev); 4419 4420 if (amdgpu_dpm_is_mode1_reset_supported(adev)) { 4421 dev_info(adev->dev, "GPU smu mode1 reset\n"); 4422 ret = amdgpu_dpm_mode1_reset(adev); 4423 } else { 4424 dev_info(adev->dev, "GPU psp mode1 reset\n"); 4425 ret = psp_gpu_reset(adev); 4426 } 4427 4428 if (ret) 4429 dev_err(adev->dev, "GPU mode1 reset failed\n"); 4430 4431 amdgpu_device_load_pci_state(adev->pdev); 4432 4433 /* wait for asic to come out of reset */ 4434 for (i = 0; i < adev->usec_timeout; i++) { 4435 u32 memsize = adev->nbio.funcs->get_memsize(adev); 4436 4437 if (memsize != 0xffffffff) 4438 break; 4439 udelay(1); 4440 } 4441 4442 amdgpu_atombios_scratch_regs_engine_hung(adev, false); 4443 return ret; 4444 } 4445 4446 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 4447 struct amdgpu_reset_context *reset_context) 4448 { 4449 int i, r = 0; 4450 struct amdgpu_job *job = NULL; 4451 bool need_full_reset = 4452 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4453 4454 if (reset_context->reset_req_dev == adev) 4455 job = reset_context->job; 4456 4457 /* no need to dump if device is not in good state during probe period */ 4458 if (!adev->gmc.xgmi.pending_reset) 4459 amdgpu_debugfs_wait_dump(adev); 4460 4461 if (amdgpu_sriov_vf(adev)) { 4462 /* stop the data exchange thread */ 4463 amdgpu_virt_fini_data_exchange(adev); 4464 } 4465 4466 /* block all schedulers and reset given job's ring */ 4467 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4468 struct amdgpu_ring *ring = adev->rings[i]; 4469 4470 if (!ring || !ring->sched.thread) 4471 continue; 4472 4473 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 4474 amdgpu_fence_driver_force_completion(ring); 4475 } 4476 4477 if (job && job->vm) 4478 drm_sched_increase_karma(&job->base); 4479 4480 r = amdgpu_reset_prepare_hwcontext(adev, reset_context); 4481 /* If reset handler not implemented, continue; otherwise return */ 4482 if (r == -ENOSYS) 4483 r = 0; 4484 else 4485 return r; 4486 4487 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 4488 if (!amdgpu_sriov_vf(adev)) { 4489 4490 if (!need_full_reset) 4491 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 4492 4493 if (!need_full_reset) { 4494 amdgpu_device_ip_pre_soft_reset(adev); 4495 r = amdgpu_device_ip_soft_reset(adev); 4496 amdgpu_device_ip_post_soft_reset(adev); 4497 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 4498 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n"); 4499 need_full_reset = true; 4500 } 4501 } 4502 4503 if (need_full_reset) 4504 r = amdgpu_device_ip_suspend(adev); 4505 if (need_full_reset) 4506 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4507 else 4508 clear_bit(AMDGPU_NEED_FULL_RESET, 4509 &reset_context->flags); 4510 } 4511 4512 return r; 4513 } 4514 4515 int amdgpu_do_asic_reset(struct list_head *device_list_handle, 4516 struct amdgpu_reset_context *reset_context) 4517 { 4518 struct amdgpu_device *tmp_adev = NULL; 4519 bool need_full_reset, skip_hw_reset, vram_lost = false; 4520 int r = 0; 4521 4522 /* Try reset handler method first */ 4523 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 4524 reset_list); 4525 r = amdgpu_reset_perform_reset(tmp_adev, reset_context); 4526 /* If reset handler not implemented, continue; otherwise return */ 4527 if (r == -ENOSYS) 4528 r = 0; 4529 else 4530 return r; 4531 4532 /* Reset handler not implemented, use the default method */ 4533 need_full_reset = 4534 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4535 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); 4536 4537 /* 4538 * ASIC reset has to be done on all XGMI hive nodes ASAP 4539 * to allow proper links negotiation in FW (within 1 sec) 4540 */ 4541 if (!skip_hw_reset && need_full_reset) { 4542 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 4543 /* For XGMI run all resets in parallel to speed up the process */ 4544 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 4545 tmp_adev->gmc.xgmi.pending_reset = false; 4546 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) 4547 r = -EALREADY; 4548 } else 4549 r = amdgpu_asic_reset(tmp_adev); 4550 4551 if (r) { 4552 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s", 4553 r, adev_to_drm(tmp_adev)->unique); 4554 break; 4555 } 4556 } 4557 4558 /* For XGMI wait for all resets to complete before proceed */ 4559 if (!r) { 4560 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 4561 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 4562 flush_work(&tmp_adev->xgmi_reset_work); 4563 r = tmp_adev->asic_reset_res; 4564 if (r) 4565 break; 4566 } 4567 } 4568 } 4569 } 4570 4571 if (!r && amdgpu_ras_intr_triggered()) { 4572 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 4573 if (tmp_adev->mmhub.ras_funcs && 4574 tmp_adev->mmhub.ras_funcs->reset_ras_error_count) 4575 tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev); 4576 } 4577 4578 amdgpu_ras_intr_cleared(); 4579 } 4580 4581 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 4582 if (need_full_reset) { 4583 /* post card */ 4584 r = amdgpu_device_asic_init(tmp_adev); 4585 if (r) { 4586 dev_warn(tmp_adev->dev, "asic atom init failed!"); 4587 } else { 4588 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 4589 r = amdgpu_device_ip_resume_phase1(tmp_adev); 4590 if (r) 4591 goto out; 4592 4593 vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 4594 if (vram_lost) { 4595 DRM_INFO("VRAM is lost due to GPU reset!\n"); 4596 amdgpu_inc_vram_lost(tmp_adev); 4597 } 4598 4599 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT)); 4600 if (r) 4601 goto out; 4602 4603 r = amdgpu_device_fw_loading(tmp_adev); 4604 if (r) 4605 return r; 4606 4607 r = amdgpu_device_ip_resume_phase2(tmp_adev); 4608 if (r) 4609 goto out; 4610 4611 if (vram_lost) 4612 amdgpu_device_fill_reset_magic(tmp_adev); 4613 4614 /* 4615 * Add this ASIC as tracked as reset was already 4616 * complete successfully. 4617 */ 4618 amdgpu_register_gpu_instance(tmp_adev); 4619 4620 if (!reset_context->hive && 4621 tmp_adev->gmc.xgmi.num_physical_nodes > 1) 4622 amdgpu_xgmi_add_device(tmp_adev); 4623 4624 r = amdgpu_device_ip_late_init(tmp_adev); 4625 if (r) 4626 goto out; 4627 4628 amdgpu_fbdev_set_suspend(tmp_adev, 0); 4629 4630 /* 4631 * The GPU enters bad state once faulty pages 4632 * by ECC has reached the threshold, and ras 4633 * recovery is scheduled next. So add one check 4634 * here to break recovery if it indeed exceeds 4635 * bad page threshold, and remind user to 4636 * retire this GPU or setting one bigger 4637 * bad_page_threshold value to fix this once 4638 * probing driver again. 4639 */ 4640 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { 4641 /* must succeed. */ 4642 amdgpu_ras_resume(tmp_adev); 4643 } else { 4644 r = -EINVAL; 4645 goto out; 4646 } 4647 4648 /* Update PSP FW topology after reset */ 4649 if (reset_context->hive && 4650 tmp_adev->gmc.xgmi.num_physical_nodes > 1) 4651 r = amdgpu_xgmi_update_topology( 4652 reset_context->hive, tmp_adev); 4653 } 4654 } 4655 4656 out: 4657 if (!r) { 4658 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 4659 r = amdgpu_ib_ring_tests(tmp_adev); 4660 if (r) { 4661 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); 4662 need_full_reset = true; 4663 r = -EAGAIN; 4664 goto end; 4665 } 4666 } 4667 4668 if (!r) 4669 r = amdgpu_device_recover_vram(tmp_adev); 4670 else 4671 tmp_adev->asic_reset_res = r; 4672 } 4673 4674 end: 4675 if (need_full_reset) 4676 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4677 else 4678 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4679 return r; 4680 } 4681 4682 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, 4683 struct amdgpu_hive_info *hive) 4684 { 4685 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) 4686 return false; 4687 4688 if (hive) { 4689 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock); 4690 } else { 4691 down_write(&adev->reset_sem); 4692 } 4693 4694 switch (amdgpu_asic_reset_method(adev)) { 4695 case AMD_RESET_METHOD_MODE1: 4696 adev->mp1_state = PP_MP1_STATE_SHUTDOWN; 4697 break; 4698 case AMD_RESET_METHOD_MODE2: 4699 adev->mp1_state = PP_MP1_STATE_RESET; 4700 break; 4701 default: 4702 adev->mp1_state = PP_MP1_STATE_NONE; 4703 break; 4704 } 4705 4706 return true; 4707 } 4708 4709 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) 4710 { 4711 amdgpu_vf_error_trans_all(adev); 4712 adev->mp1_state = PP_MP1_STATE_NONE; 4713 atomic_set(&adev->in_gpu_reset, 0); 4714 up_write(&adev->reset_sem); 4715 } 4716 4717 /* 4718 * to lockup a list of amdgpu devices in a hive safely, if not a hive 4719 * with multiple nodes, it will be similar as amdgpu_device_lock_adev. 4720 * 4721 * unlock won't require roll back. 4722 */ 4723 static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) 4724 { 4725 struct amdgpu_device *tmp_adev = NULL; 4726 4727 if (adev->gmc.xgmi.num_physical_nodes > 1) { 4728 if (!hive) { 4729 dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes"); 4730 return -ENODEV; 4731 } 4732 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 4733 if (!amdgpu_device_lock_adev(tmp_adev, hive)) 4734 goto roll_back; 4735 } 4736 } else if (!amdgpu_device_lock_adev(adev, hive)) 4737 return -EAGAIN; 4738 4739 return 0; 4740 roll_back: 4741 if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) { 4742 /* 4743 * if the lockup iteration break in the middle of a hive, 4744 * it may means there may has a race issue, 4745 * or a hive device locked up independently. 4746 * we may be in trouble and may not, so will try to roll back 4747 * the lock and give out a warnning. 4748 */ 4749 dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock"); 4750 list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) { 4751 amdgpu_device_unlock_adev(tmp_adev); 4752 } 4753 } 4754 return -EAGAIN; 4755 } 4756 4757 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) 4758 { 4759 struct pci_dev *p = NULL; 4760 4761 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 4762 adev->pdev->bus->number, 1); 4763 if (p) { 4764 pm_runtime_enable(&(p->dev)); 4765 pm_runtime_resume(&(p->dev)); 4766 } 4767 } 4768 4769 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) 4770 { 4771 enum amd_reset_method reset_method; 4772 struct pci_dev *p = NULL; 4773 u64 expires; 4774 4775 /* 4776 * For now, only BACO and mode1 reset are confirmed 4777 * to suffer the audio issue without proper suspended. 4778 */ 4779 reset_method = amdgpu_asic_reset_method(adev); 4780 if ((reset_method != AMD_RESET_METHOD_BACO) && 4781 (reset_method != AMD_RESET_METHOD_MODE1)) 4782 return -EINVAL; 4783 4784 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 4785 adev->pdev->bus->number, 1); 4786 if (!p) 4787 return -ENODEV; 4788 4789 expires = pm_runtime_autosuspend_expiration(&(p->dev)); 4790 if (!expires) 4791 /* 4792 * If we cannot get the audio device autosuspend delay, 4793 * a fixed 4S interval will be used. Considering 3S is 4794 * the audio controller default autosuspend delay setting. 4795 * 4S used here is guaranteed to cover that. 4796 */ 4797 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; 4798 4799 while (!pm_runtime_status_suspended(&(p->dev))) { 4800 if (!pm_runtime_suspend(&(p->dev))) 4801 break; 4802 4803 if (expires < ktime_get_mono_fast_ns()) { 4804 dev_warn(adev->dev, "failed to suspend display audio\n"); 4805 /* TODO: abort the succeeding gpu reset? */ 4806 return -ETIMEDOUT; 4807 } 4808 } 4809 4810 pm_runtime_disable(&(p->dev)); 4811 4812 return 0; 4813 } 4814 4815 static void amdgpu_device_recheck_guilty_jobs( 4816 struct amdgpu_device *adev, struct list_head *device_list_handle, 4817 struct amdgpu_reset_context *reset_context) 4818 { 4819 int i, r = 0; 4820 4821 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4822 struct amdgpu_ring *ring = adev->rings[i]; 4823 int ret = 0; 4824 struct drm_sched_job *s_job; 4825 4826 if (!ring || !ring->sched.thread) 4827 continue; 4828 4829 s_job = list_first_entry_or_null(&ring->sched.pending_list, 4830 struct drm_sched_job, list); 4831 if (s_job == NULL) 4832 continue; 4833 4834 /* clear job's guilty and depend the folowing step to decide the real one */ 4835 drm_sched_reset_karma(s_job); 4836 drm_sched_resubmit_jobs_ext(&ring->sched, 1); 4837 4838 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); 4839 if (ret == 0) { /* timeout */ 4840 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n", 4841 ring->sched.name, s_job->id); 4842 4843 /* set guilty */ 4844 drm_sched_increase_karma(s_job); 4845 retry: 4846 /* do hw reset */ 4847 if (amdgpu_sriov_vf(adev)) { 4848 amdgpu_virt_fini_data_exchange(adev); 4849 r = amdgpu_device_reset_sriov(adev, false); 4850 if (r) 4851 adev->asic_reset_res = r; 4852 } else { 4853 clear_bit(AMDGPU_SKIP_HW_RESET, 4854 &reset_context->flags); 4855 r = amdgpu_do_asic_reset(device_list_handle, 4856 reset_context); 4857 if (r && r == -EAGAIN) 4858 goto retry; 4859 } 4860 4861 /* 4862 * add reset counter so that the following 4863 * resubmitted job could flush vmid 4864 */ 4865 atomic_inc(&adev->gpu_reset_counter); 4866 continue; 4867 } 4868 4869 /* got the hw fence, signal finished fence */ 4870 atomic_dec(ring->sched.score); 4871 dma_fence_get(&s_job->s_fence->finished); 4872 dma_fence_signal(&s_job->s_fence->finished); 4873 dma_fence_put(&s_job->s_fence->finished); 4874 4875 /* remove node from list and free the job */ 4876 spin_lock(&ring->sched.job_list_lock); 4877 list_del_init(&s_job->list); 4878 spin_unlock(&ring->sched.job_list_lock); 4879 ring->sched.ops->free_job(s_job); 4880 } 4881 } 4882 4883 /** 4884 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 4885 * 4886 * @adev: amdgpu_device pointer 4887 * @job: which job trigger hang 4888 * 4889 * Attempt to reset the GPU if it has hung (all asics). 4890 * Attempt to do soft-reset or full-reset and reinitialize Asic 4891 * Returns 0 for success or an error on failure. 4892 */ 4893 4894 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 4895 struct amdgpu_job *job) 4896 { 4897 struct list_head device_list, *device_list_handle = NULL; 4898 bool job_signaled = false; 4899 struct amdgpu_hive_info *hive = NULL; 4900 struct amdgpu_device *tmp_adev = NULL; 4901 int i, r = 0; 4902 bool need_emergency_restart = false; 4903 bool audio_suspended = false; 4904 int tmp_vram_lost_counter; 4905 struct amdgpu_reset_context reset_context; 4906 4907 memset(&reset_context, 0, sizeof(reset_context)); 4908 4909 /* 4910 * Special case: RAS triggered and full reset isn't supported 4911 */ 4912 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); 4913 4914 /* 4915 * Flush RAM to disk so that after reboot 4916 * the user can read log and see why the system rebooted. 4917 */ 4918 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { 4919 DRM_WARN("Emergency reboot."); 4920 4921 ksys_sync_helper(); 4922 emergency_restart(); 4923 } 4924 4925 dev_info(adev->dev, "GPU %s begin!\n", 4926 need_emergency_restart ? "jobs stop":"reset"); 4927 4928 /* 4929 * Here we trylock to avoid chain of resets executing from 4930 * either trigger by jobs on different adevs in XGMI hive or jobs on 4931 * different schedulers for same device while this TO handler is running. 4932 * We always reset all schedulers for device and all devices for XGMI 4933 * hive so that should take care of them too. 4934 */ 4935 hive = amdgpu_get_xgmi_hive(adev); 4936 if (hive) { 4937 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { 4938 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", 4939 job ? job->base.id : -1, hive->hive_id); 4940 amdgpu_put_xgmi_hive(hive); 4941 if (job && job->vm) 4942 drm_sched_increase_karma(&job->base); 4943 return 0; 4944 } 4945 mutex_lock(&hive->hive_lock); 4946 } 4947 4948 reset_context.method = AMD_RESET_METHOD_NONE; 4949 reset_context.reset_req_dev = adev; 4950 reset_context.job = job; 4951 reset_context.hive = hive; 4952 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); 4953 4954 /* 4955 * lock the device before we try to operate the linked list 4956 * if didn't get the device lock, don't touch the linked list since 4957 * others may iterating it. 4958 */ 4959 r = amdgpu_device_lock_hive_adev(adev, hive); 4960 if (r) { 4961 dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress", 4962 job ? job->base.id : -1); 4963 4964 /* even we skipped this reset, still need to set the job to guilty */ 4965 if (job && job->vm) 4966 drm_sched_increase_karma(&job->base); 4967 goto skip_recovery; 4968 } 4969 4970 /* 4971 * Build list of devices to reset. 4972 * In case we are in XGMI hive mode, resort the device list 4973 * to put adev in the 1st position. 4974 */ 4975 INIT_LIST_HEAD(&device_list); 4976 if (adev->gmc.xgmi.num_physical_nodes > 1) { 4977 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) 4978 list_add_tail(&tmp_adev->reset_list, &device_list); 4979 if (!list_is_first(&adev->reset_list, &device_list)) 4980 list_rotate_to_front(&adev->reset_list, &device_list); 4981 device_list_handle = &device_list; 4982 } else { 4983 list_add_tail(&adev->reset_list, &device_list); 4984 device_list_handle = &device_list; 4985 } 4986 4987 /* block all schedulers and reset given job's ring */ 4988 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 4989 /* 4990 * Try to put the audio codec into suspend state 4991 * before gpu reset started. 4992 * 4993 * Due to the power domain of the graphics device 4994 * is shared with AZ power domain. Without this, 4995 * we may change the audio hardware from behind 4996 * the audio driver's back. That will trigger 4997 * some audio codec errors. 4998 */ 4999 if (!amdgpu_device_suspend_display_audio(tmp_adev)) 5000 audio_suspended = true; 5001 5002 amdgpu_ras_set_error_query_ready(tmp_adev, false); 5003 5004 cancel_delayed_work_sync(&tmp_adev->delayed_init_work); 5005 5006 if (!amdgpu_sriov_vf(tmp_adev)) 5007 amdgpu_amdkfd_pre_reset(tmp_adev); 5008 5009 /* 5010 * Mark these ASICs to be reseted as untracked first 5011 * And add them back after reset completed 5012 */ 5013 amdgpu_unregister_gpu_instance(tmp_adev); 5014 5015 amdgpu_fbdev_set_suspend(tmp_adev, 1); 5016 5017 /* disable ras on ALL IPs */ 5018 if (!need_emergency_restart && 5019 amdgpu_device_ip_need_full_reset(tmp_adev)) 5020 amdgpu_ras_suspend(tmp_adev); 5021 5022 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5023 struct amdgpu_ring *ring = tmp_adev->rings[i]; 5024 5025 if (!ring || !ring->sched.thread) 5026 continue; 5027 5028 drm_sched_stop(&ring->sched, job ? &job->base : NULL); 5029 5030 if (need_emergency_restart) 5031 amdgpu_job_stop_all_jobs_on_sched(&ring->sched); 5032 } 5033 atomic_inc(&tmp_adev->gpu_reset_counter); 5034 } 5035 5036 if (need_emergency_restart) 5037 goto skip_sched_resume; 5038 5039 /* 5040 * Must check guilty signal here since after this point all old 5041 * HW fences are force signaled. 5042 * 5043 * job->base holds a reference to parent fence 5044 */ 5045 if (job && job->base.s_fence->parent && 5046 dma_fence_is_signaled(job->base.s_fence->parent)) { 5047 job_signaled = true; 5048 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 5049 goto skip_hw_reset; 5050 } 5051 5052 retry: /* Rest of adevs pre asic reset from XGMI hive. */ 5053 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5054 r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context); 5055 /*TODO Should we stop ?*/ 5056 if (r) { 5057 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", 5058 r, adev_to_drm(tmp_adev)->unique); 5059 tmp_adev->asic_reset_res = r; 5060 } 5061 } 5062 5063 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter)); 5064 /* Actual ASIC resets if needed.*/ 5065 /* TODO Implement XGMI hive reset logic for SRIOV */ 5066 if (amdgpu_sriov_vf(adev)) { 5067 r = amdgpu_device_reset_sriov(adev, job ? false : true); 5068 if (r) 5069 adev->asic_reset_res = r; 5070 } else { 5071 r = amdgpu_do_asic_reset(device_list_handle, &reset_context); 5072 if (r && r == -EAGAIN) 5073 goto retry; 5074 } 5075 5076 skip_hw_reset: 5077 5078 /* Post ASIC reset for all devs .*/ 5079 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5080 5081 /* 5082 * Sometimes a later bad compute job can block a good gfx job as gfx 5083 * and compute ring share internal GC HW mutually. We add an additional 5084 * guilty jobs recheck step to find the real guilty job, it synchronously 5085 * submits and pends for the first job being signaled. If it gets timeout, 5086 * we identify it as a real guilty job. 5087 */ 5088 if (amdgpu_gpu_recovery == 2 && 5089 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter))) 5090 amdgpu_device_recheck_guilty_jobs( 5091 tmp_adev, device_list_handle, &reset_context); 5092 5093 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5094 struct amdgpu_ring *ring = tmp_adev->rings[i]; 5095 5096 if (!ring || !ring->sched.thread) 5097 continue; 5098 5099 /* No point to resubmit jobs if we didn't HW reset*/ 5100 if (!tmp_adev->asic_reset_res && !job_signaled) 5101 drm_sched_resubmit_jobs(&ring->sched); 5102 5103 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); 5104 } 5105 5106 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { 5107 drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); 5108 } 5109 5110 tmp_adev->asic_reset_res = 0; 5111 5112 if (r) { 5113 /* bad news, how to tell it to userspace ? */ 5114 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); 5115 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 5116 } else { 5117 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); 5118 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) 5119 DRM_WARN("smart shift update failed\n"); 5120 } 5121 } 5122 5123 skip_sched_resume: 5124 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5125 /* unlock kfd: SRIOV would do it separately */ 5126 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) 5127 amdgpu_amdkfd_post_reset(tmp_adev); 5128 5129 /* kfd_post_reset will do nothing if kfd device is not initialized, 5130 * need to bring up kfd here if it's not be initialized before 5131 */ 5132 if (!adev->kfd.init_complete) 5133 amdgpu_amdkfd_device_init(adev); 5134 5135 if (audio_suspended) 5136 amdgpu_device_resume_display_audio(tmp_adev); 5137 amdgpu_device_unlock_adev(tmp_adev); 5138 } 5139 5140 skip_recovery: 5141 if (hive) { 5142 atomic_set(&hive->in_reset, 0); 5143 mutex_unlock(&hive->hive_lock); 5144 amdgpu_put_xgmi_hive(hive); 5145 } 5146 5147 if (r && r != -EAGAIN) 5148 dev_info(adev->dev, "GPU reset end with ret = %d\n", r); 5149 return r; 5150 } 5151 5152 /** 5153 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot 5154 * 5155 * @adev: amdgpu_device pointer 5156 * 5157 * Fetchs and stores in the driver the PCIE capabilities (gen speed 5158 * and lanes) of the slot the device is in. Handles APUs and 5159 * virtualized environments where PCIE config space may not be available. 5160 */ 5161 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 5162 { 5163 struct pci_dev *pdev; 5164 enum pci_bus_speed speed_cap, platform_speed_cap; 5165 enum pcie_link_width platform_link_width; 5166 5167 if (amdgpu_pcie_gen_cap) 5168 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 5169 5170 if (amdgpu_pcie_lane_cap) 5171 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 5172 5173 /* covers APUs as well */ 5174 if (pci_is_root_bus(adev->pdev->bus)) { 5175 if (adev->pm.pcie_gen_mask == 0) 5176 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 5177 if (adev->pm.pcie_mlw_mask == 0) 5178 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 5179 return; 5180 } 5181 5182 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 5183 return; 5184 5185 pcie_bandwidth_available(adev->pdev, NULL, 5186 &platform_speed_cap, &platform_link_width); 5187 5188 if (adev->pm.pcie_gen_mask == 0) { 5189 /* asic caps */ 5190 pdev = adev->pdev; 5191 speed_cap = pcie_get_speed_cap(pdev); 5192 if (speed_cap == PCI_SPEED_UNKNOWN) { 5193 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5194 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5195 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 5196 } else { 5197 if (speed_cap == PCIE_SPEED_32_0GT) 5198 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5199 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5200 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5201 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 | 5202 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5); 5203 else if (speed_cap == PCIE_SPEED_16_0GT) 5204 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5205 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5206 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5207 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 5208 else if (speed_cap == PCIE_SPEED_8_0GT) 5209 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5210 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5211 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 5212 else if (speed_cap == PCIE_SPEED_5_0GT) 5213 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5214 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 5215 else 5216 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 5217 } 5218 /* platform caps */ 5219 if (platform_speed_cap == PCI_SPEED_UNKNOWN) { 5220 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5221 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 5222 } else { 5223 if (platform_speed_cap == PCIE_SPEED_32_0GT) 5224 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5225 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5226 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5227 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 | 5228 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5); 5229 else if (platform_speed_cap == PCIE_SPEED_16_0GT) 5230 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5231 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5232 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5233 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 5234 else if (platform_speed_cap == PCIE_SPEED_8_0GT) 5235 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5236 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5237 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 5238 else if (platform_speed_cap == PCIE_SPEED_5_0GT) 5239 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5240 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 5241 else 5242 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 5243 5244 } 5245 } 5246 if (adev->pm.pcie_mlw_mask == 0) { 5247 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 5248 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 5249 } else { 5250 switch (platform_link_width) { 5251 case PCIE_LNK_X32: 5252 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 5253 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 5254 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5255 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5256 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5257 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5258 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5259 break; 5260 case PCIE_LNK_X16: 5261 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 5262 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5263 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5264 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5265 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5266 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5267 break; 5268 case PCIE_LNK_X12: 5269 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5270 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5271 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5272 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5273 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5274 break; 5275 case PCIE_LNK_X8: 5276 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5277 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5278 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5279 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5280 break; 5281 case PCIE_LNK_X4: 5282 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5283 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5284 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5285 break; 5286 case PCIE_LNK_X2: 5287 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5288 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5289 break; 5290 case PCIE_LNK_X1: 5291 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 5292 break; 5293 default: 5294 break; 5295 } 5296 } 5297 } 5298 } 5299 5300 int amdgpu_device_baco_enter(struct drm_device *dev) 5301 { 5302 struct amdgpu_device *adev = drm_to_adev(dev); 5303 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 5304 5305 if (!amdgpu_device_supports_baco(adev_to_drm(adev))) 5306 return -ENOTSUPP; 5307 5308 if (ras && adev->ras_enabled && 5309 adev->nbio.funcs->enable_doorbell_interrupt) 5310 adev->nbio.funcs->enable_doorbell_interrupt(adev, false); 5311 5312 return amdgpu_dpm_baco_enter(adev); 5313 } 5314 5315 int amdgpu_device_baco_exit(struct drm_device *dev) 5316 { 5317 struct amdgpu_device *adev = drm_to_adev(dev); 5318 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 5319 int ret = 0; 5320 5321 if (!amdgpu_device_supports_baco(adev_to_drm(adev))) 5322 return -ENOTSUPP; 5323 5324 ret = amdgpu_dpm_baco_exit(adev); 5325 if (ret) 5326 return ret; 5327 5328 if (ras && adev->ras_enabled && 5329 adev->nbio.funcs->enable_doorbell_interrupt) 5330 adev->nbio.funcs->enable_doorbell_interrupt(adev, true); 5331 5332 if (amdgpu_passthrough(adev) && 5333 adev->nbio.funcs->clear_doorbell_interrupt) 5334 adev->nbio.funcs->clear_doorbell_interrupt(adev); 5335 5336 return 0; 5337 } 5338 5339 static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev) 5340 { 5341 int i; 5342 5343 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5344 struct amdgpu_ring *ring = adev->rings[i]; 5345 5346 if (!ring || !ring->sched.thread) 5347 continue; 5348 5349 cancel_delayed_work_sync(&ring->sched.work_tdr); 5350 } 5351 } 5352 5353 /** 5354 * amdgpu_pci_error_detected - Called when a PCI error is detected. 5355 * @pdev: PCI device struct 5356 * @state: PCI channel state 5357 * 5358 * Description: Called when a PCI error is detected. 5359 * 5360 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT. 5361 */ 5362 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) 5363 { 5364 struct drm_device *dev = pci_get_drvdata(pdev); 5365 struct amdgpu_device *adev = drm_to_adev(dev); 5366 int i; 5367 5368 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); 5369 5370 if (adev->gmc.xgmi.num_physical_nodes > 1) { 5371 DRM_WARN("No support for XGMI hive yet..."); 5372 return PCI_ERS_RESULT_DISCONNECT; 5373 } 5374 5375 switch (state) { 5376 case pci_channel_io_normal: 5377 return PCI_ERS_RESULT_CAN_RECOVER; 5378 /* Fatal error, prepare for slot reset */ 5379 case pci_channel_io_frozen: 5380 /* 5381 * Cancel and wait for all TDRs in progress if failing to 5382 * set adev->in_gpu_reset in amdgpu_device_lock_adev 5383 * 5384 * Locking adev->reset_sem will prevent any external access 5385 * to GPU during PCI error recovery 5386 */ 5387 while (!amdgpu_device_lock_adev(adev, NULL)) 5388 amdgpu_cancel_all_tdr(adev); 5389 5390 /* 5391 * Block any work scheduling as we do for regular GPU reset 5392 * for the duration of the recovery 5393 */ 5394 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5395 struct amdgpu_ring *ring = adev->rings[i]; 5396 5397 if (!ring || !ring->sched.thread) 5398 continue; 5399 5400 drm_sched_stop(&ring->sched, NULL); 5401 } 5402 atomic_inc(&adev->gpu_reset_counter); 5403 return PCI_ERS_RESULT_NEED_RESET; 5404 case pci_channel_io_perm_failure: 5405 /* Permanent error, prepare for device removal */ 5406 return PCI_ERS_RESULT_DISCONNECT; 5407 } 5408 5409 return PCI_ERS_RESULT_NEED_RESET; 5410 } 5411 5412 /** 5413 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers 5414 * @pdev: pointer to PCI device 5415 */ 5416 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) 5417 { 5418 5419 DRM_INFO("PCI error: mmio enabled callback!!\n"); 5420 5421 /* TODO - dump whatever for debugging purposes */ 5422 5423 /* This called only if amdgpu_pci_error_detected returns 5424 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still 5425 * works, no need to reset slot. 5426 */ 5427 5428 return PCI_ERS_RESULT_RECOVERED; 5429 } 5430 5431 /** 5432 * amdgpu_pci_slot_reset - Called when PCI slot has been reset. 5433 * @pdev: PCI device struct 5434 * 5435 * Description: This routine is called by the pci error recovery 5436 * code after the PCI slot has been reset, just before we 5437 * should resume normal operations. 5438 */ 5439 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) 5440 { 5441 struct drm_device *dev = pci_get_drvdata(pdev); 5442 struct amdgpu_device *adev = drm_to_adev(dev); 5443 int r, i; 5444 struct amdgpu_reset_context reset_context; 5445 u32 memsize; 5446 struct list_head device_list; 5447 5448 DRM_INFO("PCI error: slot reset callback!!\n"); 5449 5450 memset(&reset_context, 0, sizeof(reset_context)); 5451 5452 INIT_LIST_HEAD(&device_list); 5453 list_add_tail(&adev->reset_list, &device_list); 5454 5455 /* wait for asic to come out of reset */ 5456 msleep(500); 5457 5458 /* Restore PCI confspace */ 5459 amdgpu_device_load_pci_state(pdev); 5460 5461 /* confirm ASIC came out of reset */ 5462 for (i = 0; i < adev->usec_timeout; i++) { 5463 memsize = amdgpu_asic_get_config_memsize(adev); 5464 5465 if (memsize != 0xffffffff) 5466 break; 5467 udelay(1); 5468 } 5469 if (memsize == 0xffffffff) { 5470 r = -ETIME; 5471 goto out; 5472 } 5473 5474 reset_context.method = AMD_RESET_METHOD_NONE; 5475 reset_context.reset_req_dev = adev; 5476 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); 5477 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); 5478 5479 adev->no_hw_access = true; 5480 r = amdgpu_device_pre_asic_reset(adev, &reset_context); 5481 adev->no_hw_access = false; 5482 if (r) 5483 goto out; 5484 5485 r = amdgpu_do_asic_reset(&device_list, &reset_context); 5486 5487 out: 5488 if (!r) { 5489 if (amdgpu_device_cache_pci_state(adev->pdev)) 5490 pci_restore_state(adev->pdev); 5491 5492 DRM_INFO("PCIe error recovery succeeded\n"); 5493 } else { 5494 DRM_ERROR("PCIe error recovery failed, err:%d", r); 5495 amdgpu_device_unlock_adev(adev); 5496 } 5497 5498 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 5499 } 5500 5501 /** 5502 * amdgpu_pci_resume() - resume normal ops after PCI reset 5503 * @pdev: pointer to PCI device 5504 * 5505 * Called when the error recovery driver tells us that its 5506 * OK to resume normal operation. 5507 */ 5508 void amdgpu_pci_resume(struct pci_dev *pdev) 5509 { 5510 struct drm_device *dev = pci_get_drvdata(pdev); 5511 struct amdgpu_device *adev = drm_to_adev(dev); 5512 int i; 5513 5514 5515 DRM_INFO("PCI error: resume callback!!\n"); 5516 5517 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5518 struct amdgpu_ring *ring = adev->rings[i]; 5519 5520 if (!ring || !ring->sched.thread) 5521 continue; 5522 5523 5524 drm_sched_resubmit_jobs(&ring->sched); 5525 drm_sched_start(&ring->sched, true); 5526 } 5527 5528 amdgpu_device_unlock_adev(adev); 5529 } 5530 5531 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) 5532 { 5533 struct drm_device *dev = pci_get_drvdata(pdev); 5534 struct amdgpu_device *adev = drm_to_adev(dev); 5535 int r; 5536 5537 r = pci_save_state(pdev); 5538 if (!r) { 5539 kfree(adev->pci_state); 5540 5541 adev->pci_state = pci_store_saved_state(pdev); 5542 5543 if (!adev->pci_state) { 5544 DRM_ERROR("Failed to store PCI saved state"); 5545 return false; 5546 } 5547 } else { 5548 DRM_WARN("Failed to save PCI state, err:%d\n", r); 5549 return false; 5550 } 5551 5552 return true; 5553 } 5554 5555 bool amdgpu_device_load_pci_state(struct pci_dev *pdev) 5556 { 5557 struct drm_device *dev = pci_get_drvdata(pdev); 5558 struct amdgpu_device *adev = drm_to_adev(dev); 5559 int r; 5560 5561 if (!adev->pci_state) 5562 return false; 5563 5564 r = pci_load_saved_state(pdev, adev->pci_state); 5565 5566 if (!r) { 5567 pci_restore_state(pdev); 5568 } else { 5569 DRM_WARN("Failed to load PCI state, err:%d\n", r); 5570 return false; 5571 } 5572 5573 return true; 5574 } 5575 5576 void amdgpu_device_flush_hdp(struct amdgpu_device *adev, 5577 struct amdgpu_ring *ring) 5578 { 5579 #ifdef CONFIG_X86_64 5580 if (adev->flags & AMD_IS_APU) 5581 return; 5582 #endif 5583 if (adev->gmc.xgmi.connected_to_cpu) 5584 return; 5585 5586 if (ring && ring->funcs->emit_hdp_flush) 5587 amdgpu_ring_emit_hdp_flush(ring); 5588 else 5589 amdgpu_asic_flush_hdp(adev, ring); 5590 } 5591 5592 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, 5593 struct amdgpu_ring *ring) 5594 { 5595 #ifdef CONFIG_X86_64 5596 if (adev->flags & AMD_IS_APU) 5597 return; 5598 #endif 5599 if (adev->gmc.xgmi.connected_to_cpu) 5600 return; 5601 5602 amdgpu_asic_invalidate_hdp(adev, ring); 5603 } 5604