/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);
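
/*
 * Illustrative sketch (not part of the driver): how the attribute defined
 * above is typically consumed from user space.  The sysfs path is an
 * assumption and depends on which DRM card index the device is assigned.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char buf[32];
 *		FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *
 *		if (f && fgets(buf, sizeof(buf), f))
 *			printf("PCIe replays: %s", buf);
 *		if (f)
 *			fclose(f);
 *		return 0;
 *	}
 */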

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
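
/*
 * Illustrative sketch (not part of the driver): a caller copying a small,
 * dword-aligned scratch buffer into VRAM and reading it back through the
 * helper above.  The offset and buffer contents are made up for the example.
 *
 *	uint32_t data[4] = { 0x1, 0x2, 0x3, 0x4 };
 *	uint32_t readback[4];
 *
 *	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), true);
 *	amdgpu_device_vram_access(adev, 0x1000, readback, sizeof(readback), false);
 */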

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg,
				       uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}
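
/*
 * Illustrative sketch (not part of the driver): a read-modify-write of a
 * register through the helpers above.  The register offset and masks are
 * made up for the example; real callers normally go through the RREG32()/
 * WREG32() convenience macros, which expand to these functions.
 *
 *	uint32_t tmp;
 *
 *	tmp = amdgpu_mm_rreg(adev, reg_offset, 0);
 *	tmp &= ~field_mask;
 *	tmp |= field_value & field_mask;
 *	amdgpu_mm_wreg(adev, reg_offset, tmp, 0);
 */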

/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * this function is invoked only for debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with the specified and/or masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}
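
/*
 * Illustrative sketch (not part of the driver): how a caller lays out a
 * golden register list for the helper above.  Each entry is a triple of
 * register offset, and-mask and or-mask; the register names and values
 * below are hypothetical and made up for the example.
 *
 *	static const u32 example_golden_regs[] = {
 *		mmEXAMPLE_REG_A, 0xffffffff, 0x00000001,	(full overwrite)
 *		mmEXAMPLE_REG_B, 0x00ff0000, 0x00120000,	(masked update)
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 */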

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page.  The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page.  So with the paging queue enabled,
	 * num_doorbells is increased by one page (0x400 dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}
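
/*
 * Illustrative sketch (not part of the driver): typical use of the
 * writeback slot allocator above by a ring or IP block.  The variable
 * names are made up for the example; @wb_slot is a dword offset into
 * adev->wb.wb once allocated.
 *
 *	u32 wb_slot;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &wb_slot);
 *	if (r)
 *		return r;
 *
 *	(the GPU writes status at adev->wb.gpu_addr + wb_slot * 4,
 *	 the CPU reads it back from adev->wb.wb[wb_slot])
 *
 *	amdgpu_device_wb_free(adev, wb_slot);
 */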

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old SMC firmware still needs the driver to do a vPost, otherwise
		 * the GPU hangs.  SMC firmware versions above 22.15 don't have this flaw,
		 * so we force vPost to be executed for SMC versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory; a page is 4KB so we have 12 bits of offset, a minimum of
 * 9 bits in the page table and the remaining bits in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver.  Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver.  Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}
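
/*
 * Illustrative sketch (not part of the driver): a caller gating clocks for
 * the GFX block through the helper above.  Note that @dev is the
 * amdgpu_device pointer passed as a void *.
 *
 *	int r;
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 *	if (r)
 *		DRM_WARN("failed to gate GFX clocks (%d)\n", r);
 */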

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}
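
/*
 * Illustrative sketch (not part of the driver): gating a feature on a
 * minimum IP version with the comparison helper above.  The version
 * numbers are made up for the example.
 *
 *	bool new_enough;
 *
 *	new_enough = (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *							 9, 1) == 0);
 *	(true on GFX v9.1 or later, false otherwise or if the block is absent)
 */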

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}
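
/*
 * Illustrative sketch (not part of the driver): the format the parser above
 * expects from the virtual_display module parameter.  Entries are separated
 * by ';'; each entry is a PCI bus address (or "all"), optionally followed by
 * ',<num_crtc>'.  The bus address below is made up for the example.
 *
 *	virtual_display=0000:26:00.0,2		(one device, two virtual crtcs)
 *	virtual_display=all,1			(every amdgpu device, one crtc)
 */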

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->asic_type == CHIP_RAVEN ||
		    adev->asic_type == CHIP_RENOIR)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_get_gfx_info(adev);

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		/* handle vbios stuff prior full access mode for new handshake */
		if (adev->virt.req_init_data_ver == 1) {
			if (!amdgpu_get_bios(adev)) {
				DRM_ERROR("failed to get vbios\n");
				return -EINVAL;
			}

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	/* we need to send REQ_GPU here for legacy handshake otherwise the vbios
	 * will not be prepared by host for this VF */
	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* skip vbios handling for new handshake */
			if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
				continue;

			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (adev->in_gpu_reset || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
IPs 1965 * 1966 * @adev: amdgpu_device pointer 1967 * 1968 * Main initialization pass for hardware IPs. The list of all the hardware 1969 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 1970 * are run. sw_init initializes the software state associated with each IP 1971 * and hw_init initializes the hardware associated with each IP. 1972 * Returns 0 on success, negative error code on failure. 1973 */ 1974 static int amdgpu_device_ip_init(struct amdgpu_device *adev) 1975 { 1976 int i, r; 1977 1978 r = amdgpu_ras_init(adev); 1979 if (r) 1980 return r; 1981 1982 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { 1983 r = amdgpu_virt_request_full_gpu(adev, true); 1984 if (r) 1985 return -EAGAIN; 1986 } 1987 1988 for (i = 0; i < adev->num_ip_blocks; i++) { 1989 if (!adev->ip_blocks[i].status.valid) 1990 continue; 1991 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 1992 if (r) { 1993 DRM_ERROR("sw_init of IP block <%s> failed %d\n", 1994 adev->ip_blocks[i].version->funcs->name, r); 1995 goto init_failed; 1996 } 1997 adev->ip_blocks[i].status.sw = true; 1998 1999 /* need to do gmc hw init early so we can allocate gpu mem */ 2000 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2001 r = amdgpu_device_vram_scratch_init(adev); 2002 if (r) { 2003 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); 2004 goto init_failed; 2005 } 2006 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2007 if (r) { 2008 DRM_ERROR("hw_init %d failed %d\n", i, r); 2009 goto init_failed; 2010 } 2011 r = amdgpu_device_wb_init(adev); 2012 if (r) { 2013 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 2014 goto init_failed; 2015 } 2016 adev->ip_blocks[i].status.hw = true; 2017 2018 /* right after GMC hw init, we create CSA */ 2019 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { 2020 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 2021 AMDGPU_GEM_DOMAIN_VRAM, 2022 AMDGPU_CSA_SIZE); 2023 if (r) { 2024 DRM_ERROR("allocate CSA failed %d\n", r); 2025 goto init_failed; 2026 } 2027 } 2028 } 2029 } 2030 2031 if (amdgpu_sriov_vf(adev)) 2032 amdgpu_virt_init_data_exchange(adev); 2033 2034 r = amdgpu_ib_pool_init(adev); 2035 if (r) { 2036 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2037 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2038 goto init_failed; 2039 } 2040 2041 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2042 if (r) 2043 goto init_failed; 2044 2045 r = amdgpu_device_ip_hw_init_phase1(adev); 2046 if (r) 2047 goto init_failed; 2048 2049 r = amdgpu_device_fw_loading(adev); 2050 if (r) 2051 goto init_failed; 2052 2053 r = amdgpu_device_ip_hw_init_phase2(adev); 2054 if (r) 2055 goto init_failed; 2056 2057 /* 2058 * retired pages will be loaded from eeprom and reserved here, 2059 * it should be called after amdgpu_device_ip_hw_init_phase2 since 2060 * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2061 * for I2C communication which only true at this point. 2062 * recovery_init may fail, but it can free all resources allocated by 2063 * itself and its failure should not stop amdgpu init process. 
2064 * 2065 * Note: theoretically, this should be called before all vram allocations 2066 * to protect retired pages from being abused 2067 */ 2068 amdgpu_ras_recovery_init(adev); 2069 2070 if (adev->gmc.xgmi.num_physical_nodes > 1) 2071 amdgpu_xgmi_add_device(adev); 2072 amdgpu_amdkfd_device_init(adev); 2073 2074 amdgpu_fru_get_product_info(adev); 2075 2076 init_failed: 2077 if (amdgpu_sriov_vf(adev)) 2078 amdgpu_virt_release_full_gpu(adev, true); 2079 2080 return r; 2081 } 2082 2083 /** 2084 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer 2085 * 2086 * @adev: amdgpu_device pointer 2087 * 2088 * Writes a reset magic value to the gart pointer in VRAM. The driver calls 2089 * this function before a GPU reset. If the value is retained after a 2090 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. 2091 */ 2092 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 2093 { 2094 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 2095 } 2096 2097 /** 2098 * amdgpu_device_check_vram_lost - check if vram is valid 2099 * 2100 * @adev: amdgpu_device pointer 2101 * 2102 * Checks the reset magic value written to the gart pointer in VRAM. 2103 * The driver calls this after a GPU reset to see if the contents of 2104 * VRAM are lost or not. 2105 * Returns true if vram is lost, false if not. 2106 */ 2107 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 2108 { 2109 return !!memcmp(adev->gart.ptr, adev->reset_magic, 2110 AMDGPU_RESET_MAGIC_NUM); 2111 } 2112 2113 /** 2114 * amdgpu_device_set_cg_state - set clockgating for amdgpu device 2115 * 2116 * @adev: amdgpu_device pointer 2117 * @state: clockgating state (gate or ungate) 2118 * 2119 * The list of all the hardware IPs that make up the asic is walked and the 2120 * set_clockgating_state callbacks are run. 2121 * During the late init pass this enables clockgating for hardware IPs; 2122 * during fini or suspend it disables clockgating for hardware IPs. 2123 * Returns 0 on success, negative error code on failure. 2124 */ 2125 2126 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2127 enum amd_clockgating_state state) 2128 { 2129 int i, j, r; 2130 2131 if (amdgpu_emu_mode == 1) 2132 return 0; 2133 2134 for (j = 0; j < adev->num_ip_blocks; j++) { 2135 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2136 if (!adev->ip_blocks[i].status.late_initialized) 2137 continue; 2138 /* skip CG for VCE/UVD, it's handled specially */ 2139 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2140 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2141 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2142 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2143 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2144 /* enable clockgating to save power */ 2145 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2146 state); 2147 if (r) { 2148 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2149 adev->ip_blocks[i].version->funcs->name, r); 2150 return r; 2151 } 2152 } 2153 } 2154 2155 return 0; 2156 } 2157 2158 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state) 2159 { 2160 int i, j, r; 2161 2162 if (amdgpu_emu_mode == 1) 2163 return 0; 2164 2165 for (j = 0; j < adev->num_ip_blocks; j++) { 2166 i = state == AMD_PG_STATE_GATE ? 
j : adev->num_ip_blocks - j - 1; 2167 if (!adev->ip_blocks[i].status.late_initialized) 2168 continue; 2169 /* skip PG for VCE/UVD, it's handled specially */ 2170 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2171 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2172 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2173 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2174 adev->ip_blocks[i].version->funcs->set_powergating_state) { 2175 /* enable powergating to save power */ 2176 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2177 state); 2178 if (r) { 2179 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2180 adev->ip_blocks[i].version->funcs->name, r); 2181 return r; 2182 } 2183 } 2184 } 2185 return 0; 2186 } 2187 2188 static int amdgpu_device_enable_mgpu_fan_boost(void) 2189 { 2190 struct amdgpu_gpu_instance *gpu_ins; 2191 struct amdgpu_device *adev; 2192 int i, ret = 0; 2193 2194 mutex_lock(&mgpu_info.mutex); 2195 2196 /* 2197 * MGPU fan boost feature should be enabled 2198 * only when there are two or more dGPUs in 2199 * the system 2200 */ 2201 if (mgpu_info.num_dgpu < 2) 2202 goto out; 2203 2204 for (i = 0; i < mgpu_info.num_dgpu; i++) { 2205 gpu_ins = &(mgpu_info.gpu_ins[i]); 2206 adev = gpu_ins->adev; 2207 if (!(adev->flags & AMD_IS_APU) && 2208 !gpu_ins->mgpu_fan_enabled && 2209 adev->powerplay.pp_funcs && 2210 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { 2211 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2212 if (ret) 2213 break; 2214 2215 gpu_ins->mgpu_fan_enabled = 1; 2216 } 2217 } 2218 2219 out: 2220 mutex_unlock(&mgpu_info.mutex); 2221 2222 return ret; 2223 } 2224 2225 /** 2226 * amdgpu_device_ip_late_init - run late init for hardware IPs 2227 * 2228 * @adev: amdgpu_device pointer 2229 * 2230 * Late initialization pass for hardware IPs. The list of all the hardware 2231 * IPs that make up the asic is walked and the late_init callbacks are run. 2232 * late_init covers any special initialization that an IP requires 2233 * after all of the IPs have been initialized or something that needs to happen 2234 * late in the init process. 2235 * Returns 0 on success, negative error code on failure. 2236 */ 2237 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2238 { 2239 struct amdgpu_gpu_instance *gpu_instance; 2240 int i = 0, r; 2241 2242 for (i = 0; i < adev->num_ip_blocks; i++) { 2243 if (!adev->ip_blocks[i].status.hw) 2244 continue; 2245 if (adev->ip_blocks[i].version->funcs->late_init) { 2246 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 2247 if (r) { 2248 DRM_ERROR("late_init of IP block <%s> failed %d\n", 2249 adev->ip_blocks[i].version->funcs->name, r); 2250 return r; 2251 } 2252 } 2253 adev->ip_blocks[i].status.late_initialized = true; 2254 } 2255 2256 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 2257 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 2258 2259 amdgpu_device_fill_reset_magic(adev); 2260 2261 r = amdgpu_device_enable_mgpu_fan_boost(); 2262 if (r) 2263 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2264 2265 2266 if (adev->gmc.xgmi.num_physical_nodes > 1) { 2267 mutex_lock(&mgpu_info.mutex); 2268 2269 /* 2270 * Reset device p-state to low as this was booted with high. 2271 * 2272 * This should be performed only after all devices from the same 2273 * hive get initialized. 2274 * 2275 * However, it is not known in advance how many devices are in the hive. 
2276 * As this is counted one by one during devices initializations. 2277 * 2278 * So, we wait for all XGMI interlinked devices initialized. 2279 * This may bring some delays as those devices may come from 2280 * different hives. But that should be OK. 2281 */ 2282 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2283 for (i = 0; i < mgpu_info.num_gpu; i++) { 2284 gpu_instance = &(mgpu_info.gpu_ins[i]); 2285 if (gpu_instance->adev->flags & AMD_IS_APU) 2286 continue; 2287 2288 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); 2289 if (r) { 2290 DRM_ERROR("pstate setting failed (%d).\n", r); 2291 break; 2292 } 2293 } 2294 } 2295 2296 mutex_unlock(&mgpu_info.mutex); 2297 } 2298 2299 return 0; 2300 } 2301 2302 /** 2303 * amdgpu_device_ip_fini - run fini for hardware IPs 2304 * 2305 * @adev: amdgpu_device pointer 2306 * 2307 * Main teardown pass for hardware IPs. The list of all the hardware 2308 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 2309 * are run. hw_fini tears down the hardware associated with each IP 2310 * and sw_fini tears down any software state associated with each IP. 2311 * Returns 0 on success, negative error code on failure. 2312 */ 2313 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 2314 { 2315 int i, r; 2316 2317 amdgpu_ras_pre_fini(adev); 2318 2319 if (adev->gmc.xgmi.num_physical_nodes > 1) 2320 amdgpu_xgmi_remove_device(adev); 2321 2322 amdgpu_amdkfd_device_fini(adev); 2323 2324 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2325 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2326 2327 /* need to disable SMC first */ 2328 for (i = 0; i < adev->num_ip_blocks; i++) { 2329 if (!adev->ip_blocks[i].status.hw) 2330 continue; 2331 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2332 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2333 /* XXX handle errors */ 2334 if (r) { 2335 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2336 adev->ip_blocks[i].version->funcs->name, r); 2337 } 2338 adev->ip_blocks[i].status.hw = false; 2339 break; 2340 } 2341 } 2342 2343 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2344 if (!adev->ip_blocks[i].status.hw) 2345 continue; 2346 2347 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2348 /* XXX handle errors */ 2349 if (r) { 2350 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2351 adev->ip_blocks[i].version->funcs->name, r); 2352 } 2353 2354 adev->ip_blocks[i].status.hw = false; 2355 } 2356 2357 2358 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2359 if (!adev->ip_blocks[i].status.sw) 2360 continue; 2361 2362 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2363 amdgpu_ucode_free_bo(adev); 2364 amdgpu_free_static_csa(&adev->virt.csa_obj); 2365 amdgpu_device_wb_fini(adev); 2366 amdgpu_device_vram_scratch_fini(adev); 2367 amdgpu_ib_pool_fini(adev); 2368 } 2369 2370 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2371 /* XXX handle errors */ 2372 if (r) { 2373 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2374 adev->ip_blocks[i].version->funcs->name, r); 2375 } 2376 adev->ip_blocks[i].status.sw = false; 2377 adev->ip_blocks[i].status.valid = false; 2378 } 2379 2380 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2381 if (!adev->ip_blocks[i].status.late_initialized) 2382 continue; 2383 if (adev->ip_blocks[i].version->funcs->late_fini) 2384 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2385 adev->ip_blocks[i].status.late_initialized = false; 2386 } 2387 2388 
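/* RAS is torn down only once every IP block above has completed hw_fini, sw_fini and late_fini */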
amdgpu_ras_fini(adev); 2389 2390 if (amdgpu_sriov_vf(adev)) 2391 if (amdgpu_virt_release_full_gpu(adev, false)) 2392 DRM_ERROR("failed to release exclusive mode on fini\n"); 2393 2394 return 0; 2395 } 2396 2397 /** 2398 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2399 * 2400 * @work: work_struct. 2401 */ 2402 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2403 { 2404 struct amdgpu_device *adev = 2405 container_of(work, struct amdgpu_device, delayed_init_work.work); 2406 int r; 2407 2408 r = amdgpu_ib_ring_tests(adev); 2409 if (r) 2410 DRM_ERROR("ib ring test failed (%d).\n", r); 2411 } 2412 2413 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2414 { 2415 struct amdgpu_device *adev = 2416 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2417 2418 mutex_lock(&adev->gfx.gfx_off_mutex); 2419 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { 2420 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2421 adev->gfx.gfx_off_state = true; 2422 } 2423 mutex_unlock(&adev->gfx.gfx_off_mutex); 2424 } 2425 2426 /** 2427 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2428 * 2429 * @adev: amdgpu_device pointer 2430 * 2431 * Main suspend function for hardware IPs. The list of all the hardware 2432 * IPs that make up the asic is walked, clockgating is disabled and the 2433 * suspend callbacks are run. suspend puts the hardware and software state 2434 * in each IP into a state suitable for suspend. 2435 * Returns 0 on success, negative error code on failure. 2436 */ 2437 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2438 { 2439 int i, r; 2440 2441 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2442 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2443 2444 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2445 if (!adev->ip_blocks[i].status.valid) 2446 continue; 2447 /* displays are handled separately */ 2448 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 2449 /* XXX handle errors */ 2450 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2451 /* XXX handle errors */ 2452 if (r) { 2453 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2454 adev->ip_blocks[i].version->funcs->name, r); 2455 return r; 2456 } 2457 adev->ip_blocks[i].status.hw = false; 2458 } 2459 } 2460 2461 return 0; 2462 } 2463 2464 /** 2465 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2466 * 2467 * @adev: amdgpu_device pointer 2468 * 2469 * Main suspend function for hardware IPs. The list of all the hardware 2470 * IPs that make up the asic is walked, clockgating is disabled and the 2471 * suspend callbacks are run. suspend puts the hardware and software state 2472 * in each IP into a state suitable for suspend. 2473 * Returns 0 on success, negative error code on failure. 
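 * Display (DCE) blocks are skipped here since they were already suspended in phase 1, and PSP is skipped when a RAS err_event_athub interrupt has been triggered, because the PSP connection is lost in that case. 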
2474 */ 2475 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2476 { 2477 int i, r; 2478 2479 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2480 if (!adev->ip_blocks[i].status.valid) 2481 continue; 2482 /* displays are handled in phase1 */ 2483 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2484 continue; 2485 /* PSP lost connection when err_event_athub occurs */ 2486 if (amdgpu_ras_intr_triggered() && 2487 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2488 adev->ip_blocks[i].status.hw = false; 2489 continue; 2490 } 2491 /* XXX handle errors */ 2492 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2493 /* XXX handle errors */ 2494 if (r) { 2495 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2496 adev->ip_blocks[i].version->funcs->name, r); 2497 } 2498 adev->ip_blocks[i].status.hw = false; 2499 /* handle putting the SMC in the appropriate state */ 2500 if(!amdgpu_sriov_vf(adev)){ 2501 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2502 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 2503 if (r) { 2504 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 2505 adev->mp1_state, r); 2506 return r; 2507 } 2508 } 2509 } 2510 adev->ip_blocks[i].status.hw = false; 2511 } 2512 2513 return 0; 2514 } 2515 2516 /** 2517 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2518 * 2519 * @adev: amdgpu_device pointer 2520 * 2521 * Main suspend function for hardware IPs. The list of all the hardware 2522 * IPs that make up the asic is walked, clockgating is disabled and the 2523 * suspend callbacks are run. suspend puts the hardware and software state 2524 * in each IP into a state suitable for suspend. 2525 * Returns 0 on success, negative error code on failure. 2526 */ 2527 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2528 { 2529 int r; 2530 2531 if (amdgpu_sriov_vf(adev)) 2532 amdgpu_virt_request_full_gpu(adev, false); 2533 2534 r = amdgpu_device_ip_suspend_phase1(adev); 2535 if (r) 2536 return r; 2537 r = amdgpu_device_ip_suspend_phase2(adev); 2538 2539 if (amdgpu_sriov_vf(adev)) 2540 amdgpu_virt_release_full_gpu(adev, false); 2541 2542 return r; 2543 } 2544 2545 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2546 { 2547 int i, r; 2548 2549 static enum amd_ip_block_type ip_order[] = { 2550 AMD_IP_BLOCK_TYPE_GMC, 2551 AMD_IP_BLOCK_TYPE_COMMON, 2552 AMD_IP_BLOCK_TYPE_PSP, 2553 AMD_IP_BLOCK_TYPE_IH, 2554 }; 2555 2556 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2557 int j; 2558 struct amdgpu_ip_block *block; 2559 2560 for (j = 0; j < adev->num_ip_blocks; j++) { 2561 block = &adev->ip_blocks[j]; 2562 2563 block->status.hw = false; 2564 if (block->version->type != ip_order[i] || 2565 !block->status.valid) 2566 continue; 2567 2568 r = block->version->funcs->hw_init(adev); 2569 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2570 if (r) 2571 return r; 2572 block->status.hw = true; 2573 } 2574 } 2575 2576 return 0; 2577 } 2578 2579 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2580 { 2581 int i, r; 2582 2583 static enum amd_ip_block_type ip_order[] = { 2584 AMD_IP_BLOCK_TYPE_SMC, 2585 AMD_IP_BLOCK_TYPE_DCE, 2586 AMD_IP_BLOCK_TYPE_GFX, 2587 AMD_IP_BLOCK_TYPE_SDMA, 2588 AMD_IP_BLOCK_TYPE_UVD, 2589 AMD_IP_BLOCK_TYPE_VCE, 2590 AMD_IP_BLOCK_TYPE_VCN 2591 }; 2592 2593 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2594 int j; 2595 struct amdgpu_ip_block *block; 2596 2597 for (j = 0; j < adev->num_ip_blocks; j++) { 2598 
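/* look up the IP block matching this ip_order entry and bring it back up */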
block = &adev->ip_blocks[j]; 2599 2600 if (block->version->type != ip_order[i] || 2601 !block->status.valid || 2602 block->status.hw) 2603 continue; 2604 2605 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 2606 r = block->version->funcs->resume(adev); 2607 else 2608 r = block->version->funcs->hw_init(adev); 2609 2610 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2611 if (r) 2612 return r; 2613 block->status.hw = true; 2614 } 2615 } 2616 2617 return 0; 2618 } 2619 2620 /** 2621 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2622 * 2623 * @adev: amdgpu_device pointer 2624 * 2625 * First resume function for hardware IPs. The list of all the hardware 2626 * IPs that make up the asic is walked and the resume callbacks are run for 2627 * COMMON, GMC, and IH. resume puts the hardware into a functional state 2628 * after a suspend and updates the software state as necessary. This 2629 * function is also used for restoring the GPU after a GPU reset. 2630 * Returns 0 on success, negative error code on failure. 2631 */ 2632 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 2633 { 2634 int i, r; 2635 2636 for (i = 0; i < adev->num_ip_blocks; i++) { 2637 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 2638 continue; 2639 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2640 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2641 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2642 2643 r = adev->ip_blocks[i].version->funcs->resume(adev); 2644 if (r) { 2645 DRM_ERROR("resume of IP block <%s> failed %d\n", 2646 adev->ip_blocks[i].version->funcs->name, r); 2647 return r; 2648 } 2649 adev->ip_blocks[i].status.hw = true; 2650 } 2651 } 2652 2653 return 0; 2654 } 2655 2656 /** 2657 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 2658 * 2659 * @adev: amdgpu_device pointer 2660 * 2661 * First resume function for hardware IPs. The list of all the hardware 2662 * IPs that make up the asic is walked and the resume callbacks are run for 2663 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 2664 * functional state after a suspend and updates the software state as 2665 * necessary. This function is also used for restoring the GPU after a GPU 2666 * reset. 2667 * Returns 0 on success, negative error code on failure. 2668 */ 2669 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 2670 { 2671 int i, r; 2672 2673 for (i = 0; i < adev->num_ip_blocks; i++) { 2674 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 2675 continue; 2676 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2677 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2678 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 2679 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 2680 continue; 2681 r = adev->ip_blocks[i].version->funcs->resume(adev); 2682 if (r) { 2683 DRM_ERROR("resume of IP block <%s> failed %d\n", 2684 adev->ip_blocks[i].version->funcs->name, r); 2685 return r; 2686 } 2687 adev->ip_blocks[i].status.hw = true; 2688 } 2689 2690 return 0; 2691 } 2692 2693 /** 2694 * amdgpu_device_ip_resume - run resume for hardware IPs 2695 * 2696 * @adev: amdgpu_device pointer 2697 * 2698 * Main resume function for hardware IPs. 
The hardware IPs 2699 * are split into two resume functions because they are 2700 * also used in recovering from a GPU reset and some additional 2701 * steps need to be taken between them. In this case (S3/S4) they are 2702 * run sequentially. 2703 * Returns 0 on success, negative error code on failure. 2704 */ 2705 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2706 { 2707 int r; 2708 2709 r = amdgpu_device_ip_resume_phase1(adev); 2710 if (r) 2711 return r; 2712 2713 r = amdgpu_device_fw_loading(adev); 2714 if (r) 2715 return r; 2716 2717 r = amdgpu_device_ip_resume_phase2(adev); 2718 2719 return r; 2720 } 2721 2722 /** 2723 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 2724 * 2725 * @adev: amdgpu_device pointer 2726 * 2727 * Query the VBIOS data tables to determine if the board supports SR-IOV. 2728 */ 2729 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2730 { 2731 if (amdgpu_sriov_vf(adev)) { 2732 if (adev->is_atom_fw) { 2733 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) 2734 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2735 } else { 2736 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 2737 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 2738 } 2739 2740 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 2741 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 2742 } 2743 } 2744 2745 /** 2746 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 2747 * 2748 * @asic_type: AMD asic type 2749 * 2750 * Check if there is DC (new modesetting infrastructure) support for an asic. 2751 * Returns true if DC has support, false if not. 2752 */ 2753 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2754 { 2755 switch (asic_type) { 2756 #if defined(CONFIG_DRM_AMD_DC) 2757 case CHIP_BONAIRE: 2758 case CHIP_KAVERI: 2759 case CHIP_KABINI: 2760 case CHIP_MULLINS: 2761 /* 2762 * We have systems in the wild with these ASICs that require 2763 * LVDS and VGA support which is not supported with DC. 2764 * 2765 * Fall back to the non-DC driver here by default so as not to 2766 * cause regressions. 
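 * DC can still be forced on for these parts with the dc module option (dc=1), which is why the check below requires amdgpu_dc > 0 rather than just "not disabled". 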
2767 */ 2768 return amdgpu_dc > 0; 2769 case CHIP_HAWAII: 2770 case CHIP_CARRIZO: 2771 case CHIP_STONEY: 2772 case CHIP_POLARIS10: 2773 case CHIP_POLARIS11: 2774 case CHIP_POLARIS12: 2775 case CHIP_VEGAM: 2776 case CHIP_TONGA: 2777 case CHIP_FIJI: 2778 case CHIP_VEGA10: 2779 case CHIP_VEGA12: 2780 case CHIP_VEGA20: 2781 #if defined(CONFIG_DRM_AMD_DC_DCN) 2782 case CHIP_RAVEN: 2783 case CHIP_NAVI10: 2784 case CHIP_NAVI14: 2785 case CHIP_NAVI12: 2786 case CHIP_RENOIR: 2787 #endif 2788 return amdgpu_dc != 0; 2789 #endif 2790 default: 2791 if (amdgpu_dc > 0) 2792 DRM_INFO("Display Core has been requested via kernel parameter " 2793 "but isn't supported by ASIC, ignoring\n"); 2794 return false; 2795 } 2796 } 2797 2798 /** 2799 * amdgpu_device_has_dc_support - check if dc is supported 2800 * 2801 * @adev: amdgpu_device_pointer 2802 * 2803 * Returns true for supported, false for not supported 2804 */ 2805 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 2806 { 2807 if (amdgpu_sriov_vf(adev)) 2808 return false; 2809 2810 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2811 } 2812 2813 2814 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 2815 { 2816 struct amdgpu_device *adev = 2817 container_of(__work, struct amdgpu_device, xgmi_reset_work); 2818 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); 2819 2820 /* It's a bug to not have a hive within this function */ 2821 if (WARN_ON(!hive)) 2822 return; 2823 2824 /* 2825 * Use task barrier to synchronize all xgmi reset works across the 2826 * hive. task_barrier_enter and task_barrier_exit will block 2827 * until all the threads running the xgmi reset works reach 2828 * those points. task_barrier_full will do both blocks. 2829 */ 2830 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 2831 2832 task_barrier_enter(&hive->tb); 2833 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev); 2834 2835 if (adev->asic_reset_res) 2836 goto fail; 2837 2838 task_barrier_exit(&hive->tb); 2839 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev); 2840 2841 if (adev->asic_reset_res) 2842 goto fail; 2843 2844 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) 2845 adev->mmhub.funcs->reset_ras_error_count(adev); 2846 } else { 2847 2848 task_barrier_full(&hive->tb); 2849 adev->asic_reset_res = amdgpu_asic_reset(adev); 2850 } 2851 2852 fail: 2853 if (adev->asic_reset_res) 2854 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 2855 adev->asic_reset_res, adev->ddev->unique); 2856 } 2857 2858 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 2859 { 2860 char *input = amdgpu_lockup_timeout; 2861 char *timeout_setting = NULL; 2862 int index = 0; 2863 long timeout; 2864 int ret = 0; 2865 2866 /* 2867 * By default timeout for non compute jobs is 10000. 2868 * And there is no timeout enforced on compute jobs. 2869 * In SR-IOV or passthrough mode, timeout for compute 2870 * jobs are 60000 by default. 
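 * For example, lockup_timeout=10000,60000,10000,10000 sets the gfx, compute, sdma and video timeouts in that order (values in ms; 0 keeps the default, a negative value disables the timeout), as parsed below. 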
2871 */ 2872 adev->gfx_timeout = msecs_to_jiffies(10000); 2873 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2874 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2875 adev->compute_timeout = msecs_to_jiffies(60000); 2876 else 2877 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; 2878 2879 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2880 while ((timeout_setting = strsep(&input, ",")) && 2881 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 2882 ret = kstrtol(timeout_setting, 0, &timeout); 2883 if (ret) 2884 return ret; 2885 2886 if (timeout == 0) { 2887 index++; 2888 continue; 2889 } else if (timeout < 0) { 2890 timeout = MAX_SCHEDULE_TIMEOUT; 2891 } else { 2892 timeout = msecs_to_jiffies(timeout); 2893 } 2894 2895 switch (index++) { 2896 case 0: 2897 adev->gfx_timeout = timeout; 2898 break; 2899 case 1: 2900 adev->compute_timeout = timeout; 2901 break; 2902 case 2: 2903 adev->sdma_timeout = timeout; 2904 break; 2905 case 3: 2906 adev->video_timeout = timeout; 2907 break; 2908 default: 2909 break; 2910 } 2911 } 2912 /* 2913 * There is only one value specified and 2914 * it should apply to all non-compute jobs. 2915 */ 2916 if (index == 1) { 2917 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 2918 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 2919 adev->compute_timeout = adev->gfx_timeout; 2920 } 2921 } 2922 2923 return ret; 2924 } 2925 2926 /** 2927 * amdgpu_device_init - initialize the driver 2928 * 2929 * @adev: amdgpu_device pointer 2930 * @ddev: drm dev pointer 2931 * @pdev: pci dev pointer 2932 * @flags: driver flags 2933 * 2934 * Initializes the driver info and hw (all asics). 2935 * Returns 0 for success or an error on failure. 2936 * Called at driver startup. 2937 */ 2938 int amdgpu_device_init(struct amdgpu_device *adev, 2939 struct drm_device *ddev, 2940 struct pci_dev *pdev, 2941 uint32_t flags) 2942 { 2943 int r, i; 2944 bool boco = false; 2945 u32 max_MBps; 2946 2947 adev->shutdown = false; 2948 adev->dev = &pdev->dev; 2949 adev->ddev = ddev; 2950 adev->pdev = pdev; 2951 adev->flags = flags; 2952 2953 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 2954 adev->asic_type = amdgpu_force_asic_type; 2955 else 2956 adev->asic_type = flags & AMD_ASIC_MASK; 2957 2958 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2959 if (amdgpu_emu_mode == 1) 2960 adev->usec_timeout *= 10; 2961 adev->gmc.gart_size = 512 * 1024 * 1024; 2962 adev->accel_working = false; 2963 adev->num_rings = 0; 2964 adev->mman.buffer_funcs = NULL; 2965 adev->mman.buffer_funcs_ring = NULL; 2966 adev->vm_manager.vm_pte_funcs = NULL; 2967 adev->vm_manager.vm_pte_num_scheds = 0; 2968 adev->gmc.gmc_funcs = NULL; 2969 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2970 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2971 2972 adev->smc_rreg = &amdgpu_invalid_rreg; 2973 adev->smc_wreg = &amdgpu_invalid_wreg; 2974 adev->pcie_rreg = &amdgpu_invalid_rreg; 2975 adev->pcie_wreg = &amdgpu_invalid_wreg; 2976 adev->pciep_rreg = &amdgpu_invalid_rreg; 2977 adev->pciep_wreg = &amdgpu_invalid_wreg; 2978 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 2979 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 2980 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2981 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2982 adev->didt_rreg = &amdgpu_invalid_rreg; 2983 adev->didt_wreg = &amdgpu_invalid_wreg; 2984 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2985 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2986 adev->audio_endpt_rreg = 
&amdgpu_block_invalid_rreg; 2987 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2988 2989 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2990 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2991 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2992 2993 /* mutex initialization are all done here so we 2994 * can recall function without having locking issues */ 2995 atomic_set(&adev->irq.ih.lock, 0); 2996 mutex_init(&adev->firmware.mutex); 2997 mutex_init(&adev->pm.mutex); 2998 mutex_init(&adev->gfx.gpu_clock_mutex); 2999 mutex_init(&adev->srbm_mutex); 3000 mutex_init(&adev->gfx.pipe_reserve_mutex); 3001 mutex_init(&adev->gfx.gfx_off_mutex); 3002 mutex_init(&adev->grbm_idx_mutex); 3003 mutex_init(&adev->mn_lock); 3004 mutex_init(&adev->virt.vf_errors.lock); 3005 hash_init(adev->mn_hash); 3006 mutex_init(&adev->lock_reset); 3007 mutex_init(&adev->psp.mutex); 3008 mutex_init(&adev->notifier_lock); 3009 3010 r = amdgpu_device_check_arguments(adev); 3011 if (r) 3012 return r; 3013 3014 spin_lock_init(&adev->mmio_idx_lock); 3015 spin_lock_init(&adev->smc_idx_lock); 3016 spin_lock_init(&adev->pcie_idx_lock); 3017 spin_lock_init(&adev->uvd_ctx_idx_lock); 3018 spin_lock_init(&adev->didt_idx_lock); 3019 spin_lock_init(&adev->gc_cac_idx_lock); 3020 spin_lock_init(&adev->se_cac_idx_lock); 3021 spin_lock_init(&adev->audio_endpt_idx_lock); 3022 spin_lock_init(&adev->mm_stats.lock); 3023 3024 INIT_LIST_HEAD(&adev->shadow_list); 3025 mutex_init(&adev->shadow_list_lock); 3026 3027 INIT_LIST_HEAD(&adev->ring_lru_list); 3028 spin_lock_init(&adev->ring_lru_list_lock); 3029 3030 INIT_DELAYED_WORK(&adev->delayed_init_work, 3031 amdgpu_device_delayed_init_work_handler); 3032 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3033 amdgpu_device_delay_enable_gfx_off); 3034 3035 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3036 3037 adev->gfx.gfx_off_req_count = 1; 3038 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; 3039 3040 /* Registers mapping */ 3041 /* TODO: block userspace mapping of io register */ 3042 if (adev->asic_type >= CHIP_BONAIRE) { 3043 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3044 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 3045 } else { 3046 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3047 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3048 } 3049 3050 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3051 if (adev->rmmio == NULL) { 3052 return -ENOMEM; 3053 } 3054 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3055 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 3056 3057 /* io port mapping */ 3058 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 3059 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 3060 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 3061 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 3062 break; 3063 } 3064 } 3065 if (adev->rio_mem == NULL) 3066 DRM_INFO("PCI I/O BAR is not found.\n"); 3067 3068 /* enable PCIE atomic ops */ 3069 r = pci_enable_atomic_ops_to_root(adev->pdev, 3070 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3071 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3072 if (r) { 3073 adev->have_atomics_support = false; 3074 DRM_INFO("PCIE atomic ops is not supported\n"); 3075 } else { 3076 adev->have_atomics_support = true; 3077 } 3078 3079 amdgpu_device_get_pcie_info(adev); 3080 3081 if (amdgpu_mcbp) 3082 DRM_INFO("MCBP is enabled\n"); 3083 3084 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) 3085 adev->enable_mes = true; 3086 3087 /* detect hw virtualization here */ 3088 amdgpu_detect_virtualization(adev); 3089 3090 r = amdgpu_device_get_job_timeout_settings(adev); 3091 if (r) { 3092 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3093 return r; 3094 } 3095 3096 /* early init functions */ 3097 r = amdgpu_device_ip_early_init(adev); 3098 if (r) 3099 return r; 3100 3101 /* doorbell bar mapping and doorbell index init*/ 3102 amdgpu_device_doorbell_init(adev); 3103 3104 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 3105 /* this will fail for cards that aren't VGA class devices, just 3106 * ignore it */ 3107 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 3108 3109 if (amdgpu_device_supports_boco(ddev)) 3110 boco = true; 3111 if (amdgpu_has_atpx() && 3112 (amdgpu_is_atpx_hybrid() || 3113 amdgpu_has_atpx_dgpu_power_cntl()) && 3114 !pci_is_thunderbolt_attached(adev->pdev)) 3115 vga_switcheroo_register_client(adev->pdev, 3116 &amdgpu_switcheroo_ops, boco); 3117 if (boco) 3118 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 3119 3120 if (amdgpu_emu_mode == 1) { 3121 /* post the asic on emulation mode */ 3122 emu_soc_asic_init(adev); 3123 goto fence_driver_init; 3124 } 3125 3126 /* detect if we are with an SRIOV vbios */ 3127 amdgpu_device_detect_sriov_bios(adev); 3128 3129 /* check if we need to reset the asic 3130 * E.g., driver was not cleanly unloaded previously, etc. 
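 * This is skipped for SR-IOV VFs, where the host manages the ASIC state. 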
3131 */ 3132 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3133 r = amdgpu_asic_reset(adev); 3134 if (r) { 3135 dev_err(adev->dev, "asic reset on init failed\n"); 3136 goto failed; 3137 } 3138 } 3139 3140 /* Post card if necessary */ 3141 if (amdgpu_device_need_post(adev)) { 3142 if (!adev->bios) { 3143 dev_err(adev->dev, "no vBIOS found\n"); 3144 r = -EINVAL; 3145 goto failed; 3146 } 3147 DRM_INFO("GPU posting now...\n"); 3148 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3149 if (r) { 3150 dev_err(adev->dev, "gpu post error!\n"); 3151 goto failed; 3152 } 3153 } 3154 3155 if (adev->is_atom_fw) { 3156 /* Initialize clocks */ 3157 r = amdgpu_atomfirmware_get_clock_info(adev); 3158 if (r) { 3159 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3160 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3161 goto failed; 3162 } 3163 } else { 3164 /* Initialize clocks */ 3165 r = amdgpu_atombios_get_clock_info(adev); 3166 if (r) { 3167 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3168 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3169 goto failed; 3170 } 3171 /* init i2c buses */ 3172 if (!amdgpu_device_has_dc_support(adev)) 3173 amdgpu_atombios_i2c_init(adev); 3174 } 3175 3176 fence_driver_init: 3177 /* Fence driver */ 3178 r = amdgpu_fence_driver_init(adev); 3179 if (r) { 3180 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 3181 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3182 goto failed; 3183 } 3184 3185 /* init the mode config */ 3186 drm_mode_config_init(adev->ddev); 3187 3188 r = amdgpu_device_ip_init(adev); 3189 if (r) { 3190 /* failed in exclusive mode due to timeout */ 3191 if (amdgpu_sriov_vf(adev) && 3192 !amdgpu_sriov_runtime(adev) && 3193 amdgpu_virt_mmio_blocked(adev) && 3194 !amdgpu_virt_wait_reset(adev)) { 3195 dev_err(adev->dev, "VF exclusive mode timeout\n"); 3196 /* Don't send request since VF is inactive. */ 3197 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 3198 adev->virt.ops = NULL; 3199 r = -EAGAIN; 3200 goto failed; 3201 } 3202 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3203 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3204 goto failed; 3205 } 3206 3207 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3208 adev->gfx.config.max_shader_engines, 3209 adev->gfx.config.max_sh_per_se, 3210 adev->gfx.config.max_cu_per_sh, 3211 adev->gfx.cu_info.number); 3212 3213 adev->accel_working = true; 3214 3215 amdgpu_vm_check_compute_bug(adev); 3216 3217 /* Initialize the buffer migration limit. */ 3218 if (amdgpu_moverate >= 0) 3219 max_MBps = amdgpu_moverate; 3220 else 3221 max_MBps = 8; /* Allow 8 MB/s. */ 3222 /* Get a log2 for easy divisions. 
*/ 3223 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3224 3225 amdgpu_fbdev_init(adev); 3226 3227 r = amdgpu_pm_sysfs_init(adev); 3228 if (r) { 3229 adev->pm_sysfs_en = false; 3230 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 3231 } else 3232 adev->pm_sysfs_en = true; 3233 3234 r = amdgpu_ucode_sysfs_init(adev); 3235 if (r) { 3236 adev->ucode_sysfs_en = false; 3237 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3238 } else 3239 adev->ucode_sysfs_en = true; 3240 3241 if ((amdgpu_testing & 1)) { 3242 if (adev->accel_working) 3243 amdgpu_test_moves(adev); 3244 else 3245 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 3246 } 3247 if (amdgpu_benchmarking) { 3248 if (adev->accel_working) 3249 amdgpu_benchmark(adev, amdgpu_benchmarking); 3250 else 3251 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 3252 } 3253 3254 /* 3255 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3256 * Otherwise the mgpu fan boost feature will be skipped due to the 3257 * gpu instance is counted less. 3258 */ 3259 amdgpu_register_gpu_instance(adev); 3260 3261 /* enable clockgating, etc. after ib tests, etc. since some blocks require 3262 * explicit gating rather than handling it automatically. 3263 */ 3264 r = amdgpu_device_ip_late_init(adev); 3265 if (r) { 3266 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3267 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 3268 goto failed; 3269 } 3270 3271 /* must succeed. */ 3272 amdgpu_ras_resume(adev); 3273 3274 queue_delayed_work(system_wq, &adev->delayed_init_work, 3275 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3276 3277 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); 3278 if (r) { 3279 dev_err(adev->dev, "Could not create pcie_replay_count"); 3280 return r; 3281 } 3282 3283 r = device_create_file(adev->dev, &dev_attr_product_name); 3284 if (r) { 3285 dev_err(adev->dev, "Could not create product_name"); 3286 return r; 3287 } 3288 3289 r = device_create_file(adev->dev, &dev_attr_product_number); 3290 if (r) { 3291 dev_err(adev->dev, "Could not create product_number"); 3292 return r; 3293 } 3294 3295 r = device_create_file(adev->dev, &dev_attr_serial_number); 3296 if (r) { 3297 dev_err(adev->dev, "Could not create serial_number"); 3298 return r; 3299 } 3300 3301 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3302 r = amdgpu_pmu_init(adev); 3303 if (r) 3304 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 3305 3306 return 0; 3307 3308 failed: 3309 amdgpu_vf_error_trans_all(adev); 3310 if (boco) 3311 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3312 3313 return r; 3314 } 3315 3316 /** 3317 * amdgpu_device_fini - tear down the driver 3318 * 3319 * @adev: amdgpu_device pointer 3320 * 3321 * Tear down the driver info (all asics). 3322 * Called at driver shutdown. 
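 * Teardown happens roughly in the reverse order of amdgpu_device_init(): interrupts are disabled, the fence driver and IP blocks are finalized, and the vBIOS copy, MMIO/IO mappings and sysfs files are released. 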
3323 */ 3324 void amdgpu_device_fini(struct amdgpu_device *adev) 3325 { 3326 int r; 3327 3328 DRM_INFO("amdgpu: finishing device.\n"); 3329 flush_delayed_work(&adev->delayed_init_work); 3330 adev->shutdown = true; 3331 3332 /* make sure IB test finished before entering exclusive mode 3333 * to avoid preemption on IB test 3334 * */ 3335 if (amdgpu_sriov_vf(adev)) 3336 amdgpu_virt_request_full_gpu(adev, false); 3337 3338 /* disable all interrupts */ 3339 amdgpu_irq_disable_all(adev); 3340 if (adev->mode_info.mode_config_initialized){ 3341 if (!amdgpu_device_has_dc_support(adev)) 3342 drm_helper_force_disable_all(adev->ddev); 3343 else 3344 drm_atomic_helper_shutdown(adev->ddev); 3345 } 3346 amdgpu_fence_driver_fini(adev); 3347 if (adev->pm_sysfs_en) 3348 amdgpu_pm_sysfs_fini(adev); 3349 amdgpu_fbdev_fini(adev); 3350 r = amdgpu_device_ip_fini(adev); 3351 if (adev->firmware.gpu_info_fw) { 3352 release_firmware(adev->firmware.gpu_info_fw); 3353 adev->firmware.gpu_info_fw = NULL; 3354 } 3355 adev->accel_working = false; 3356 /* free i2c buses */ 3357 if (!amdgpu_device_has_dc_support(adev)) 3358 amdgpu_i2c_fini(adev); 3359 3360 if (amdgpu_emu_mode != 1) 3361 amdgpu_atombios_fini(adev); 3362 3363 kfree(adev->bios); 3364 adev->bios = NULL; 3365 if (amdgpu_has_atpx() && 3366 (amdgpu_is_atpx_hybrid() || 3367 amdgpu_has_atpx_dgpu_power_cntl()) && 3368 !pci_is_thunderbolt_attached(adev->pdev)) 3369 vga_switcheroo_unregister_client(adev->pdev); 3370 if (amdgpu_device_supports_boco(adev->ddev)) 3371 vga_switcheroo_fini_domain_pm_ops(adev->dev); 3372 vga_client_register(adev->pdev, NULL, NULL, NULL); 3373 if (adev->rio_mem) 3374 pci_iounmap(adev->pdev, adev->rio_mem); 3375 adev->rio_mem = NULL; 3376 iounmap(adev->rmmio); 3377 adev->rmmio = NULL; 3378 amdgpu_device_doorbell_fini(adev); 3379 3380 device_remove_file(adev->dev, &dev_attr_pcie_replay_count); 3381 if (adev->ucode_sysfs_en) 3382 amdgpu_ucode_sysfs_fini(adev); 3383 device_remove_file(adev->dev, &dev_attr_product_name); 3384 device_remove_file(adev->dev, &dev_attr_product_number); 3385 device_remove_file(adev->dev, &dev_attr_serial_number); 3386 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3387 amdgpu_pmu_fini(adev); 3388 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) 3389 amdgpu_discovery_fini(adev); 3390 } 3391 3392 3393 /* 3394 * Suspend & resume. 3395 */ 3396 /** 3397 * amdgpu_device_suspend - initiate device suspend 3398 * 3399 * @dev: drm dev pointer 3400 * @suspend: suspend state 3401 * @fbcon : notify the fbdev of suspend 3402 * 3403 * Puts the hw in the suspend state (all asics). 3404 * Returns 0 for success or an error on failure. 3405 * Called at driver suspend. 
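 * Suspend runs in two phases with a VRAM eviction after each: phase 1 quiesces the display hardware, then the fence driver is suspended, and phase 2 suspends the remaining IP blocks. 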
3406 */ 3407 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 3408 { 3409 struct amdgpu_device *adev; 3410 struct drm_crtc *crtc; 3411 struct drm_connector *connector; 3412 struct drm_connector_list_iter iter; 3413 int r; 3414 3415 if (dev == NULL || dev->dev_private == NULL) { 3416 return -ENODEV; 3417 } 3418 3419 adev = dev->dev_private; 3420 3421 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3422 return 0; 3423 3424 adev->in_suspend = true; 3425 drm_kms_helper_poll_disable(dev); 3426 3427 if (fbcon) 3428 amdgpu_fbdev_set_suspend(adev, 1); 3429 3430 cancel_delayed_work_sync(&adev->delayed_init_work); 3431 3432 if (!amdgpu_device_has_dc_support(adev)) { 3433 /* turn off display hw */ 3434 drm_modeset_lock_all(dev); 3435 drm_connector_list_iter_begin(dev, &iter); 3436 drm_for_each_connector_iter(connector, &iter) 3437 drm_helper_connector_dpms(connector, 3438 DRM_MODE_DPMS_OFF); 3439 drm_connector_list_iter_end(&iter); 3440 drm_modeset_unlock_all(dev); 3441 /* unpin the front buffers and cursors */ 3442 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 3443 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 3444 struct drm_framebuffer *fb = crtc->primary->fb; 3445 struct amdgpu_bo *robj; 3446 3447 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 3448 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 3449 r = amdgpu_bo_reserve(aobj, true); 3450 if (r == 0) { 3451 amdgpu_bo_unpin(aobj); 3452 amdgpu_bo_unreserve(aobj); 3453 } 3454 } 3455 3456 if (fb == NULL || fb->obj[0] == NULL) { 3457 continue; 3458 } 3459 robj = gem_to_amdgpu_bo(fb->obj[0]); 3460 /* don't unpin kernel fb objects */ 3461 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 3462 r = amdgpu_bo_reserve(robj, true); 3463 if (r == 0) { 3464 amdgpu_bo_unpin(robj); 3465 amdgpu_bo_unreserve(robj); 3466 } 3467 } 3468 } 3469 } 3470 3471 amdgpu_amdkfd_suspend(adev, !fbcon); 3472 3473 amdgpu_ras_suspend(adev); 3474 3475 r = amdgpu_device_ip_suspend_phase1(adev); 3476 3477 /* evict vram memory */ 3478 amdgpu_bo_evict_vram(adev); 3479 3480 amdgpu_fence_driver_suspend(adev); 3481 3482 r = amdgpu_device_ip_suspend_phase2(adev); 3483 3484 /* evict remaining vram memory 3485 * This second call to evict vram is to evict the gart page table 3486 * using the CPU. 3487 */ 3488 amdgpu_bo_evict_vram(adev); 3489 3490 return 0; 3491 } 3492 3493 /** 3494 * amdgpu_device_resume - initiate device resume 3495 * 3496 * @dev: drm dev pointer 3497 * @resume: resume state 3498 * @fbcon : notify the fbdev of resume 3499 * 3500 * Bring the hw back to operating state (all asics). 3501 * Returns 0 for success or an error on failure. 3502 * Called at driver resume. 
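 * The card is re-posted if needed, the IP blocks and fence driver are resumed, cursor BOs are re-pinned for the non-DC path, and finally the display state is restored and connector polling/hotplug handling is re-enabled. 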
3503 */ 3504 int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 3505 { 3506 struct drm_connector *connector; 3507 struct drm_connector_list_iter iter; 3508 struct amdgpu_device *adev = dev->dev_private; 3509 struct drm_crtc *crtc; 3510 int r = 0; 3511 3512 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 3513 return 0; 3514 3515 /* post card */ 3516 if (amdgpu_device_need_post(adev)) { 3517 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 3518 if (r) 3519 DRM_ERROR("amdgpu asic init failed\n"); 3520 } 3521 3522 r = amdgpu_device_ip_resume(adev); 3523 if (r) { 3524 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 3525 return r; 3526 } 3527 amdgpu_fence_driver_resume(adev); 3528 3529 3530 r = amdgpu_device_ip_late_init(adev); 3531 if (r) 3532 return r; 3533 3534 queue_delayed_work(system_wq, &adev->delayed_init_work, 3535 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3536 3537 if (!amdgpu_device_has_dc_support(adev)) { 3538 /* pin cursors */ 3539 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 3540 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 3541 3542 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { 3543 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 3544 r = amdgpu_bo_reserve(aobj, true); 3545 if (r == 0) { 3546 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 3547 if (r != 0) 3548 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 3549 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 3550 amdgpu_bo_unreserve(aobj); 3551 } 3552 } 3553 } 3554 } 3555 r = amdgpu_amdkfd_resume(adev, !fbcon); 3556 if (r) 3557 return r; 3558 3559 /* Make sure IB tests flushed */ 3560 flush_delayed_work(&adev->delayed_init_work); 3561 3562 /* blat the mode back in */ 3563 if (fbcon) { 3564 if (!amdgpu_device_has_dc_support(adev)) { 3565 /* pre DCE11 */ 3566 drm_helper_resume_force_mode(dev); 3567 3568 /* turn on display hw */ 3569 drm_modeset_lock_all(dev); 3570 3571 drm_connector_list_iter_begin(dev, &iter); 3572 drm_for_each_connector_iter(connector, &iter) 3573 drm_helper_connector_dpms(connector, 3574 DRM_MODE_DPMS_ON); 3575 drm_connector_list_iter_end(&iter); 3576 3577 drm_modeset_unlock_all(dev); 3578 } 3579 amdgpu_fbdev_set_suspend(adev, 0); 3580 } 3581 3582 drm_kms_helper_poll_enable(dev); 3583 3584 amdgpu_ras_resume(adev); 3585 3586 /* 3587 * Most of the connector probing functions try to acquire runtime pm 3588 * refs to ensure that the GPU is powered on when connector polling is 3589 * performed. Since we're calling this from a runtime PM callback, 3590 * trying to acquire rpm refs will cause us to deadlock. 3591 * 3592 * Since we're guaranteed to be holding the rpm lock, it's safe to 3593 * temporarily disable the rpm helpers so this doesn't deadlock us. 3594 */ 3595 #ifdef CONFIG_PM 3596 dev->dev->power.disable_depth++; 3597 #endif 3598 if (!amdgpu_device_has_dc_support(adev)) 3599 drm_helper_hpd_irq_event(dev); 3600 else 3601 drm_kms_helper_hotplug_event(dev); 3602 #ifdef CONFIG_PM 3603 dev->dev->power.disable_depth--; 3604 #endif 3605 adev->in_suspend = false; 3606 3607 return 0; 3608 } 3609 3610 /** 3611 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 3612 * 3613 * @adev: amdgpu_device pointer 3614 * 3615 * The list of all the hardware IPs that make up the asic is walked and 3616 * the check_soft_reset callbacks are run. check_soft_reset determines 3617 * if the asic is still hung or not. 3618 * Returns true if any of the IPs are still in a hung state, false if not. 
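 * Note that SR-IOV VFs and ASICs that need a full reset always report a hang here. 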
3619 */ 3620 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 3621 { 3622 int i; 3623 bool asic_hang = false; 3624 3625 if (amdgpu_sriov_vf(adev)) 3626 return true; 3627 3628 if (amdgpu_asic_need_full_reset(adev)) 3629 return true; 3630 3631 for (i = 0; i < adev->num_ip_blocks; i++) { 3632 if (!adev->ip_blocks[i].status.valid) 3633 continue; 3634 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 3635 adev->ip_blocks[i].status.hang = 3636 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 3637 if (adev->ip_blocks[i].status.hang) { 3638 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 3639 asic_hang = true; 3640 } 3641 } 3642 return asic_hang; 3643 } 3644 3645 /** 3646 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 3647 * 3648 * @adev: amdgpu_device pointer 3649 * 3650 * The list of all the hardware IPs that make up the asic is walked and the 3651 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 3652 * handles any IP specific hardware or software state changes that are 3653 * necessary for a soft reset to succeed. 3654 * Returns 0 on success, negative error code on failure. 3655 */ 3656 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 3657 { 3658 int i, r = 0; 3659 3660 for (i = 0; i < adev->num_ip_blocks; i++) { 3661 if (!adev->ip_blocks[i].status.valid) 3662 continue; 3663 if (adev->ip_blocks[i].status.hang && 3664 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 3665 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 3666 if (r) 3667 return r; 3668 } 3669 } 3670 3671 return 0; 3672 } 3673 3674 /** 3675 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3676 * 3677 * @adev: amdgpu_device pointer 3678 * 3679 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3680 * reset is necessary to recover. 3681 * Returns true if a full asic reset is required, false if not. 3682 */ 3683 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3684 { 3685 int i; 3686 3687 if (amdgpu_asic_need_full_reset(adev)) 3688 return true; 3689 3690 for (i = 0; i < adev->num_ip_blocks; i++) { 3691 if (!adev->ip_blocks[i].status.valid) 3692 continue; 3693 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3694 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3695 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3696 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3697 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3698 if (adev->ip_blocks[i].status.hang) { 3699 DRM_INFO("Some block need full reset!\n"); 3700 return true; 3701 } 3702 } 3703 } 3704 return false; 3705 } 3706 3707 /** 3708 * amdgpu_device_ip_soft_reset - do a soft reset 3709 * 3710 * @adev: amdgpu_device pointer 3711 * 3712 * The list of all the hardware IPs that make up the asic is walked and the 3713 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3714 * IP specific hardware or software state changes that are necessary to soft 3715 * reset the IP. 3716 * Returns 0 on success, negative error code on failure. 
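 * Only blocks that were flagged as hung by check_soft_reset are actually reset here. 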
3717 */ 3718 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3719 { 3720 int i, r = 0; 3721 3722 for (i = 0; i < adev->num_ip_blocks; i++) { 3723 if (!adev->ip_blocks[i].status.valid) 3724 continue; 3725 if (adev->ip_blocks[i].status.hang && 3726 adev->ip_blocks[i].version->funcs->soft_reset) { 3727 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3728 if (r) 3729 return r; 3730 } 3731 } 3732 3733 return 0; 3734 } 3735 3736 /** 3737 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3738 * 3739 * @adev: amdgpu_device pointer 3740 * 3741 * The list of all the hardware IPs that make up the asic is walked and the 3742 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3743 * handles any IP specific hardware or software state changes that are 3744 * necessary after the IP has been soft reset. 3745 * Returns 0 on success, negative error code on failure. 3746 */ 3747 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3748 { 3749 int i, r = 0; 3750 3751 for (i = 0; i < adev->num_ip_blocks; i++) { 3752 if (!adev->ip_blocks[i].status.valid) 3753 continue; 3754 if (adev->ip_blocks[i].status.hang && 3755 adev->ip_blocks[i].version->funcs->post_soft_reset) 3756 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3757 if (r) 3758 return r; 3759 } 3760 3761 return 0; 3762 } 3763 3764 /** 3765 * amdgpu_device_recover_vram - Recover some VRAM contents 3766 * 3767 * @adev: amdgpu_device pointer 3768 * 3769 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3770 * restore things like GPUVM page tables after a GPU reset where 3771 * the contents of VRAM might be lost. 3772 * 3773 * Returns: 3774 * 0 on success, negative error code on failure. 3775 */ 3776 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3777 { 3778 struct dma_fence *fence = NULL, *next = NULL; 3779 struct amdgpu_bo *shadow; 3780 long r = 1, tmo; 3781 3782 if (amdgpu_sriov_runtime(adev)) 3783 tmo = msecs_to_jiffies(8000); 3784 else 3785 tmo = msecs_to_jiffies(100); 3786 3787 DRM_INFO("recover vram bo from shadow start\n"); 3788 mutex_lock(&adev->shadow_list_lock); 3789 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3790 3791 /* No need to recover an evicted BO */ 3792 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3793 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || 3794 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3795 continue; 3796 3797 r = amdgpu_bo_restore_shadow(shadow, &next); 3798 if (r) 3799 break; 3800 3801 if (fence) { 3802 tmo = dma_fence_wait_timeout(fence, false, tmo); 3803 dma_fence_put(fence); 3804 fence = next; 3805 if (tmo == 0) { 3806 r = -ETIMEDOUT; 3807 break; 3808 } else if (tmo < 0) { 3809 r = tmo; 3810 break; 3811 } 3812 } else { 3813 fence = next; 3814 } 3815 } 3816 mutex_unlock(&adev->shadow_list_lock); 3817 3818 if (fence) 3819 tmo = dma_fence_wait_timeout(fence, false, tmo); 3820 dma_fence_put(fence); 3821 3822 if (r < 0 || tmo <= 0) { 3823 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 3824 return -EIO; 3825 } 3826 3827 DRM_INFO("recover vram bo from shadow done\n"); 3828 return 0; 3829 } 3830 3831 3832 /** 3833 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3834 * 3835 * @adev: amdgpu device pointer 3836 * @from_hypervisor: request from hypervisor 3837 * 3838 * do VF FLR and reinitialize Asic 3839 * return 0 means succeeded otherwise failed 3840 */ 3841 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 
				     bool from_hypervisor)
{
	int r;

	if (from_hypervisor)
		r = amdgpu_virt_request_full_gpu(adev, true);
	else
		r = amdgpu_virt_reset_gpu(adev);
	if (r)
		return r;

	amdgpu_amdkfd_pre_reset(adev);

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		goto error;

	amdgpu_virt_init_data_exchange(adev);
	/* we need to recover the GART prior to running SMC/CP/SDMA resume */
	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		goto error;

	amdgpu_irq_gpu_reset_resume_helper(adev);
	r = amdgpu_ib_ring_tests(adev);
	amdgpu_amdkfd_post_reset(adev);

error:
	amdgpu_virt_release_full_gpu(adev, true);
	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
		amdgpu_inc_vram_lost(adev);
		r = amdgpu_device_recover_vram(adev);
	}

	return r;
}

/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu_device pointer
 *
 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
 * a hung GPU.
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

	if (amdgpu_gpu_recovery == 0)
		goto disabled;

	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_gpu_recovery == -1) {
		switch (adev->asic_type) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		case CHIP_TOPAZ:
		case CHIP_TONGA:
		case CHIP_FIJI:
		case CHIP_POLARIS10:
		case CHIP_POLARIS11:
		case CHIP_POLARIS12:
		case CHIP_VEGAM:
		case CHIP_VEGA20:
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_RAVEN:
		case CHIP_ARCTURUS:
		case CHIP_RENOIR:
		case CHIP_NAVI10:
		case CHIP_NAVI14:
		case CHIP_NAVI12:
			break;
		default:
			goto disabled;
		}
	}

	return true;

disabled:
	DRM_INFO("GPU recovery disabled.\n");
	return false;
}


static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
					struct amdgpu_job *job,
					bool *need_full_reset_arg)
{
	int i, r = 0;
	bool need_full_reset = *need_full_reset_arg;

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	if (job)
		drm_sched_increase_karma(&job->base);

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				DRM_INFO("soft reset failed, will fall back to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);

		*need_full_reset_arg = need_full_reset;
	}

	return r;
}

static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
				struct list_head *device_list_handle,
				bool *need_full_reset_arg)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
	int r = 0;

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			if (r) {
				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
					  r, tmp_adev->ddev->unique);
				break;
			}
		}

		/* For XGMI wait for all resets to complete before proceeding */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}
		}
	}

	if (!r && amdgpu_ras_intr_triggered()) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			if (tmp_adev->mmhub.funcs &&
			    tmp_adev->mmhub.funcs->reset_ras_error_count)
				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
		}

		amdgpu_ras_intr_cleared();
	}

	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (need_full_reset) {
			/* post card */
			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
				DRM_WARN("asic atom init failed!");

			if (!r) {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				r = amdgpu_gtt_mgr_recover(
					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
				if (r)
					goto out;

				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked since the reset has
				 * already completed successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				amdgpu_fbdev_set_suspend(tmp_adev, 0);

				/* must succeed. */
				amdgpu_ras_resume(tmp_adev);

				/* Update PSP FW topology after reset */
				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
			}
		}


out:
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				r = amdgpu_device_ip_suspend(tmp_adev);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	*need_full_reset_arg = need_full_reset;
	return r;
}

static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
	if (trylock) {
		if (!mutex_trylock(&adev->lock_reset))
			return false;
	} else
		mutex_lock(&adev->lock_reset);

	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = true;
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}

	return true;
}

static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	adev->in_gpu_reset = false;
	mutex_unlock(&adev->lock_reset);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt a soft reset or full reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle = NULL;
	bool need_full_reset, job_signaled;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool in_ras_intr = amdgpu_ras_intr_triggered();
	bool use_baco =
		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read the log and see why the system rebooted.
	 */
	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {

		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	need_full_reset = job_signaled = false;
	INIT_LIST_HEAD(&device_list);

	amdgpu_ras_set_error_query_ready(adev, false);

	dev_info(adev->dev, "GPU %s begin!\n",
		 (in_ras_intr && !use_baco) ? "jobs stop" : "reset");

	cancel_delayed_work_sync(&adev->delayed_init_work);

	hive = amdgpu_get_xgmi_hive(adev, false);

	/*
	 * Here we trylock to avoid a chain of resets executing, triggered
	 * either by jobs on different adevs in the XGMI hive or by jobs on
	 * different schedulers for the same device, while this timeout
	 * handler is running.
	 * We always reset all schedulers for a device and all devices in the
	 * XGMI hive, so that should take care of them too.
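	 *
	 * If the trylock fails, another reset is already in progress for this
	 * device or hive, so we simply bail out below and let the reset that
	 * is already running take care of recovery.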
	 */

	if (hive && !mutex_trylock(&hive->reset_lock)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
			 job ? job->base.id : -1, hive->hive_id);
		return 0;
	}

	/* Start with adev pre asic reset first for soft reset check. */
	if (!amdgpu_device_lock_adev(adev, !hive)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
			 job ? job->base.id : -1);
		return 0;
	}

	/* Block kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_pre_reset(adev);

	/* Build list of devices to reset */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (!hive) {
			/* unlock kfd: SRIOV would do it separately */
			if (!amdgpu_sriov_vf(adev))
				amdgpu_amdkfd_post_reset(adev);
			amdgpu_device_unlock_adev(adev);
			return -ENODEV;
		}

		/*
		 * In case we are in XGMI hive mode, device reset is done for all
		 * the nodes in the hive to retrain all XGMI links and hence the
		 * reset sequence is executed in a loop on all nodes.
		 */
		device_list_handle = &hive->device_list;
	} else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (tmp_adev != adev) {
			amdgpu_ras_set_error_query_ready(tmp_adev, false);
			amdgpu_device_lock_adev(tmp_adev, false);
			if (!amdgpu_sriov_vf(tmp_adev))
				amdgpu_amdkfd_pre_reset(tmp_adev);
		}

		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset has completed.
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		amdgpu_fbdev_set_suspend(tmp_adev, 1);

		/* disable ras on ALL IPs */
		if (!(in_ras_intr && !use_baco) &&
		    amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (in_ras_intr && !use_baco)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
	}


	if (in_ras_intr && !use_baco)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent))
		job_signaled = true;

	if (job_signaled) {
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}


	/* Guilty job will be freed after this */
	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
	if (r) {
		/* TODO: should we stop? */
		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
			  r, adev->ddev->unique);
		adev->asic_reset_res = r;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive.
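	 *
	 * We jump back here when amdgpu_do_asic_reset() below returns -EAGAIN,
	 * i.e. when the post-reset IB ring test failed and the reset is
	 * retried with need_full_reset forced on.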
	 */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		if (tmp_adev == adev)
			continue;

		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/* TODO: should we stop? */
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed. */
	/* TODO Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, !job);
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point in resubmitting jobs if we didn't HW reset */
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
			drm_helper_resume_force_mode(tmp_adev->ddev);
		}

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* unlock kfd: SRIOV would do it separately */
		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive)
		mutex_unlock(&hive->reset_lock);

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
	return r;
}

/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
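 *
 * The amdgpu_pcie_gen_cap and amdgpu_pcie_lane_cap module parameters, when
 * set, override the detected values for the gen speed and lane width masks
 * respectively.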
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}

int amdgpu_device_baco_enter(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	if (!amdgpu_device_supports_baco(adev->ddev))
		return -ENOTSUPP;

	if (ras && ras->supported)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

	return amdgpu_dpm_baco_enter(adev);
}

int amdgpu_device_baco_exit(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int ret = 0;

	if (!amdgpu_device_supports_baco(adev->ddev))
		return -ENOTSUPP;

	ret = amdgpu_dpm_baco_exit(adev);
	if (ret)
		return ret;

	if (ras && ras->supported)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

	return 0;
}