/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
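 * Registers inside the mapped MMIO range are accessed directly with
 * readl()/writel(); offsets beyond it fall back to the indirect
 * mmMM_INDEX/mmMM_DATA pair, serialized by mmio_idx_lock.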
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
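 * Returns 0 on success, negative error code on failure.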
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     AMDGPU_DOORBELL_MAX_ASSIGNMENT + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory; a page is 4KB so we have 12 bits of offset, a minimum of
 * 9 bits in the page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev))
		adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

/* Phase 1 of hw init: bring up the COMMON and IH blocks, which the other
 * IP blocks depend on, before any firmware is loaded.
 */
static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

/* Phase 2 of hw init: bring up the remaining IP blocks after firmware loading. */
static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

/* Firmware loading: on VEGA10 and newer this brings up (or resumes) the PSP
 * block first, then asks the SMU to load its firmware through the powerplay
 * load_firmware callback when one is provided.
 */
static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}

	if (adev->powerplay.pp_funcs->load_firmware) {
		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
		if (r) {
			pr_err("firmware loading failed\n");
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
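 * The GMC block is brought up early so GPU memory can be allocated, then
 * COMMON/IH come up in phase 1, firmware is loaded, and the remaining
 * blocks follow in phase 2.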
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		return r;

	amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Records a reset magic value from the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * Returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * Late initialization pass enabling clockgating for hardware IPs.
 * Fini or suspend, pass disabling clockgating for hardware IPs.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
				      enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										      state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										      state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
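 * Clock and power gating are also enabled here, and the delayed work that
 * runs the IB tests is scheduled.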
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	queue_delayed_work(system_wq, &adev->late_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	amdgpu_device_fill_reset_magic(adev);

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run. hw_fini tears down the hardware associated with each IP
 * and sw_fini tears down any software state associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_amdkfd_device_fini(adev);

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* need to disable SMC first */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}

		adev->ip_blocks[i].status.hw = false;
	}


	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.sw)
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			amdgpu_ucode_free_bo(adev);
			amdgpu_free_static_csa(adev);
			amdgpu_device_wb_fini(adev);
			amdgpu_device_vram_scratch_fini(adev);
		}

		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.sw = false;
		adev->ip_blocks[i].status.valid = false;
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_fini)
			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
		adev->ip_blocks[i].status.late_initialized = false;
	}

	if (amdgpu_sriov_vf(adev))
		if (amdgpu_virt_release_full_gpu(adev, false))
			DRM_ERROR("failed to release exclusive mode on fini\n");

	return 0;
}

static int amdgpu_device_enable_mgpu_fan_boost(void)
{
	struct amdgpu_gpu_instance *gpu_ins;
	struct amdgpu_device *adev;
	int i, ret = 0;

	mutex_lock(&mgpu_info.mutex);

	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
	if (mgpu_info.num_dgpu < 2)
		goto out;

	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		gpu_ins = &(mgpu_info.gpu_ins[i]);
		adev = gpu_ins->adev;
		if (!(adev->flags & AMD_IS_APU) &&
		    !gpu_ins->mgpu_fan_enabled &&
		    adev->powerplay.pp_funcs &&
		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
			if (ret)
				break;

			gpu_ins->mgpu_fan_enabled = 1;
		}
	}

out:
	mutex_unlock(&mgpu_info.mutex);

	return ret;
}

/**
 * amdgpu_device_ip_late_init_func_handler - work handler for ib test
 *
 * @work: work_struct.
 */
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, late_init_work.work);
	int r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		DRM_ERROR("ib ring test failed (%d).\n", r);

	r = amdgpu_device_enable_mgpu_fan_boost();
	if (r)
		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
}

static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);

	mutex_lock(&adev->gfx.gfx_off_mutex);
	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
			adev->gfx.gfx_off_state = true;
	}
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}

/**
 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* displays are handled separately */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
			/* XXX handle errors */
			r = adev->ip_blocks[i].version->funcs->suspend(adev);
			/* XXX handle errors */
			if (r) {
				DRM_ERROR("suspend of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs.
The list of all the hardware 2037 * IPs that make up the asic is walked, clockgating is disabled and the 2038 * suspend callbacks are run. suspend puts the hardware and software state 2039 * in each IP into a state suitable for suspend. 2040 * Returns 0 on success, negative error code on failure. 2041 */ 2042 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2043 { 2044 int i, r; 2045 2046 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2047 if (!adev->ip_blocks[i].status.valid) 2048 continue; 2049 /* displays are handled in phase1 */ 2050 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2051 continue; 2052 /* XXX handle errors */ 2053 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2054 /* XXX handle errors */ 2055 if (r) { 2056 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2057 adev->ip_blocks[i].version->funcs->name, r); 2058 } 2059 } 2060 2061 return 0; 2062 } 2063 2064 /** 2065 * amdgpu_device_ip_suspend - run suspend for hardware IPs 2066 * 2067 * @adev: amdgpu_device pointer 2068 * 2069 * Main suspend function for hardware IPs. The list of all the hardware 2070 * IPs that make up the asic is walked, clockgating is disabled and the 2071 * suspend callbacks are run. suspend puts the hardware and software state 2072 * in each IP into a state suitable for suspend. 2073 * Returns 0 on success, negative error code on failure. 2074 */ 2075 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 2076 { 2077 int r; 2078 2079 if (amdgpu_sriov_vf(adev)) 2080 amdgpu_virt_request_full_gpu(adev, false); 2081 2082 r = amdgpu_device_ip_suspend_phase1(adev); 2083 if (r) 2084 return r; 2085 r = amdgpu_device_ip_suspend_phase2(adev); 2086 2087 if (amdgpu_sriov_vf(adev)) 2088 amdgpu_virt_release_full_gpu(adev, false); 2089 2090 return r; 2091 } 2092 2093 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 2094 { 2095 int i, r; 2096 2097 static enum amd_ip_block_type ip_order[] = { 2098 AMD_IP_BLOCK_TYPE_GMC, 2099 AMD_IP_BLOCK_TYPE_COMMON, 2100 AMD_IP_BLOCK_TYPE_PSP, 2101 AMD_IP_BLOCK_TYPE_IH, 2102 }; 2103 2104 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2105 int j; 2106 struct amdgpu_ip_block *block; 2107 2108 for (j = 0; j < adev->num_ip_blocks; j++) { 2109 block = &adev->ip_blocks[j]; 2110 2111 if (block->version->type != ip_order[i] || 2112 !block->status.valid) 2113 continue; 2114 2115 r = block->version->funcs->hw_init(adev); 2116 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2117 if (r) 2118 return r; 2119 } 2120 } 2121 2122 return 0; 2123 } 2124 2125 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 2126 { 2127 int i, r; 2128 2129 static enum amd_ip_block_type ip_order[] = { 2130 AMD_IP_BLOCK_TYPE_SMC, 2131 AMD_IP_BLOCK_TYPE_DCE, 2132 AMD_IP_BLOCK_TYPE_GFX, 2133 AMD_IP_BLOCK_TYPE_SDMA, 2134 AMD_IP_BLOCK_TYPE_UVD, 2135 AMD_IP_BLOCK_TYPE_VCE 2136 }; 2137 2138 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 2139 int j; 2140 struct amdgpu_ip_block *block; 2141 2142 for (j = 0; j < adev->num_ip_blocks; j++) { 2143 block = &adev->ip_blocks[j]; 2144 2145 if (block->version->type != ip_order[i] || 2146 !block->status.valid) 2147 continue; 2148 2149 r = block->version->funcs->hw_init(adev); 2150 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 2151 if (r) 2152 return r; 2153 } 2154 } 2155 2156 return 0; 2157 } 2158 2159 /** 2160 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 2161 * 2162 * @adev: amdgpu_device pointer 2163 * 2164 
 * First resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * COMMON, GMC, and IH. resume puts the hardware into a functional state
 * after a suspend and updates the software state as necessary. This
 * function is also used for restoring the GPU after a GPU reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->resume(adev);
			if (r) {
				DRM_ERROR("resume of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
 * functional state after a suspend and updates the software state as
 * necessary. This function is also used for restoring the GPU after a GPU
 * reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			continue;
		r = adev->ip_blocks[i].version->funcs->resume(adev);
		if (r) {
			DRM_ERROR("resume of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs. The hardware IPs
 * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them. In this case (S3/S4) they are
 * run sequentially.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_resume_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_resume_phase2(adev);

	return r;
}

/**
 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Query the VBIOS data tables to determine if the board supports SR-IOV.
 */
static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		if (adev->is_atom_fw) {
			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
		} else {
			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
		}

		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
	}
}

/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
	switch (asic_type) {
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_BONAIRE:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		/*
		 * We have systems in the wild with these ASICs that require
		 * LVDS and VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
		return amdgpu_dc > 0;
	case CHIP_HAWAII:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
	case CHIP_RAVEN:
#endif
		return amdgpu_dc != 0;
#endif
	default:
		return false;
	}
}

/**
 * amdgpu_device_has_dc_support - check if dc is supported
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true for supported, false for not supported
 */
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return false;

	return amdgpu_device_asic_has_dc_support(adev->asic_type);
}

/**
 * amdgpu_device_init - initialize the driver
 *
 * @adev: amdgpu_device pointer
 * @ddev: drm dev pointer
 * @pdev: pci dev pointer
 * @flags: driver flags
 *
 * Initializes the driver info and hw (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver startup.
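 *
 * For illustration only (the real call site is the KMS load path elsewhere
 * in the driver, not this file), a caller would use it roughly as:
 *
 *   r = amdgpu_device_init(adev, ddev, ddev->pdev, flags);
 *   if (r)
 *           return r;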
2353 */ 2354 int amdgpu_device_init(struct amdgpu_device *adev, 2355 struct drm_device *ddev, 2356 struct pci_dev *pdev, 2357 uint32_t flags) 2358 { 2359 int r, i; 2360 bool runtime = false; 2361 u32 max_MBps; 2362 2363 adev->shutdown = false; 2364 adev->dev = &pdev->dev; 2365 adev->ddev = ddev; 2366 adev->pdev = pdev; 2367 adev->flags = flags; 2368 adev->asic_type = flags & AMD_ASIC_MASK; 2369 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2370 if (amdgpu_emu_mode == 1) 2371 adev->usec_timeout *= 2; 2372 adev->gmc.gart_size = 512 * 1024 * 1024; 2373 adev->accel_working = false; 2374 adev->num_rings = 0; 2375 adev->mman.buffer_funcs = NULL; 2376 adev->mman.buffer_funcs_ring = NULL; 2377 adev->vm_manager.vm_pte_funcs = NULL; 2378 adev->vm_manager.vm_pte_num_rqs = 0; 2379 adev->gmc.gmc_funcs = NULL; 2380 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2381 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2382 2383 adev->smc_rreg = &amdgpu_invalid_rreg; 2384 adev->smc_wreg = &amdgpu_invalid_wreg; 2385 adev->pcie_rreg = &amdgpu_invalid_rreg; 2386 adev->pcie_wreg = &amdgpu_invalid_wreg; 2387 adev->pciep_rreg = &amdgpu_invalid_rreg; 2388 adev->pciep_wreg = &amdgpu_invalid_wreg; 2389 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 2390 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 2391 adev->didt_rreg = &amdgpu_invalid_rreg; 2392 adev->didt_wreg = &amdgpu_invalid_wreg; 2393 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 2394 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 2395 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 2396 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 2397 2398 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 2399 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 2400 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 2401 2402 /* mutex initialization are all done here so we 2403 * can recall function without having locking issues */ 2404 atomic_set(&adev->irq.ih.lock, 0); 2405 mutex_init(&adev->firmware.mutex); 2406 mutex_init(&adev->pm.mutex); 2407 mutex_init(&adev->gfx.gpu_clock_mutex); 2408 mutex_init(&adev->srbm_mutex); 2409 mutex_init(&adev->gfx.pipe_reserve_mutex); 2410 mutex_init(&adev->gfx.gfx_off_mutex); 2411 mutex_init(&adev->grbm_idx_mutex); 2412 mutex_init(&adev->mn_lock); 2413 mutex_init(&adev->virt.vf_errors.lock); 2414 hash_init(adev->mn_hash); 2415 mutex_init(&adev->lock_reset); 2416 2417 amdgpu_device_check_arguments(adev); 2418 2419 spin_lock_init(&adev->mmio_idx_lock); 2420 spin_lock_init(&adev->smc_idx_lock); 2421 spin_lock_init(&adev->pcie_idx_lock); 2422 spin_lock_init(&adev->uvd_ctx_idx_lock); 2423 spin_lock_init(&adev->didt_idx_lock); 2424 spin_lock_init(&adev->gc_cac_idx_lock); 2425 spin_lock_init(&adev->se_cac_idx_lock); 2426 spin_lock_init(&adev->audio_endpt_idx_lock); 2427 spin_lock_init(&adev->mm_stats.lock); 2428 2429 INIT_LIST_HEAD(&adev->shadow_list); 2430 mutex_init(&adev->shadow_list_lock); 2431 2432 INIT_LIST_HEAD(&adev->ring_lru_list); 2433 spin_lock_init(&adev->ring_lru_list_lock); 2434 2435 INIT_DELAYED_WORK(&adev->late_init_work, 2436 amdgpu_device_ip_late_init_func_handler); 2437 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 2438 amdgpu_device_delay_enable_gfx_off); 2439 2440 adev->gfx.gfx_off_req_count = 1; 2441 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; 2442 2443 /* Registers mapping */ 2444 /* TODO: block userspace mapping of io register */ 2445 if (adev->asic_type >= CHIP_BONAIRE) { 2446 adev->rmmio_base = pci_resource_start(adev->pdev, 5); 2447 adev->rmmio_size = pci_resource_len(adev->pdev, 5); 2448 } else { 2449 adev->rmmio_base = pci_resource_start(adev->pdev, 2); 2450 adev->rmmio_size = pci_resource_len(adev->pdev, 2); 2451 } 2452 2453 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 2454 if (adev->rmmio == NULL) { 2455 return -ENOMEM; 2456 } 2457 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 2458 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 2459 2460 /* doorbell bar mapping */ 2461 amdgpu_device_doorbell_init(adev); 2462 2463 /* io port mapping */ 2464 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 2465 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { 2466 adev->rio_mem_size = pci_resource_len(adev->pdev, i); 2467 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size); 2468 break; 2469 } 2470 } 2471 if (adev->rio_mem == NULL) 2472 DRM_INFO("PCI I/O BAR is not found.\n"); 2473 2474 amdgpu_device_get_pcie_info(adev); 2475 2476 /* early init functions */ 2477 r = amdgpu_device_ip_early_init(adev); 2478 if (r) 2479 return r; 2480 2481 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 2482 /* this will fail for cards that aren't VGA class devices, just 2483 * ignore it */ 2484 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); 2485 2486 if (amdgpu_device_is_px(ddev)) 2487 runtime = true; 2488 if (!pci_is_thunderbolt_attached(adev->pdev)) 2489 vga_switcheroo_register_client(adev->pdev, 2490 &amdgpu_switcheroo_ops, runtime); 2491 if (runtime) 2492 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 2493 2494 if (amdgpu_emu_mode == 1) { 2495 /* post the asic on emulation mode */ 2496 emu_soc_asic_init(adev); 2497 goto fence_driver_init; 2498 } 2499 2500 /* Read BIOS */ 2501 if (!amdgpu_get_bios(adev)) { 2502 r = -EINVAL; 2503 goto failed; 2504 } 2505 2506 r = amdgpu_atombios_init(adev); 2507 if (r) { 2508 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2509 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2510 goto failed; 2511 } 2512 2513 /* detect if we are with an SRIOV vbios */ 2514 amdgpu_device_detect_sriov_bios(adev); 2515 2516 /* Post card if necessary */ 2517 if (amdgpu_device_need_post(adev)) { 2518 if (!adev->bios) { 2519 dev_err(adev->dev, "no vBIOS found\n"); 2520 r = -EINVAL; 2521 goto failed; 2522 } 2523 DRM_INFO("GPU posting now...\n"); 2524 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2525 if (r) { 2526 dev_err(adev->dev, "gpu post error!\n"); 2527 goto failed; 2528 } 2529 } 2530 2531 if (adev->is_atom_fw) { 2532 /* Initialize clocks */ 2533 r = amdgpu_atomfirmware_get_clock_info(adev); 2534 if (r) { 2535 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 2536 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2537 goto failed; 2538 } 2539 } else { 2540 /* Initialize clocks */ 2541 r = amdgpu_atombios_get_clock_info(adev); 2542 if (r) { 2543 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 2544 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 2545 goto failed; 2546 } 2547 /* init i2c buses */ 2548 if (!amdgpu_device_has_dc_support(adev)) 2549 amdgpu_atombios_i2c_init(adev); 2550 } 2551 2552 fence_driver_init: 2553 /* Fence driver */ 2554 r = 
amdgpu_fence_driver_init(adev); 2555 if (r) { 2556 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); 2557 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 2558 goto failed; 2559 } 2560 2561 /* init the mode config */ 2562 drm_mode_config_init(adev->ddev); 2563 2564 r = amdgpu_device_ip_init(adev); 2565 if (r) { 2566 /* failed in exclusive mode due to timeout */ 2567 if (amdgpu_sriov_vf(adev) && 2568 !amdgpu_sriov_runtime(adev) && 2569 amdgpu_virt_mmio_blocked(adev) && 2570 !amdgpu_virt_wait_reset(adev)) { 2571 dev_err(adev->dev, "VF exclusive mode timeout\n"); 2572 /* Don't send request since VF is inactive. */ 2573 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 2574 adev->virt.ops = NULL; 2575 r = -EAGAIN; 2576 goto failed; 2577 } 2578 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 2579 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2580 goto failed; 2581 } 2582 2583 adev->accel_working = true; 2584 2585 amdgpu_vm_check_compute_bug(adev); 2586 2587 /* Initialize the buffer migration limit. */ 2588 if (amdgpu_moverate >= 0) 2589 max_MBps = amdgpu_moverate; 2590 else 2591 max_MBps = 8; /* Allow 8 MB/s. */ 2592 /* Get a log2 for easy divisions. */ 2593 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 2594 2595 r = amdgpu_ib_pool_init(adev); 2596 if (r) { 2597 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2598 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2599 goto failed; 2600 } 2601 2602 if (amdgpu_sriov_vf(adev)) 2603 amdgpu_virt_init_data_exchange(adev); 2604 2605 amdgpu_fbdev_init(adev); 2606 2607 r = amdgpu_pm_sysfs_init(adev); 2608 if (r) 2609 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 2610 2611 r = amdgpu_debugfs_gem_init(adev); 2612 if (r) 2613 DRM_ERROR("registering gem debugfs failed (%d).\n", r); 2614 2615 r = amdgpu_debugfs_regs_init(adev); 2616 if (r) 2617 DRM_ERROR("registering register debugfs failed (%d).\n", r); 2618 2619 r = amdgpu_debugfs_firmware_init(adev); 2620 if (r) 2621 DRM_ERROR("registering firmware debugfs failed (%d).\n", r); 2622 2623 r = amdgpu_debugfs_init(adev); 2624 if (r) 2625 DRM_ERROR("Creating debugfs files failed (%d).\n", r); 2626 2627 if ((amdgpu_testing & 1)) { 2628 if (adev->accel_working) 2629 amdgpu_test_moves(adev); 2630 else 2631 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); 2632 } 2633 if (amdgpu_benchmarking) { 2634 if (adev->accel_working) 2635 amdgpu_benchmark(adev, amdgpu_benchmarking); 2636 else 2637 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); 2638 } 2639 2640 /* enable clockgating, etc. after ib tests, etc. since some blocks require 2641 * explicit gating rather than handling it automatically. 2642 */ 2643 r = amdgpu_device_ip_late_init(adev); 2644 if (r) { 2645 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 2646 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 2647 goto failed; 2648 } 2649 2650 return 0; 2651 2652 failed: 2653 amdgpu_vf_error_trans_all(adev); 2654 if (runtime) 2655 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2656 2657 return r; 2658 } 2659 2660 /** 2661 * amdgpu_device_fini - tear down the driver 2662 * 2663 * @adev: amdgpu_device pointer 2664 * 2665 * Tear down the driver info (all asics). 2666 * Called at driver shutdown. 
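 *
 * Roughly the reverse of amdgpu_device_init(): interrupts are disabled,
 * the displays are shut down, the IB pool, fence driver and fbdev are
 * torn down, amdgpu_device_ip_fini() runs, and finally the I/O, MMIO and
 * doorbell mappings are released (see the body below).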
2667 */ 2668 void amdgpu_device_fini(struct amdgpu_device *adev) 2669 { 2670 int r; 2671 2672 DRM_INFO("amdgpu: finishing device.\n"); 2673 adev->shutdown = true; 2674 /* disable all interrupts */ 2675 amdgpu_irq_disable_all(adev); 2676 if (adev->mode_info.mode_config_initialized){ 2677 if (!amdgpu_device_has_dc_support(adev)) 2678 drm_crtc_force_disable_all(adev->ddev); 2679 else 2680 drm_atomic_helper_shutdown(adev->ddev); 2681 } 2682 amdgpu_ib_pool_fini(adev); 2683 amdgpu_fence_driver_fini(adev); 2684 amdgpu_pm_sysfs_fini(adev); 2685 amdgpu_fbdev_fini(adev); 2686 r = amdgpu_device_ip_fini(adev); 2687 if (adev->firmware.gpu_info_fw) { 2688 release_firmware(adev->firmware.gpu_info_fw); 2689 adev->firmware.gpu_info_fw = NULL; 2690 } 2691 adev->accel_working = false; 2692 cancel_delayed_work_sync(&adev->late_init_work); 2693 /* free i2c buses */ 2694 if (!amdgpu_device_has_dc_support(adev)) 2695 amdgpu_i2c_fini(adev); 2696 2697 if (amdgpu_emu_mode != 1) 2698 amdgpu_atombios_fini(adev); 2699 2700 kfree(adev->bios); 2701 adev->bios = NULL; 2702 if (!pci_is_thunderbolt_attached(adev->pdev)) 2703 vga_switcheroo_unregister_client(adev->pdev); 2704 if (adev->flags & AMD_IS_PX) 2705 vga_switcheroo_fini_domain_pm_ops(adev->dev); 2706 vga_client_register(adev->pdev, NULL, NULL, NULL); 2707 if (adev->rio_mem) 2708 pci_iounmap(adev->pdev, adev->rio_mem); 2709 adev->rio_mem = NULL; 2710 iounmap(adev->rmmio); 2711 adev->rmmio = NULL; 2712 amdgpu_device_doorbell_fini(adev); 2713 amdgpu_debugfs_regs_cleanup(adev); 2714 } 2715 2716 2717 /* 2718 * Suspend & resume. 2719 */ 2720 /** 2721 * amdgpu_device_suspend - initiate device suspend 2722 * 2723 * @dev: drm dev pointer 2724 * @suspend: suspend state 2725 * @fbcon : notify the fbdev of suspend 2726 * 2727 * Puts the hw in the suspend state (all asics). 2728 * Returns 0 for success or an error on failure. 2729 * Called at driver suspend. 
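 *
 * A minimal usage sketch (illustrative only; the actual entry points are
 * the driver's PM callbacks, which live outside this file):
 *
 *   r = amdgpu_device_suspend(drm_dev, true, true);
 *   if (r)
 *           DRM_ERROR("suspend failed (%d)\n", r);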
2730 */ 2731 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) 2732 { 2733 struct amdgpu_device *adev; 2734 struct drm_crtc *crtc; 2735 struct drm_connector *connector; 2736 int r; 2737 2738 if (dev == NULL || dev->dev_private == NULL) { 2739 return -ENODEV; 2740 } 2741 2742 adev = dev->dev_private; 2743 2744 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2745 return 0; 2746 2747 adev->in_suspend = true; 2748 drm_kms_helper_poll_disable(dev); 2749 2750 if (fbcon) 2751 amdgpu_fbdev_set_suspend(adev, 1); 2752 2753 cancel_delayed_work_sync(&adev->late_init_work); 2754 2755 if (!amdgpu_device_has_dc_support(adev)) { 2756 /* turn off display hw */ 2757 drm_modeset_lock_all(dev); 2758 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2759 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 2760 } 2761 drm_modeset_unlock_all(dev); 2762 /* unpin the front buffers and cursors */ 2763 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2764 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2765 struct drm_framebuffer *fb = crtc->primary->fb; 2766 struct amdgpu_bo *robj; 2767 2768 if (amdgpu_crtc->cursor_bo) { 2769 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2770 r = amdgpu_bo_reserve(aobj, true); 2771 if (r == 0) { 2772 amdgpu_bo_unpin(aobj); 2773 amdgpu_bo_unreserve(aobj); 2774 } 2775 } 2776 2777 if (fb == NULL || fb->obj[0] == NULL) { 2778 continue; 2779 } 2780 robj = gem_to_amdgpu_bo(fb->obj[0]); 2781 /* don't unpin kernel fb objects */ 2782 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2783 r = amdgpu_bo_reserve(robj, true); 2784 if (r == 0) { 2785 amdgpu_bo_unpin(robj); 2786 amdgpu_bo_unreserve(robj); 2787 } 2788 } 2789 } 2790 } 2791 2792 amdgpu_amdkfd_suspend(adev); 2793 2794 r = amdgpu_device_ip_suspend_phase1(adev); 2795 2796 /* evict vram memory */ 2797 amdgpu_bo_evict_vram(adev); 2798 2799 amdgpu_fence_driver_suspend(adev); 2800 2801 r = amdgpu_device_ip_suspend_phase2(adev); 2802 2803 /* evict remaining vram memory 2804 * This second call to evict vram is to evict the gart page table 2805 * using the CPU. 2806 */ 2807 amdgpu_bo_evict_vram(adev); 2808 2809 pci_save_state(dev->pdev); 2810 if (suspend) { 2811 /* Shut down the device */ 2812 pci_disable_device(dev->pdev); 2813 pci_set_power_state(dev->pdev, PCI_D3hot); 2814 } else { 2815 r = amdgpu_asic_reset(adev); 2816 if (r) 2817 DRM_ERROR("amdgpu asic reset failed\n"); 2818 } 2819 2820 return 0; 2821 } 2822 2823 /** 2824 * amdgpu_device_resume - initiate device resume 2825 * 2826 * @dev: drm dev pointer 2827 * @resume: resume state 2828 * @fbcon : notify the fbdev of resume 2829 * 2830 * Bring the hw back to operating state (all asics). 2831 * Returns 0 for success or an error on failure. 2832 * Called at driver resume. 
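 *
 * As a rough outline of the body below: the card is re-posted if needed,
 * amdgpu_device_ip_resume() and the fence driver run first, then
 * amdgpu_device_ip_late_init(), cursor BOs are re-pinned (non-DC), KFD is
 * resumed, and display state is restored before polling is re-enabled.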
2833 */ 2834 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) 2835 { 2836 struct drm_connector *connector; 2837 struct amdgpu_device *adev = dev->dev_private; 2838 struct drm_crtc *crtc; 2839 int r = 0; 2840 2841 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 2842 return 0; 2843 2844 if (resume) { 2845 pci_set_power_state(dev->pdev, PCI_D0); 2846 pci_restore_state(dev->pdev); 2847 r = pci_enable_device(dev->pdev); 2848 if (r) 2849 return r; 2850 } 2851 2852 /* post card */ 2853 if (amdgpu_device_need_post(adev)) { 2854 r = amdgpu_atom_asic_init(adev->mode_info.atom_context); 2855 if (r) 2856 DRM_ERROR("amdgpu asic init failed\n"); 2857 } 2858 2859 r = amdgpu_device_ip_resume(adev); 2860 if (r) { 2861 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); 2862 return r; 2863 } 2864 amdgpu_fence_driver_resume(adev); 2865 2866 2867 r = amdgpu_device_ip_late_init(adev); 2868 if (r) 2869 return r; 2870 2871 if (!amdgpu_device_has_dc_support(adev)) { 2872 /* pin cursors */ 2873 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2874 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2875 2876 if (amdgpu_crtc->cursor_bo) { 2877 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2878 r = amdgpu_bo_reserve(aobj, true); 2879 if (r == 0) { 2880 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); 2881 if (r != 0) 2882 DRM_ERROR("Failed to pin cursor BO (%d)\n", r); 2883 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); 2884 amdgpu_bo_unreserve(aobj); 2885 } 2886 } 2887 } 2888 } 2889 r = amdgpu_amdkfd_resume(adev); 2890 if (r) 2891 return r; 2892 2893 /* Make sure IB tests flushed */ 2894 flush_delayed_work(&adev->late_init_work); 2895 2896 /* blat the mode back in */ 2897 if (fbcon) { 2898 if (!amdgpu_device_has_dc_support(adev)) { 2899 /* pre DCE11 */ 2900 drm_helper_resume_force_mode(dev); 2901 2902 /* turn on display hw */ 2903 drm_modeset_lock_all(dev); 2904 list_for_each_entry(connector, &dev->mode_config.connector_list, head) { 2905 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); 2906 } 2907 drm_modeset_unlock_all(dev); 2908 } 2909 amdgpu_fbdev_set_suspend(adev, 0); 2910 } 2911 2912 drm_kms_helper_poll_enable(dev); 2913 2914 /* 2915 * Most of the connector probing functions try to acquire runtime pm 2916 * refs to ensure that the GPU is powered on when connector polling is 2917 * performed. Since we're calling this from a runtime PM callback, 2918 * trying to acquire rpm refs will cause us to deadlock. 2919 * 2920 * Since we're guaranteed to be holding the rpm lock, it's safe to 2921 * temporarily disable the rpm helpers so this doesn't deadlock us. 2922 */ 2923 #ifdef CONFIG_PM 2924 dev->dev->power.disable_depth++; 2925 #endif 2926 if (!amdgpu_device_has_dc_support(adev)) 2927 drm_helper_hpd_irq_event(dev); 2928 else 2929 drm_kms_helper_hotplug_event(dev); 2930 #ifdef CONFIG_PM 2931 dev->dev->power.disable_depth--; 2932 #endif 2933 adev->in_suspend = false; 2934 2935 return 0; 2936 } 2937 2938 /** 2939 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 2940 * 2941 * @adev: amdgpu_device pointer 2942 * 2943 * The list of all the hardware IPs that make up the asic is walked and 2944 * the check_soft_reset callbacks are run. check_soft_reset determines 2945 * if the asic is still hung or not. 2946 * Returns true if any of the IPs are still in a hung state, false if not. 
2947 */ 2948 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 2949 { 2950 int i; 2951 bool asic_hang = false; 2952 2953 if (amdgpu_sriov_vf(adev)) 2954 return true; 2955 2956 if (amdgpu_asic_need_full_reset(adev)) 2957 return true; 2958 2959 for (i = 0; i < adev->num_ip_blocks; i++) { 2960 if (!adev->ip_blocks[i].status.valid) 2961 continue; 2962 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 2963 adev->ip_blocks[i].status.hang = 2964 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 2965 if (adev->ip_blocks[i].status.hang) { 2966 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 2967 asic_hang = true; 2968 } 2969 } 2970 return asic_hang; 2971 } 2972 2973 /** 2974 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 2975 * 2976 * @adev: amdgpu_device pointer 2977 * 2978 * The list of all the hardware IPs that make up the asic is walked and the 2979 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 2980 * handles any IP specific hardware or software state changes that are 2981 * necessary for a soft reset to succeed. 2982 * Returns 0 on success, negative error code on failure. 2983 */ 2984 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 2985 { 2986 int i, r = 0; 2987 2988 for (i = 0; i < adev->num_ip_blocks; i++) { 2989 if (!adev->ip_blocks[i].status.valid) 2990 continue; 2991 if (adev->ip_blocks[i].status.hang && 2992 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 2993 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 2994 if (r) 2995 return r; 2996 } 2997 } 2998 2999 return 0; 3000 } 3001 3002 /** 3003 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 3004 * 3005 * @adev: amdgpu_device pointer 3006 * 3007 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 3008 * reset is necessary to recover. 3009 * Returns true if a full asic reset is required, false if not. 3010 */ 3011 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 3012 { 3013 int i; 3014 3015 if (amdgpu_asic_need_full_reset(adev)) 3016 return true; 3017 3018 for (i = 0; i < adev->num_ip_blocks; i++) { 3019 if (!adev->ip_blocks[i].status.valid) 3020 continue; 3021 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 3022 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 3023 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 3024 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 3025 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 3026 if (adev->ip_blocks[i].status.hang) { 3027 DRM_INFO("Some block need full reset!\n"); 3028 return true; 3029 } 3030 } 3031 } 3032 return false; 3033 } 3034 3035 /** 3036 * amdgpu_device_ip_soft_reset - do a soft reset 3037 * 3038 * @adev: amdgpu_device pointer 3039 * 3040 * The list of all the hardware IPs that make up the asic is walked and the 3041 * soft_reset callbacks are run if the block is hung. soft_reset handles any 3042 * IP specific hardware or software state changes that are necessary to soft 3043 * reset the IP. 3044 * Returns 0 on success, negative error code on failure. 
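 *
 * In the reset path further below these hooks are used roughly as:
 *
 *   if (!amdgpu_device_ip_need_full_reset(adev)) {
 *           amdgpu_device_ip_pre_soft_reset(adev);
 *           amdgpu_device_ip_soft_reset(adev);
 *           amdgpu_device_ip_post_soft_reset(adev);
 *   }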
3045 */ 3046 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 3047 { 3048 int i, r = 0; 3049 3050 for (i = 0; i < adev->num_ip_blocks; i++) { 3051 if (!adev->ip_blocks[i].status.valid) 3052 continue; 3053 if (adev->ip_blocks[i].status.hang && 3054 adev->ip_blocks[i].version->funcs->soft_reset) { 3055 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 3056 if (r) 3057 return r; 3058 } 3059 } 3060 3061 return 0; 3062 } 3063 3064 /** 3065 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 3066 * 3067 * @adev: amdgpu_device pointer 3068 * 3069 * The list of all the hardware IPs that make up the asic is walked and the 3070 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 3071 * handles any IP specific hardware or software state changes that are 3072 * necessary after the IP has been soft reset. 3073 * Returns 0 on success, negative error code on failure. 3074 */ 3075 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 3076 { 3077 int i, r = 0; 3078 3079 for (i = 0; i < adev->num_ip_blocks; i++) { 3080 if (!adev->ip_blocks[i].status.valid) 3081 continue; 3082 if (adev->ip_blocks[i].status.hang && 3083 adev->ip_blocks[i].version->funcs->post_soft_reset) 3084 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 3085 if (r) 3086 return r; 3087 } 3088 3089 return 0; 3090 } 3091 3092 /** 3093 * amdgpu_device_recover_vram - Recover some VRAM contents 3094 * 3095 * @adev: amdgpu_device pointer 3096 * 3097 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 3098 * restore things like GPUVM page tables after a GPU reset where 3099 * the contents of VRAM might be lost. 3100 * 3101 * Returns: 3102 * 0 on success, negative error code on failure. 3103 */ 3104 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 3105 { 3106 struct dma_fence *fence = NULL, *next = NULL; 3107 struct amdgpu_bo *shadow; 3108 long r = 1, tmo; 3109 3110 if (amdgpu_sriov_runtime(adev)) 3111 tmo = msecs_to_jiffies(8000); 3112 else 3113 tmo = msecs_to_jiffies(100); 3114 3115 DRM_INFO("recover vram bo from shadow start\n"); 3116 mutex_lock(&adev->shadow_list_lock); 3117 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { 3118 3119 /* No need to recover an evicted BO */ 3120 if (shadow->tbo.mem.mem_type != TTM_PL_TT || 3121 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) 3122 continue; 3123 3124 r = amdgpu_bo_restore_shadow(shadow, &next); 3125 if (r) 3126 break; 3127 3128 if (fence) { 3129 r = dma_fence_wait_timeout(fence, false, tmo); 3130 dma_fence_put(fence); 3131 fence = next; 3132 if (r <= 0) 3133 break; 3134 } else { 3135 fence = next; 3136 } 3137 } 3138 mutex_unlock(&adev->shadow_list_lock); 3139 3140 if (fence) 3141 tmo = dma_fence_wait_timeout(fence, false, tmo); 3142 dma_fence_put(fence); 3143 3144 if (r <= 0 || tmo <= 0) { 3145 DRM_ERROR("recover vram bo from shadow failed\n"); 3146 return -EIO; 3147 } 3148 3149 DRM_INFO("recover vram bo from shadow done\n"); 3150 return 0; 3151 } 3152 3153 /** 3154 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough 3155 * 3156 * @adev: amdgpu device pointer 3157 * 3158 * attempt to do soft-reset or full-reset and reinitialize Asic 3159 * return 0 means succeeded otherwise failed 3160 */ 3161 static int amdgpu_device_reset(struct amdgpu_device *adev) 3162 { 3163 bool need_full_reset, vram_lost = 0; 3164 int r; 3165 3166 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 3167 3168 if (!need_full_reset) { 3169 
amdgpu_device_ip_pre_soft_reset(adev); 3170 r = amdgpu_device_ip_soft_reset(adev); 3171 amdgpu_device_ip_post_soft_reset(adev); 3172 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 3173 DRM_INFO("soft reset failed, will fallback to full reset!\n"); 3174 need_full_reset = true; 3175 } 3176 } 3177 3178 if (need_full_reset) { 3179 r = amdgpu_device_ip_suspend(adev); 3180 3181 retry: 3182 r = amdgpu_asic_reset(adev); 3183 /* post card */ 3184 amdgpu_atom_asic_init(adev->mode_info.atom_context); 3185 3186 if (!r) { 3187 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); 3188 r = amdgpu_device_ip_resume_phase1(adev); 3189 if (r) 3190 goto out; 3191 3192 vram_lost = amdgpu_device_check_vram_lost(adev); 3193 if (vram_lost) { 3194 DRM_ERROR("VRAM is lost!\n"); 3195 atomic_inc(&adev->vram_lost_counter); 3196 } 3197 3198 r = amdgpu_gtt_mgr_recover( 3199 &adev->mman.bdev.man[TTM_PL_TT]); 3200 if (r) 3201 goto out; 3202 3203 r = amdgpu_device_fw_loading(adev); 3204 if (r) 3205 return r; 3206 3207 r = amdgpu_device_ip_resume_phase2(adev); 3208 if (r) 3209 goto out; 3210 3211 if (vram_lost) 3212 amdgpu_device_fill_reset_magic(adev); 3213 } 3214 } 3215 3216 out: 3217 if (!r) { 3218 amdgpu_irq_gpu_reset_resume_helper(adev); 3219 r = amdgpu_ib_ring_tests(adev); 3220 if (r) { 3221 dev_err(adev->dev, "ib ring test failed (%d).\n", r); 3222 r = amdgpu_device_ip_suspend(adev); 3223 need_full_reset = true; 3224 goto retry; 3225 } 3226 } 3227 3228 if (!r) 3229 r = amdgpu_device_recover_vram(adev); 3230 3231 return r; 3232 } 3233 3234 /** 3235 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3236 * 3237 * @adev: amdgpu device pointer 3238 * @from_hypervisor: request from hypervisor 3239 * 3240 * do VF FLR and reinitialize Asic 3241 * return 0 means succeeded otherwise failed 3242 */ 3243 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 3244 bool from_hypervisor) 3245 { 3246 int r; 3247 3248 if (from_hypervisor) 3249 r = amdgpu_virt_request_full_gpu(adev, true); 3250 else 3251 r = amdgpu_virt_reset_gpu(adev); 3252 if (r) 3253 return r; 3254 3255 /* Resume IP prior to SMC */ 3256 r = amdgpu_device_ip_reinit_early_sriov(adev); 3257 if (r) 3258 goto error; 3259 3260 /* we need recover gart prior to run SMC/CP/SDMA resume */ 3261 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); 3262 3263 r = amdgpu_device_fw_loading(adev); 3264 if (r) 3265 return r; 3266 3267 /* now we are okay to resume SMC/CP/SDMA */ 3268 r = amdgpu_device_ip_reinit_late_sriov(adev); 3269 if (r) 3270 goto error; 3271 3272 amdgpu_irq_gpu_reset_resume_helper(adev); 3273 r = amdgpu_ib_ring_tests(adev); 3274 3275 error: 3276 amdgpu_virt_release_full_gpu(adev, true); 3277 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3278 atomic_inc(&adev->vram_lost_counter); 3279 r = amdgpu_device_recover_vram(adev); 3280 } 3281 3282 return r; 3283 } 3284 3285 /** 3286 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 3287 * 3288 * @adev: amdgpu device pointer 3289 * 3290 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 3291 * a hung GPU. 
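 *
 * Note: with the check below, recovery is attempted when amdgpu_gpu_recovery
 * is 1, or when it is left at -1 (auto) on an SR-IOV VF; a value of 0, or
 * -1 on bare metal, disables it.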
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

	if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
					 !amdgpu_sriov_vf(adev))) {
		DRM_INFO("GPU recovery disabled.\n");
		return false;
	}

	return true;
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Returns 0 for success or an error on failure.
 */
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	int i, r, resched;

	dev_info(adev->dev, "GPU reset begin!\n");

	mutex_lock(&adev->lock_reset);
	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;

	/* Block kfd */
	amdgpu_amdkfd_pre_reset(adev);

	/* block TTM */
	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		kthread_park(ring->sched.thread);

		if (job && job->base.sched == &ring->sched)
			continue;

		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	if (amdgpu_sriov_vf(adev))
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
	else
		r = amdgpu_device_reset(adev);

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* only need recovery sched of the given job's ring
		 * or all rings (in the case @job is NULL)
		 * after above amdgpu_reset accomplished
		 */
		if ((!job || job->base.sched == &ring->sched) && !r)
			drm_sched_job_recovery(&ring->sched);

		kthread_unpark(ring->sched.thread);
	}

	if (!amdgpu_device_has_dc_support(adev)) {
		drm_helper_resume_force_mode(adev->ddev);
	}

	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);

	if (r) {
		/* bad news, how to tell it to userspace ? */
		dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
	} else {
		dev_info(adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
	}

	/* unlock kfd */
	amdgpu_amdkfd_post_reset(adev);
	amdgpu_vf_error_trans_all(adev);
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
	return r;
}

/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
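 *
 * The results land in adev->pm.pcie_gen_mask and adev->pm.pcie_mlw_mask;
 * both can be overridden up front with the amdgpu_pcie_gen_cap and
 * amdgpu_pcie_lane_cap module parameters, as the body below shows.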
3404 */ 3405 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 3406 { 3407 struct pci_dev *pdev; 3408 enum pci_bus_speed speed_cap; 3409 enum pcie_link_width link_width; 3410 3411 if (amdgpu_pcie_gen_cap) 3412 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 3413 3414 if (amdgpu_pcie_lane_cap) 3415 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 3416 3417 /* covers APUs as well */ 3418 if (pci_is_root_bus(adev->pdev->bus)) { 3419 if (adev->pm.pcie_gen_mask == 0) 3420 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 3421 if (adev->pm.pcie_mlw_mask == 0) 3422 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 3423 return; 3424 } 3425 3426 if (adev->pm.pcie_gen_mask == 0) { 3427 /* asic caps */ 3428 pdev = adev->pdev; 3429 speed_cap = pcie_get_speed_cap(pdev); 3430 if (speed_cap == PCI_SPEED_UNKNOWN) { 3431 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3432 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3433 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3434 } else { 3435 if (speed_cap == PCIE_SPEED_16_0GT) 3436 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3437 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3438 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3439 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 3440 else if (speed_cap == PCIE_SPEED_8_0GT) 3441 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3442 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3443 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 3444 else if (speed_cap == PCIE_SPEED_5_0GT) 3445 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3446 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 3447 else 3448 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 3449 } 3450 /* platform caps */ 3451 pdev = adev->ddev->pdev->bus->self; 3452 speed_cap = pcie_get_speed_cap(pdev); 3453 if (speed_cap == PCI_SPEED_UNKNOWN) { 3454 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3455 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3456 } else { 3457 if (speed_cap == PCIE_SPEED_16_0GT) 3458 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3459 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3460 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 3461 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 3462 else if (speed_cap == PCIE_SPEED_8_0GT) 3463 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3464 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 3465 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 3466 else if (speed_cap == PCIE_SPEED_5_0GT) 3467 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 3468 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 3469 else 3470 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 3471 3472 } 3473 } 3474 if (adev->pm.pcie_mlw_mask == 0) { 3475 pdev = adev->ddev->pdev->bus->self; 3476 link_width = pcie_get_width_cap(pdev); 3477 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { 3478 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 3479 } else { 3480 switch (link_width) { 3481 case PCIE_LNK_X32: 3482 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 3483 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3484 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3485 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3486 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3487 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3488 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3489 break; 3490 case PCIE_LNK_X16: 3491 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 3492 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3493 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3494 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3495 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3496 
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3497 break; 3498 case PCIE_LNK_X12: 3499 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 3500 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3501 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3502 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3503 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3504 break; 3505 case PCIE_LNK_X8: 3506 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 3507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3509 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3510 break; 3511 case PCIE_LNK_X4: 3512 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 3513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3515 break; 3516 case PCIE_LNK_X2: 3517 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 3518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 3519 break; 3520 case PCIE_LNK_X1: 3521 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 3522 break; 3523 default: 3524 break; 3525 } 3526 } 3527 } 3528 } 3529 3530