xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 72251fac062c0b4fe98670ec9e3db3f0702c50ae)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 
34 #include <drm/drm_atomic_helper.h>
35 #include <drm/drm_probe_helper.h>
36 #include <drm/amdgpu_drm.h>
37 #include <linux/vgaarb.h>
38 #include <linux/vga_switcheroo.h>
39 #include <linux/efi.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_i2c.h"
43 #include "atom.h"
44 #include "amdgpu_atombios.h"
45 #include "amdgpu_atomfirmware.h"
46 #include "amd_pcie.h"
47 #ifdef CONFIG_DRM_AMDGPU_SI
48 #include "si.h"
49 #endif
50 #ifdef CONFIG_DRM_AMDGPU_CIK
51 #include "cik.h"
52 #endif
53 #include "vi.h"
54 #include "soc15.h"
55 #include "nv.h"
56 #include "bif/bif_4_1_d.h"
57 #include <linux/pci.h>
58 #include <linux/firmware.h>
59 #include "amdgpu_vf_error.h"
60 
61 #include "amdgpu_amdkfd.h"
62 #include "amdgpu_pm.h"
63 
64 #include "amdgpu_xgmi.h"
65 #include "amdgpu_ras.h"
66 #include "amdgpu_pmu.h"
67 
68 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
69 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
70 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
71 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
72 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
73 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
74 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
75 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
76 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
77 
78 #define AMDGPU_RESUME_MS		2000
79 
80 static const char *amdgpu_asic_name[] = {
81 	"TAHITI",
82 	"PITCAIRN",
83 	"VERDE",
84 	"OLAND",
85 	"HAINAN",
86 	"BONAIRE",
87 	"KAVERI",
88 	"KABINI",
89 	"HAWAII",
90 	"MULLINS",
91 	"TOPAZ",
92 	"TONGA",
93 	"FIJI",
94 	"CARRIZO",
95 	"STONEY",
96 	"POLARIS10",
97 	"POLARIS11",
98 	"POLARIS12",
99 	"VEGAM",
100 	"VEGA10",
101 	"VEGA12",
102 	"VEGA20",
103 	"RAVEN",
104 	"ARCTURUS",
105 	"NAVI10",
106 	"NAVI14",
107 	"NAVI12",
108 	"LAST",
109 };
110 
111 /**
112  * DOC: pcie_replay_count
113  *
114  * The amdgpu driver provides a sysfs API for reporting the total number
115  * of PCIe replays (NAKs).
116  * The file pcie_replay_count is used for this and returns the total
117  * number of replays as a sum of the NAKs generated and NAKs received.
118  */
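
/*
 * Example (illustrative, not part of the original file): the attribute is
 * read-only and appears under the DRM device's PCI parent in sysfs.  The
 * exact path depends on the card index; "card0" below is an assumption.
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 */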
119 
120 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
121 		struct device_attribute *attr, char *buf)
122 {
123 	struct drm_device *ddev = dev_get_drvdata(dev);
124 	struct amdgpu_device *adev = ddev->dev_private;
125 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
126 
127 	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
128 }
129 
130 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
131 		amdgpu_device_get_pcie_replay_count, NULL);
132 
133 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
134 
135 /**
136  * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
137  *
138  * @dev: drm_device pointer
139  *
140  * Returns true if the device is a dGPU with HG/PX power control,
141  * otherwise returns false.
142  */
143 bool amdgpu_device_is_px(struct drm_device *dev)
144 {
145 	struct amdgpu_device *adev = dev->dev_private;
146 
147 	if (adev->flags & AMD_IS_PX)
148 		return true;
149 	return false;
150 }
151 
152 /*
153  * MMIO register access helper functions.
154  */
155 /**
156  * amdgpu_mm_rreg - read a memory mapped IO register
157  *
158  * @adev: amdgpu_device pointer
159  * @reg: dword aligned register offset
160  * @acc_flags: access flags which require special behavior
161  *
162  * Returns the 32 bit value from the offset specified.
163  */
164 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
165 			uint32_t acc_flags)
166 {
167 	uint32_t ret;
168 
169 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
170 		return amdgpu_virt_kiq_rreg(adev, reg);
171 
172 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
173 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
174 	else {
175 		unsigned long flags;
176 
177 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
178 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
179 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
180 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
181 	}
182 	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
183 	return ret;
184 }
185 
186 /*
187  * MMIO register byte read helper function
188  * @offset: byte offset from MMIO start
189  *
190  */
191 
192 /**
193  * amdgpu_mm_rreg8 - read a memory mapped IO register
194  *
195  * @adev: amdgpu_device pointer
196  * @offset: byte aligned register offset
197  *
198  * Returns the 8 bit value from the offset specified.
199  */
200 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
201 	if (offset < adev->rmmio_size)
202 		return (readb(adev->rmmio + offset));
203 	BUG();
204 }
205 
206 /*
207  * MMIO register byte write helper function
208  * @offset: byte offset from MMIO start
209  * @value: the value to be written to the register
210  *
211  */
212 /**
213  * amdgpu_mm_wreg8 - write to a memory mapped IO register
214  *
215  * @adev: amdgpu_device pointer
216  * @offset: byte aligned register offset
217  * @value: 8 bit value to write
218  *
219  * Writes the value specified to the offset specified.
220  */
221 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
222 	if (offset < adev->rmmio_size)
223 		writeb(value, adev->rmmio + offset);
224 	else
225 		BUG();
226 }
227 
228 /**
229  * amdgpu_mm_wreg - write to a memory mapped IO register
230  *
231  * @adev: amdgpu_device pointer
232  * @reg: dword aligned register offset
233  * @v: 32 bit value to write to the register
234  * @acc_flags: access flags which require special behavior
235  *
236  * Writes the value specified to the offset specified.
237  */
238 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
239 		    uint32_t acc_flags)
240 {
241 	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
242 
243 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
244 		adev->last_mm_index = v;
245 	}
246 
247 	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
248 		return amdgpu_virt_kiq_wreg(adev, reg, v);
249 
250 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
251 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
252 	else {
253 		unsigned long flags;
254 
255 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
256 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
257 		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
258 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
259 	}
260 
261 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
262 		udelay(500);
263 	}
264 }
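
/*
 * Usage sketch (illustrative, not part of the original file): most callers
 * use the RREG32()/WREG32() style wrappers from amdgpu.h rather than calling
 * amdgpu_mm_rreg()/amdgpu_mm_wreg() directly.  Offsets beyond the mapped
 * MMIO window are handled transparently through the mmMM_INDEX/mmMM_DATA
 * pair.  mmREG_FOO and SOME_ENABLE_BIT below are placeholders.
 *
 *   u32 tmp;
 *
 *   tmp = RREG32(mmREG_FOO);
 *   tmp |= SOME_ENABLE_BIT;
 *   WREG32(mmREG_FOO, tmp);
 */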
265 
266 /**
267  * amdgpu_io_rreg - read an IO register
268  *
269  * @adev: amdgpu_device pointer
270  * @reg: dword aligned register offset
271  *
272  * Returns the 32 bit value from the offset specified.
273  */
274 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
275 {
276 	if ((reg * 4) < adev->rio_mem_size)
277 		return ioread32(adev->rio_mem + (reg * 4));
278 	else {
279 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
280 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
281 	}
282 }
283 
284 /**
285  * amdgpu_io_wreg - write to an IO register
286  *
287  * @adev: amdgpu_device pointer
288  * @reg: dword aligned register offset
289  * @v: 32 bit value to write to the register
290  *
291  * Writes the value specified to the offset specified.
292  */
293 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
294 {
295 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
296 		adev->last_mm_index = v;
297 	}
298 
299 	if ((reg * 4) < adev->rio_mem_size)
300 		iowrite32(v, adev->rio_mem + (reg * 4));
301 	else {
302 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
303 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
304 	}
305 
306 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
307 		udelay(500);
308 	}
309 }
310 
311 /**
312  * amdgpu_mm_rdoorbell - read a doorbell dword
313  *
314  * @adev: amdgpu_device pointer
315  * @index: doorbell index
316  *
317  * Returns the value in the doorbell aperture at the
318  * requested doorbell index (CIK).
319  */
320 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
321 {
322 	if (index < adev->doorbell.num_doorbells) {
323 		return readl(adev->doorbell.ptr + index);
324 	} else {
325 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
326 		return 0;
327 	}
328 }
329 
330 /**
331  * amdgpu_mm_wdoorbell - write a doorbell dword
332  *
333  * @adev: amdgpu_device pointer
334  * @index: doorbell index
335  * @v: value to write
336  *
337  * Writes @v to the doorbell aperture at the
338  * requested doorbell index (CIK).
339  */
340 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
341 {
342 	if (index < adev->doorbell.num_doorbells) {
343 		writel(v, adev->doorbell.ptr + index);
344 	} else {
345 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
346 	}
347 }
348 
349 /**
350  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
351  *
352  * @adev: amdgpu_device pointer
353  * @index: doorbell index
354  *
355  * Returns the value in the doorbell aperture at the
356  * requested doorbell index (VEGA10+).
357  */
358 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
359 {
360 	if (index < adev->doorbell.num_doorbells) {
361 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
362 	} else {
363 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
364 		return 0;
365 	}
366 }
367 
368 /**
369  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
370  *
371  * @adev: amdgpu_device pointer
372  * @index: doorbell index
373  * @v: value to write
374  *
375  * Writes @v to the doorbell aperture at the
376  * requested doorbell index (VEGA10+).
377  */
378 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
379 {
380 	if (index < adev->doorbell.num_doorbells) {
381 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
382 	} else {
383 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
384 	}
385 }
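
/*
 * Usage sketch (illustrative, not part of the original file): ring code
 * typically publishes its updated write pointer through these doorbell
 * helpers (or the WDOORBELL32/WDOORBELL64 style wrappers) so the GPU picks
 * up new work without extra register traffic.  ring->use_doorbell,
 * ring->doorbell_index and ring->wptr are used here purely as an
 * illustration.
 *
 *   if (ring->use_doorbell)
 *           amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
 */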
386 
387 /**
388  * amdgpu_invalid_rreg - dummy reg read function
389  *
390  * @adev: amdgpu device pointer
391  * @reg: offset of register
392  *
393  * Dummy register read function.  Used for register blocks
394  * that certain asics don't have (all asics).
395  * Returns the value in the register.
396  */
397 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
398 {
399 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
400 	BUG();
401 	return 0;
402 }
403 
404 /**
405  * amdgpu_invalid_wreg - dummy reg write function
406  *
407  * @adev: amdgpu device pointer
408  * @reg: offset of register
409  * @v: value to write to the register
410  *
411  * Dummy register write function.  Used for register blocks
412  * that certain asics don't have (all asics).
413  */
414 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
415 {
416 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
417 		  reg, v);
418 	BUG();
419 }
420 
421 /**
422  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
423  *
424  * @adev: amdgpu device pointer
425  * @reg: offset of register
426  *
427  * Dummy register read function.  Used for register blocks
428  * that certain asics don't have (all asics).
429  * Returns the value in the register.
430  */
431 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
432 {
433 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
434 	BUG();
435 	return 0;
436 }
437 
438 /**
439  * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
440  *
441  * @adev: amdgpu device pointer
442  * @reg: offset of register
443  * @v: value to write to the register
444  *
445  * Dummy register write function.  Used for register blocks
446  * that certain asics don't have (all asics).
447  */
448 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
449 {
450 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
451 		  reg, v);
452 	BUG();
453 }
454 
455 /**
456  * amdgpu_block_invalid_rreg - dummy reg read function
457  *
458  * @adev: amdgpu device pointer
459  * @block: offset of instance
460  * @reg: offset of register
461  *
462  * Dummy register read function.  Used for register blocks
463  * that certain asics don't have (all asics).
464  * Returns the value in the register.
465  */
466 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
467 					  uint32_t block, uint32_t reg)
468 {
469 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
470 		  reg, block);
471 	BUG();
472 	return 0;
473 }
474 
475 /**
476  * amdgpu_block_invalid_wreg - dummy reg write function
477  *
478  * @adev: amdgpu device pointer
479  * @block: offset of instance
480  * @reg: offset of register
481  * @v: value to write to the register
482  *
483  * Dummy register write function.  Used for register blocks
484  * that certain asics don't have (all asics).
485  */
486 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
487 				      uint32_t block,
488 				      uint32_t reg, uint32_t v)
489 {
490 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
491 		  reg, block, v);
492 	BUG();
493 }
494 
495 /**
496  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
497  *
498  * @adev: amdgpu device pointer
499  *
500  * Allocates a scratch page of VRAM for use by various things in the
501  * driver.
502  */
503 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
504 {
505 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
506 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
507 				       &adev->vram_scratch.robj,
508 				       &adev->vram_scratch.gpu_addr,
509 				       (void **)&adev->vram_scratch.ptr);
510 }
511 
512 /**
513  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
514  *
515  * @adev: amdgpu device pointer
516  *
517  * Frees the VRAM scratch page.
518  */
519 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
520 {
521 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
522 }
523 
524 /**
525  * amdgpu_device_program_register_sequence - program an array of registers.
526  *
527  * @adev: amdgpu_device pointer
528  * @registers: pointer to the register array
529  * @array_size: size of the register array
530  *
531  * Programs an array of registers with AND and OR masks.
532  * This is a helper for setting golden registers.
533  */
534 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
535 					     const u32 *registers,
536 					     const u32 array_size)
537 {
538 	u32 tmp, reg, and_mask, or_mask;
539 	int i;
540 
541 	if (array_size % 3)
542 		return;
543 
544 	for (i = 0; i < array_size; i += 3) {
545 		reg = registers[i + 0];
546 		and_mask = registers[i + 1];
547 		or_mask = registers[i + 2];
548 
549 		if (and_mask == 0xffffffff) {
550 			tmp = or_mask;
551 		} else {
552 			tmp = RREG32(reg);
553 			tmp &= ~and_mask;
554 			if (adev->family >= AMDGPU_FAMILY_AI)
555 				tmp |= (or_mask & and_mask);
556 			else
557 				tmp |= or_mask;
558 		}
559 		WREG32(reg, tmp);
560 	}
561 }
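
/*
 * Example (illustrative, not part of the original file): a golden register
 * list is a flat array of {offset, and_mask, or_mask} triplets; an and_mask
 * of 0xffffffff means the or_mask value is written as-is.  The register
 * names below are placeholders.
 *
 *   static const u32 golden_settings_example[] = {
 *           mmREG_A, 0xffffffff, 0x00000100,
 *           mmREG_B, 0x0000000f, 0x00000002,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *                                           ARRAY_SIZE(golden_settings_example));
 */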
562 
563 /**
564  * amdgpu_device_pci_config_reset - reset the GPU
565  *
566  * @adev: amdgpu_device pointer
567  *
568  * Resets the GPU using the pci config reset sequence.
569  * Only applicable to asics prior to vega10.
570  */
571 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
572 {
573 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
574 }
575 
576 /*
577  * GPU doorbell aperture helpers function.
578  */
579 /**
580  * amdgpu_device_doorbell_init - Init doorbell driver information.
581  *
582  * @adev: amdgpu_device pointer
583  *
584  * Init doorbell driver information (CIK)
585  * Returns 0 on success, error on failure.
586  */
587 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
588 {
589 
590 	/* No doorbell on SI hardware generation */
591 	if (adev->asic_type < CHIP_BONAIRE) {
592 		adev->doorbell.base = 0;
593 		adev->doorbell.size = 0;
594 		adev->doorbell.num_doorbells = 0;
595 		adev->doorbell.ptr = NULL;
596 		return 0;
597 	}
598 
599 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
600 		return -EINVAL;
601 
602 	amdgpu_asic_init_doorbell_index(adev);
603 
604 	/* doorbell bar mapping */
605 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
606 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
607 
608 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
609 					     adev->doorbell_index.max_assignment+1);
610 	if (adev->doorbell.num_doorbells == 0)
611 		return -EINVAL;
612 
613 	/* For Vega, reserve and map two pages on the doorbell BAR since the
614 	 * SDMA paging queue doorbells use the second page. The
615 	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
616 	 * doorbells are in the first page, so with the paging queue enabled,
617 	 * num_doorbells needs one extra page (0x400 dwords).
618 	 */
619 	if (adev->asic_type >= CHIP_VEGA10)
620 		adev->doorbell.num_doorbells += 0x400;
621 
622 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
623 				     adev->doorbell.num_doorbells *
624 				     sizeof(u32));
625 	if (adev->doorbell.ptr == NULL)
626 		return -ENOMEM;
627 
628 	return 0;
629 }
630 
631 /**
632  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
633  *
634  * @adev: amdgpu_device pointer
635  *
636  * Tear down doorbell driver information (CIK)
637  */
638 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
639 {
640 	iounmap(adev->doorbell.ptr);
641 	adev->doorbell.ptr = NULL;
642 }
643 
644 
645 
646 /*
647  * amdgpu_device_wb_*()
648  * Writeback is the method by which the GPU updates special pages in memory
649  * with the status of certain GPU events (fences, ring pointers, etc.).
650  */
651 
652 /**
653  * amdgpu_device_wb_fini - Disable Writeback and free memory
654  *
655  * @adev: amdgpu_device pointer
656  *
657  * Disables Writeback and frees the Writeback memory (all asics).
658  * Used at driver shutdown.
659  */
660 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
661 {
662 	if (adev->wb.wb_obj) {
663 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
664 				      &adev->wb.gpu_addr,
665 				      (void **)&adev->wb.wb);
666 		adev->wb.wb_obj = NULL;
667 	}
668 }
669 
670 /**
671  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
672  *
673  * @adev: amdgpu_device pointer
674  *
675  * Initializes writeback and allocates writeback memory (all asics).
676  * Used at driver startup.
677  * Returns 0 on success or a negative error code on failure.
678  */
679 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
680 {
681 	int r;
682 
683 	if (adev->wb.wb_obj == NULL) {
684 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
685 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
686 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
687 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
688 					    (void **)&adev->wb.wb);
689 		if (r) {
690 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
691 			return r;
692 		}
693 
694 		adev->wb.num_wb = AMDGPU_MAX_WB;
695 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
696 
697 		/* clear wb memory */
698 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
699 	}
700 
701 	return 0;
702 }
703 
704 /**
705  * amdgpu_device_wb_get - Allocate a wb entry
706  *
707  * @adev: amdgpu_device pointer
708  * @wb: wb index
709  *
710  * Allocate a wb slot for use by the driver (all asics).
711  * Returns 0 on success or -EINVAL on failure.
712  */
713 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
714 {
715 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
716 
717 	if (offset < adev->wb.num_wb) {
718 		__set_bit(offset, adev->wb.used);
719 		*wb = offset << 3; /* convert to dw offset */
720 		return 0;
721 	} else {
722 		return -EINVAL;
723 	}
724 }
725 
726 /**
727  * amdgpu_device_wb_free - Free a wb entry
728  *
729  * @adev: amdgpu_device pointer
730  * @wb: wb index
731  *
732  * Free a wb slot allocated for use by the driver (all asics)
733  */
734 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
735 {
736 	wb >>= 3;
737 	if (wb < adev->wb.num_wb)
738 		__clear_bit(wb, adev->wb.used);
739 }
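
/*
 * Usage sketch (illustrative, not part of the original file): a writeback
 * slot is handed out as a dword index into adev->wb.wb[]; the matching GPU
 * address is derived from adev->wb.gpu_addr.
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *           volatile u32 *cpu_ptr = &adev->wb.wb[wb];
 *
 *           // ... let the GPU write status to gpu_addr, read it via cpu_ptr ...
 *
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */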
740 
741 /**
742  * amdgpu_device_resize_fb_bar - try to resize FB BAR
743  *
744  * @adev: amdgpu_device pointer
745  *
746  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
747  * to fail, but if any of the BARs is not accessible after the resize we abort
748  * driver loading by returning -ENODEV.
749  */
750 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
751 {
752 	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
753 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
754 	struct pci_bus *root;
755 	struct resource *res;
756 	unsigned i;
757 	u16 cmd;
758 	int r;
759 
760 	/* Bypass for VF */
761 	if (amdgpu_sriov_vf(adev))
762 		return 0;
763 
764 	/* Check if the root BUS has 64bit memory resources */
765 	root = adev->pdev->bus;
766 	while (root->parent)
767 		root = root->parent;
768 
769 	pci_bus_for_each_resource(root, res, i) {
770 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
771 		    res->start > 0x100000000ull)
772 			break;
773 	}
774 
775 	/* Trying to resize is pointless without a root hub window above 4GB */
776 	if (!res)
777 		return 0;
778 
779 	/* Disable memory decoding while we change the BAR addresses and size */
780 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
781 	pci_write_config_word(adev->pdev, PCI_COMMAND,
782 			      cmd & ~PCI_COMMAND_MEMORY);
783 
784 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
785 	amdgpu_device_doorbell_fini(adev);
786 	if (adev->asic_type >= CHIP_BONAIRE)
787 		pci_release_resource(adev->pdev, 2);
788 
789 	pci_release_resource(adev->pdev, 0);
790 
791 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
792 	if (r == -ENOSPC)
793 		DRM_INFO("Not enough PCI address space for a large BAR.");
794 	else if (r && r != -ENOTSUPP)
795 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
796 
797 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
798 
799 	/* When the doorbell or fb BAR isn't available we have no chance of
800 	 * using the device.
801 	 */
802 	r = amdgpu_device_doorbell_init(adev);
803 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
804 		return -ENODEV;
805 
806 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
807 
808 	return 0;
809 }
810 
811 /*
812  * GPU helpers function.
813  */
814 /**
815  * amdgpu_device_need_post - check if the hw needs to be posted
816  *
817  * @adev: amdgpu_device pointer
818  *
819  * Check if the asic has been initialized (all asics) at driver startup,
820  * or if posting is needed because a hw reset was performed.
821  * Returns true if posting is needed, false if not.
822  */
823 bool amdgpu_device_need_post(struct amdgpu_device *adev)
824 {
825 	uint32_t reg;
826 
827 	if (amdgpu_sriov_vf(adev))
828 		return false;
829 
830 	if (amdgpu_passthrough(adev)) {
831 		/* For FIJI: in the whole-GPU pass-through virtualization case, after a VM
832 		 * reboot some old SMC firmware still needs the driver to do vPost,
833 		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
834 		 * this flaw, so force vPost for SMC versions below 22.15.
835 		 */
836 		if (adev->asic_type == CHIP_FIJI) {
837 			int err;
838 			uint32_t fw_ver;
839 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
840 			/* force vPost if error occurred */
841 			if (err)
842 				return true;
843 
844 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
845 			if (fw_ver < 0x00160e00)
846 				return true;
847 		}
848 	}
849 
850 	if (adev->has_hw_reset) {
851 		adev->has_hw_reset = false;
852 		return true;
853 	}
854 
855 	/* bios scratch used on CIK+ */
856 	if (adev->asic_type >= CHIP_BONAIRE)
857 		return amdgpu_atombios_scratch_need_asic_init(adev);
858 
859 	/* check MEM_SIZE for older asics */
860 	reg = amdgpu_asic_get_config_memsize(adev);
861 
862 	if ((reg != 0) && (reg != 0xffffffff))
863 		return false;
864 
865 	return true;
866 }
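
/*
 * Usage sketch (illustrative, not part of the original file): when this
 * returns true the caller is expected to post the card through the vbios,
 * e.g. during device init or resume.  The posting helper named below is an
 * assumption about the surrounding code.
 *
 *   if (amdgpu_device_need_post(adev))
 *           amdgpu_atom_asic_init(adev->mode_info.atom_context);
 */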
867 
868 /* if we get transitioned to only one device, take VGA back */
869 /**
870  * amdgpu_device_vga_set_decode - enable/disable vga decode
871  *
872  * @cookie: amdgpu_device pointer
873  * @state: enable/disable vga decode
874  *
875  * Enable/disable vga decode (all asics).
876  * Returns VGA resource flags.
877  */
878 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
879 {
880 	struct amdgpu_device *adev = cookie;
881 	amdgpu_asic_set_vga_state(adev, state);
882 	if (state)
883 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
884 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
885 	else
886 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
887 }
888 
889 /**
890  * amdgpu_device_check_block_size - validate the vm block size
891  *
892  * @adev: amdgpu_device pointer
893  *
894  * Validates the vm block size specified via module parameter.
895  * The vm block size defines number of bits in page table versus page directory,
896  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
897  * page table and the remaining bits are in the page directory.
898  */
899 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
900 {
901 	/* defines number of bits in page table versus page directory,
902 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
903 	 * page table and the remaining bits are in the page directory */
904 	if (amdgpu_vm_block_size == -1)
905 		return;
906 
907 	if (amdgpu_vm_block_size < 9) {
908 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
909 			 amdgpu_vm_block_size);
910 		amdgpu_vm_block_size = -1;
911 	}
912 }
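
/*
 * Worked example (illustrative, not part of the original file): with the
 * minimum block size of 9, one page table covers 2^(9 + 12) bytes = 2MB of
 * GPU virtual address space; larger values grow the page table and shrink
 * the page directory accordingly.
 */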
913 
914 /**
915  * amdgpu_device_check_vm_size - validate the vm size
916  *
917  * @adev: amdgpu_device pointer
918  *
919  * Validates the vm size in GB specified via module parameter.
920  * The VM size is the size of the GPU virtual memory space in GB.
921  */
922 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
923 {
924 	/* no need to check the default value */
925 	if (amdgpu_vm_size == -1)
926 		return;
927 
928 	if (amdgpu_vm_size < 1) {
929 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
930 			 amdgpu_vm_size);
931 		amdgpu_vm_size = -1;
932 	}
933 }
934 
935 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
936 {
937 	struct sysinfo si;
938 	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
939 	uint64_t total_memory;
940 	uint64_t dram_size_seven_GB = 0x1B8000000;
941 	uint64_t dram_size_three_GB = 0xB8000000;
942 
943 	if (amdgpu_smu_memory_pool_size == 0)
944 		return;
945 
946 	if (!is_os_64) {
947 		DRM_WARN("Not 64-bit OS, feature not supported\n");
948 		goto def_value;
949 	}
950 	si_meminfo(&si);
951 	total_memory = (uint64_t)si.totalram * si.mem_unit;
952 
953 	if ((amdgpu_smu_memory_pool_size == 1) ||
954 		(amdgpu_smu_memory_pool_size == 2)) {
955 		if (total_memory < dram_size_three_GB)
956 			goto def_value1;
957 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
958 		(amdgpu_smu_memory_pool_size == 8)) {
959 		if (total_memory < dram_size_seven_GB)
960 			goto def_value1;
961 	} else {
962 		DRM_WARN("Smu memory pool size not supported\n");
963 		goto def_value;
964 	}
965 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
966 
967 	return;
968 
969 def_value1:
970 	DRM_WARN("Not enough system memory\n");
971 def_value:
972 	adev->pm.smu_prv_buffer_size = 0;
973 }
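
/*
 * Note (illustrative, not part of the original file): the module parameter
 * is given in units of 256MB (value << 28 bytes), so the accepted values of
 * 1, 2, 4 and 8 request 256MB, 512MB, 1GB and 2GB pools respectively, gated
 * on the amount of system memory checked above.
 */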
974 
975 /**
976  * amdgpu_device_check_arguments - validate module params
977  *
978  * @adev: amdgpu_device pointer
979  *
980  * Validates certain module parameters and updates
981  * the associated values used by the driver (all asics).
982  */
983 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
984 {
985 	int ret = 0;
986 
987 	if (amdgpu_sched_jobs < 4) {
988 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
989 			 amdgpu_sched_jobs);
990 		amdgpu_sched_jobs = 4;
991 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
992 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
993 			 amdgpu_sched_jobs);
994 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
995 	}
996 
997 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
998 		/* gart size must be greater or equal to 32M */
999 		dev_warn(adev->dev, "gart size (%d) too small\n",
1000 			 amdgpu_gart_size);
1001 		amdgpu_gart_size = -1;
1002 	}
1003 
1004 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1005 		/* gtt size must be greater or equal to 32M */
1006 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1007 				 amdgpu_gtt_size);
1008 		amdgpu_gtt_size = -1;
1009 	}
1010 
1011 	/* valid range is between 4 and 9 inclusive */
1012 	if (amdgpu_vm_fragment_size != -1 &&
1013 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1014 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1015 		amdgpu_vm_fragment_size = -1;
1016 	}
1017 
1018 	amdgpu_device_check_smu_prv_buffer_size(adev);
1019 
1020 	amdgpu_device_check_vm_size(adev);
1021 
1022 	amdgpu_device_check_block_size(adev);
1023 
1024 	ret = amdgpu_device_get_job_timeout_settings(adev);
1025 	if (ret) {
1026 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
1027 		return ret;
1028 	}
1029 
1030 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1031 
1032 	return ret;
1033 }
1034 
1035 /**
1036  * amdgpu_switcheroo_set_state - set switcheroo state
1037  *
1038  * @pdev: pci dev pointer
1039  * @state: vga_switcheroo state
1040  *
1041  * Callback for the switcheroo driver.  Suspends or resumes
1042  * the asic before or after it is powered up using ACPI methods.
1043  */
1044 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1045 {
1046 	struct drm_device *dev = pci_get_drvdata(pdev);
1047 
1048 	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1049 		return;
1050 
1051 	if (state == VGA_SWITCHEROO_ON) {
1052 		pr_info("amdgpu: switched on\n");
1053 		/* don't suspend or resume card normally */
1054 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1055 
1056 		amdgpu_device_resume(dev, true, true);
1057 
1058 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1059 		drm_kms_helper_poll_enable(dev);
1060 	} else {
1061 		pr_info("amdgpu: switched off\n");
1062 		drm_kms_helper_poll_disable(dev);
1063 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1064 		amdgpu_device_suspend(dev, true, true);
1065 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1066 	}
1067 }
1068 
1069 /**
1070  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1071  *
1072  * @pdev: pci dev pointer
1073  *
1074  * Callback for the switcheroo driver.  Check if the switcheroo
1075  * state can be changed.
1076  * Returns true if the state can be changed, false if not.
1077  */
1078 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1079 {
1080 	struct drm_device *dev = pci_get_drvdata(pdev);
1081 
1082 	/*
1083 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1084 	* locking inversion with the driver load path. And the access here is
1085 	* completely racy anyway. So don't bother with locking for now.
1086 	*/
1087 	return dev->open_count == 0;
1088 }
1089 
1090 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1091 	.set_gpu_state = amdgpu_switcheroo_set_state,
1092 	.reprobe = NULL,
1093 	.can_switch = amdgpu_switcheroo_can_switch,
1094 };
1095 
1096 /**
1097  * amdgpu_device_ip_set_clockgating_state - set the CG state
1098  *
1099  * @dev: amdgpu_device pointer
1100  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1101  * @state: clockgating state (gate or ungate)
1102  *
1103  * Sets the requested clockgating state for all instances of
1104  * the hardware IP specified.
1105  * Returns the error code from the last instance.
1106  */
1107 int amdgpu_device_ip_set_clockgating_state(void *dev,
1108 					   enum amd_ip_block_type block_type,
1109 					   enum amd_clockgating_state state)
1110 {
1111 	struct amdgpu_device *adev = dev;
1112 	int i, r = 0;
1113 
1114 	for (i = 0; i < adev->num_ip_blocks; i++) {
1115 		if (!adev->ip_blocks[i].status.valid)
1116 			continue;
1117 		if (adev->ip_blocks[i].version->type != block_type)
1118 			continue;
1119 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1120 			continue;
1121 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1122 			(void *)adev, state);
1123 		if (r)
1124 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1125 				  adev->ip_blocks[i].version->funcs->name, r);
1126 	}
1127 	return r;
1128 }
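
/*
 * Example (illustrative, not part of the original file): gate clocks on all
 * GFX IP instances.
 *
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          AMD_CG_STATE_GATE);
 */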
1129 
1130 /**
1131  * amdgpu_device_ip_set_powergating_state - set the PG state
1132  *
1133  * @dev: amdgpu_device pointer
1134  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1135  * @state: powergating state (gate or ungate)
1136  *
1137  * Sets the requested powergating state for all instances of
1138  * the hardware IP specified.
1139  * Returns the error code from the last instance.
1140  */
1141 int amdgpu_device_ip_set_powergating_state(void *dev,
1142 					   enum amd_ip_block_type block_type,
1143 					   enum amd_powergating_state state)
1144 {
1145 	struct amdgpu_device *adev = dev;
1146 	int i, r = 0;
1147 
1148 	for (i = 0; i < adev->num_ip_blocks; i++) {
1149 		if (!adev->ip_blocks[i].status.valid)
1150 			continue;
1151 		if (adev->ip_blocks[i].version->type != block_type)
1152 			continue;
1153 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1154 			continue;
1155 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1156 			(void *)adev, state);
1157 		if (r)
1158 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1159 				  adev->ip_blocks[i].version->funcs->name, r);
1160 	}
1161 	return r;
1162 }
1163 
1164 /**
1165  * amdgpu_device_ip_get_clockgating_state - get the CG state
1166  *
1167  * @adev: amdgpu_device pointer
1168  * @flags: clockgating feature flags
1169  *
1170  * Walks the list of IPs on the device and updates the clockgating
1171  * flags for each IP.
1172  * Updates @flags with the feature flags for each hardware IP where
1173  * clockgating is enabled.
1174  */
1175 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1176 					    u32 *flags)
1177 {
1178 	int i;
1179 
1180 	for (i = 0; i < adev->num_ip_blocks; i++) {
1181 		if (!adev->ip_blocks[i].status.valid)
1182 			continue;
1183 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1184 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1185 	}
1186 }
1187 
1188 /**
1189  * amdgpu_device_ip_wait_for_idle - wait for idle
1190  *
1191  * @adev: amdgpu_device pointer
1192  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1193  *
1194  * Waits for the requested hardware IP to be idle.
1195  * Returns 0 for success or a negative error code on failure.
1196  */
1197 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1198 				   enum amd_ip_block_type block_type)
1199 {
1200 	int i, r;
1201 
1202 	for (i = 0; i < adev->num_ip_blocks; i++) {
1203 		if (!adev->ip_blocks[i].status.valid)
1204 			continue;
1205 		if (adev->ip_blocks[i].version->type == block_type) {
1206 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1207 			if (r)
1208 				return r;
1209 			break;
1210 		}
1211 	}
1212 	return 0;
1213 
1214 }
1215 
1216 /**
1217  * amdgpu_device_ip_is_idle - is the hardware IP idle
1218  *
1219  * @adev: amdgpu_device pointer
1220  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1221  *
1222  * Check if the hardware IP is idle or not.
1223  * Returns true if the IP is idle, false if not.
1224  */
1225 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1226 			      enum amd_ip_block_type block_type)
1227 {
1228 	int i;
1229 
1230 	for (i = 0; i < adev->num_ip_blocks; i++) {
1231 		if (!adev->ip_blocks[i].status.valid)
1232 			continue;
1233 		if (adev->ip_blocks[i].version->type == block_type)
1234 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1235 	}
1236 	return true;
1237 
1238 }
1239 
1240 /**
1241  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1242  *
1243  * @adev: amdgpu_device pointer
1244  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1245  *
1246  * Returns a pointer to the hardware IP block structure
1247  * if it exists for the asic, otherwise NULL.
1248  */
1249 struct amdgpu_ip_block *
1250 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1251 			      enum amd_ip_block_type type)
1252 {
1253 	int i;
1254 
1255 	for (i = 0; i < adev->num_ip_blocks; i++)
1256 		if (adev->ip_blocks[i].version->type == type)
1257 			return &adev->ip_blocks[i];
1258 
1259 	return NULL;
1260 }
1261 
1262 /**
1263  * amdgpu_device_ip_block_version_cmp
1264  *
1265  * @adev: amdgpu_device pointer
1266  * @type: enum amd_ip_block_type
1267  * @major: major version
1268  * @minor: minor version
1269  *
1270  * Returns 0 if the IP block's version is equal to or greater than the
1271  * requested version, 1 if it is smaller or the ip_block doesn't exist.
1272  */
1273 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1274 				       enum amd_ip_block_type type,
1275 				       u32 major, u32 minor)
1276 {
1277 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1278 
1279 	if (ip_block && ((ip_block->version->major > major) ||
1280 			((ip_block->version->major == major) &&
1281 			(ip_block->version->minor >= minor))))
1282 		return 0;
1283 
1284 	return 1;
1285 }
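
/*
 * Example (illustrative, not part of the original file): check that the asic
 * carries at least version 7.0 of the SMC IP block.
 *
 *   bool has_smc_v7 =
 *           !amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 0);
 */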
1286 
1287 /**
1288  * amdgpu_device_ip_block_add
1289  *
1290  * @adev: amdgpu_device pointer
1291  * @ip_block_version: pointer to the IP to add
1292  *
1293  * Adds the IP block driver information to the collection of IPs
1294  * on the asic.
1295  */
1296 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1297 			       const struct amdgpu_ip_block_version *ip_block_version)
1298 {
1299 	if (!ip_block_version)
1300 		return -EINVAL;
1301 
1302 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1303 		  ip_block_version->funcs->name);
1304 
1305 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1306 
1307 	return 0;
1308 }
1309 
1310 /**
1311  * amdgpu_device_enable_virtual_display - enable virtual display feature
1312  *
1313  * @adev: amdgpu_device pointer
1314  *
1315  * Enables the virtual display feature if the user has enabled it via
1316  * the module parameter virtual_display.  This feature provides virtual
1317  * display hardware on headless boards or in virtualized environments.
1318  * This function parses and validates the configuration string specified by
1319  * the user and configures the virtual display configuration (number of
1320  * virtual connectors, crtcs, etc.) specified.
1321  */
1322 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1323 {
1324 	adev->enable_virtual_display = false;
1325 
1326 	if (amdgpu_virtual_display) {
1327 		struct drm_device *ddev = adev->ddev;
1328 		const char *pci_address_name = pci_name(ddev->pdev);
1329 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1330 
1331 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1332 		pciaddstr_tmp = pciaddstr;
1333 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1334 			pciaddname = strsep(&pciaddname_tmp, ",");
1335 			if (!strcmp("all", pciaddname)
1336 			    || !strcmp(pci_address_name, pciaddname)) {
1337 				long num_crtc;
1338 				int res = -1;
1339 
1340 				adev->enable_virtual_display = true;
1341 
1342 				if (pciaddname_tmp)
1343 					res = kstrtol(pciaddname_tmp, 10,
1344 						      &num_crtc);
1345 
1346 				if (!res) {
1347 					if (num_crtc < 1)
1348 						num_crtc = 1;
1349 					if (num_crtc > 6)
1350 						num_crtc = 6;
1351 					adev->mode_info.num_crtc = num_crtc;
1352 				} else {
1353 					adev->mode_info.num_crtc = 1;
1354 				}
1355 				break;
1356 			}
1357 		}
1358 
1359 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1360 			 amdgpu_virtual_display, pci_address_name,
1361 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1362 
1363 		kfree(pciaddstr);
1364 	}
1365 }
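
/*
 * Example (illustrative, not part of the original file): the virtual_display
 * string is a semicolon separated list of PCI addresses (or "all"), each
 * optionally followed by a comma and the number of CRTCs (clamped to 1..6).
 * The PCI address below is a placeholder.
 *
 *   modprobe amdgpu virtual_display=0000:01:00.0,2
 */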
1366 
1367 /**
1368  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1369  *
1370  * @adev: amdgpu_device pointer
1371  *
1372  * Parses the asic configuration parameters specified in the gpu info
1373  * firmware and makes them available to the driver for use in configuring
1374  * the asic.
1375  * Returns 0 on success, -EINVAL on failure.
1376  */
1377 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1378 {
1379 	const char *chip_name;
1380 	char fw_name[30];
1381 	int err;
1382 	const struct gpu_info_firmware_header_v1_0 *hdr;
1383 
1384 	adev->firmware.gpu_info_fw = NULL;
1385 
1386 	switch (adev->asic_type) {
1387 	case CHIP_TOPAZ:
1388 	case CHIP_TONGA:
1389 	case CHIP_FIJI:
1390 	case CHIP_POLARIS10:
1391 	case CHIP_POLARIS11:
1392 	case CHIP_POLARIS12:
1393 	case CHIP_VEGAM:
1394 	case CHIP_CARRIZO:
1395 	case CHIP_STONEY:
1396 #ifdef CONFIG_DRM_AMDGPU_SI
1397 	case CHIP_VERDE:
1398 	case CHIP_TAHITI:
1399 	case CHIP_PITCAIRN:
1400 	case CHIP_OLAND:
1401 	case CHIP_HAINAN:
1402 #endif
1403 #ifdef CONFIG_DRM_AMDGPU_CIK
1404 	case CHIP_BONAIRE:
1405 	case CHIP_HAWAII:
1406 	case CHIP_KAVERI:
1407 	case CHIP_KABINI:
1408 	case CHIP_MULLINS:
1409 #endif
1410 	case CHIP_VEGA20:
1411 	default:
1412 		return 0;
1413 	case CHIP_VEGA10:
1414 		chip_name = "vega10";
1415 		break;
1416 	case CHIP_VEGA12:
1417 		chip_name = "vega12";
1418 		break;
1419 	case CHIP_RAVEN:
1420 		if (adev->rev_id >= 8)
1421 			chip_name = "raven2";
1422 		else if (adev->pdev->device == 0x15d8)
1423 			chip_name = "picasso";
1424 		else
1425 			chip_name = "raven";
1426 		break;
1427 	case CHIP_ARCTURUS:
1428 		chip_name = "arcturus";
1429 		break;
1430 	case CHIP_NAVI10:
1431 		chip_name = "navi10";
1432 		break;
1433 	case CHIP_NAVI14:
1434 		chip_name = "navi14";
1435 		break;
1436 	case CHIP_NAVI12:
1437 		chip_name = "navi12";
1438 		break;
1439 	}
1440 
1441 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1442 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1443 	if (err) {
1444 		dev_err(adev->dev,
1445 			"Failed to load gpu_info firmware \"%s\"\n",
1446 			fw_name);
1447 		goto out;
1448 	}
1449 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1450 	if (err) {
1451 		dev_err(adev->dev,
1452 			"Failed to validate gpu_info firmware \"%s\"\n",
1453 			fw_name);
1454 		goto out;
1455 	}
1456 
1457 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1458 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1459 
1460 	switch (hdr->version_major) {
1461 	case 1:
1462 	{
1463 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1464 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1465 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1466 
1467 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1468 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1469 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1470 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1471 		adev->gfx.config.max_texture_channel_caches =
1472 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1473 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1474 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1475 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1476 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1477 		adev->gfx.config.double_offchip_lds_buf =
1478 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1479 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1480 		adev->gfx.cu_info.max_waves_per_simd =
1481 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1482 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1483 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1484 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1485 		if (hdr->version_minor >= 1) {
1486 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1487 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1488 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1489 			adev->gfx.config.num_sc_per_sh =
1490 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1491 			adev->gfx.config.num_packer_per_sc =
1492 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1493 		}
1494 #ifdef CONFIG_DRM_AMD_DC_DCN2_0
1495 		if (hdr->version_minor == 2) {
1496 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1497 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1498 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1499 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1500 		}
1501 #endif
1502 		break;
1503 	}
1504 	default:
1505 		dev_err(adev->dev,
1506 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1507 		err = -EINVAL;
1508 		goto out;
1509 	}
1510 out:
1511 	return err;
1512 }
1513 
1514 /**
1515  * amdgpu_device_ip_early_init - run early init for hardware IPs
1516  *
1517  * @adev: amdgpu_device pointer
1518  *
1519  * Early initialization pass for hardware IPs.  The hardware IPs that make
1520  * up each asic are discovered and each IP's early_init callback is run.  This
1521  * is the first stage in initializing the asic.
1522  * Returns 0 on success, negative error code on failure.
1523  */
1524 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1525 {
1526 	int i, r;
1527 
1528 	amdgpu_device_enable_virtual_display(adev);
1529 
1530 	switch (adev->asic_type) {
1531 	case CHIP_TOPAZ:
1532 	case CHIP_TONGA:
1533 	case CHIP_FIJI:
1534 	case CHIP_POLARIS10:
1535 	case CHIP_POLARIS11:
1536 	case CHIP_POLARIS12:
1537 	case CHIP_VEGAM:
1538 	case CHIP_CARRIZO:
1539 	case CHIP_STONEY:
1540 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1541 			adev->family = AMDGPU_FAMILY_CZ;
1542 		else
1543 			adev->family = AMDGPU_FAMILY_VI;
1544 
1545 		r = vi_set_ip_blocks(adev);
1546 		if (r)
1547 			return r;
1548 		break;
1549 #ifdef CONFIG_DRM_AMDGPU_SI
1550 	case CHIP_VERDE:
1551 	case CHIP_TAHITI:
1552 	case CHIP_PITCAIRN:
1553 	case CHIP_OLAND:
1554 	case CHIP_HAINAN:
1555 		adev->family = AMDGPU_FAMILY_SI;
1556 		r = si_set_ip_blocks(adev);
1557 		if (r)
1558 			return r;
1559 		break;
1560 #endif
1561 #ifdef CONFIG_DRM_AMDGPU_CIK
1562 	case CHIP_BONAIRE:
1563 	case CHIP_HAWAII:
1564 	case CHIP_KAVERI:
1565 	case CHIP_KABINI:
1566 	case CHIP_MULLINS:
1567 		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1568 			adev->family = AMDGPU_FAMILY_CI;
1569 		else
1570 			adev->family = AMDGPU_FAMILY_KV;
1571 
1572 		r = cik_set_ip_blocks(adev);
1573 		if (r)
1574 			return r;
1575 		break;
1576 #endif
1577 	case CHIP_VEGA10:
1578 	case CHIP_VEGA12:
1579 	case CHIP_VEGA20:
1580 	case CHIP_RAVEN:
1581 	case CHIP_ARCTURUS:
1582 		if (adev->asic_type == CHIP_RAVEN)
1583 			adev->family = AMDGPU_FAMILY_RV;
1584 		else
1585 			adev->family = AMDGPU_FAMILY_AI;
1586 
1587 		r = soc15_set_ip_blocks(adev);
1588 		if (r)
1589 			return r;
1590 		break;
1591 	case  CHIP_NAVI10:
1592 	case  CHIP_NAVI14:
1593 	case  CHIP_NAVI12:
1594 		adev->family = AMDGPU_FAMILY_NV;
1595 
1596 		r = nv_set_ip_blocks(adev);
1597 		if (r)
1598 			return r;
1599 		break;
1600 	default:
1601 		/* FIXME: not supported yet */
1602 		return -EINVAL;
1603 	}
1604 
1605 	r = amdgpu_device_parse_gpu_info_fw(adev);
1606 	if (r)
1607 		return r;
1608 
1609 	amdgpu_amdkfd_device_probe(adev);
1610 
1611 	if (amdgpu_sriov_vf(adev)) {
1612 		r = amdgpu_virt_request_full_gpu(adev, true);
1613 		if (r)
1614 			return -EAGAIN;
1615 	}
1616 
1617 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1618 	if (amdgpu_sriov_vf(adev))
1619 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1620 
1621 	for (i = 0; i < adev->num_ip_blocks; i++) {
1622 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1623 			DRM_ERROR("disabled ip block: %d <%s>\n",
1624 				  i, adev->ip_blocks[i].version->funcs->name);
1625 			adev->ip_blocks[i].status.valid = false;
1626 		} else {
1627 			if (adev->ip_blocks[i].version->funcs->early_init) {
1628 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1629 				if (r == -ENOENT) {
1630 					adev->ip_blocks[i].status.valid = false;
1631 				} else if (r) {
1632 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1633 						  adev->ip_blocks[i].version->funcs->name, r);
1634 					return r;
1635 				} else {
1636 					adev->ip_blocks[i].status.valid = true;
1637 				}
1638 			} else {
1639 				adev->ip_blocks[i].status.valid = true;
1640 			}
1641 		}
1642 		/* get the vbios after the asic_funcs are set up */
1643 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1644 			/* Read BIOS */
1645 			if (!amdgpu_get_bios(adev))
1646 				return -EINVAL;
1647 
1648 			r = amdgpu_atombios_init(adev);
1649 			if (r) {
1650 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1651 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1652 				return r;
1653 			}
1654 		}
1655 	}
1656 
1657 	adev->cg_flags &= amdgpu_cg_mask;
1658 	adev->pg_flags &= amdgpu_pg_mask;
1659 
1660 	return 0;
1661 }
1662 
1663 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1664 {
1665 	int i, r;
1666 
1667 	for (i = 0; i < adev->num_ip_blocks; i++) {
1668 		if (!adev->ip_blocks[i].status.sw)
1669 			continue;
1670 		if (adev->ip_blocks[i].status.hw)
1671 			continue;
1672 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1673 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1674 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1675 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1676 			if (r) {
1677 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1678 					  adev->ip_blocks[i].version->funcs->name, r);
1679 				return r;
1680 			}
1681 			adev->ip_blocks[i].status.hw = true;
1682 		}
1683 	}
1684 
1685 	return 0;
1686 }
1687 
1688 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1689 {
1690 	int i, r;
1691 
1692 	for (i = 0; i < adev->num_ip_blocks; i++) {
1693 		if (!adev->ip_blocks[i].status.sw)
1694 			continue;
1695 		if (adev->ip_blocks[i].status.hw)
1696 			continue;
1697 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1698 		if (r) {
1699 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1700 				  adev->ip_blocks[i].version->funcs->name, r);
1701 			return r;
1702 		}
1703 		adev->ip_blocks[i].status.hw = true;
1704 	}
1705 
1706 	return 0;
1707 }
1708 
1709 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1710 {
1711 	int r = 0;
1712 	int i;
1713 	uint32_t smu_version;
1714 
1715 	if (adev->asic_type >= CHIP_VEGA10) {
1716 		for (i = 0; i < adev->num_ip_blocks; i++) {
1717 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1718 				continue;
1719 
1720 			/* no need to do the fw loading again if already done */
1721 			if (adev->ip_blocks[i].status.hw == true)
1722 				break;
1723 
1724 			if (adev->in_gpu_reset || adev->in_suspend) {
1725 				r = adev->ip_blocks[i].version->funcs->resume(adev);
1726 				if (r) {
1727 					DRM_ERROR("resume of IP block <%s> failed %d\n",
1728 							  adev->ip_blocks[i].version->funcs->name, r);
1729 					return r;
1730 				}
1731 			} else {
1732 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1733 				if (r) {
1734 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1735 							  adev->ip_blocks[i].version->funcs->name, r);
1736 					return r;
1737 				}
1738 			}
1739 
1740 			adev->ip_blocks[i].status.hw = true;
1741 			break;
1742 		}
1743 	}
1744 
1745 	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1746 
1747 	return r;
1748 }
1749 
1750 /**
1751  * amdgpu_device_ip_init - run init for hardware IPs
1752  *
1753  * @adev: amdgpu_device pointer
1754  *
1755  * Main initialization pass for hardware IPs.  The list of all the hardware
1756  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1757  * are run.  sw_init initializes the software state associated with each IP
1758  * and hw_init initializes the hardware associated with each IP.
1759  * Returns 0 on success, negative error code on failure.
1760  */
1761 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1762 {
1763 	int i, r;
1764 
1765 	r = amdgpu_ras_init(adev);
1766 	if (r)
1767 		return r;
1768 
1769 	for (i = 0; i < adev->num_ip_blocks; i++) {
1770 		if (!adev->ip_blocks[i].status.valid)
1771 			continue;
1772 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1773 		if (r) {
1774 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1775 				  adev->ip_blocks[i].version->funcs->name, r);
1776 			goto init_failed;
1777 		}
1778 		adev->ip_blocks[i].status.sw = true;
1779 
1780 		/* need to do gmc hw init early so we can allocate gpu mem */
1781 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1782 			r = amdgpu_device_vram_scratch_init(adev);
1783 			if (r) {
1784 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1785 				goto init_failed;
1786 			}
1787 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1788 			if (r) {
1789 				DRM_ERROR("hw_init %d failed %d\n", i, r);
1790 				goto init_failed;
1791 			}
1792 			r = amdgpu_device_wb_init(adev);
1793 			if (r) {
1794 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
1795 				goto init_failed;
1796 			}
1797 			adev->ip_blocks[i].status.hw = true;
1798 
1799 			/* right after GMC hw init, we create CSA */
1800 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1801 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1802 								AMDGPU_GEM_DOMAIN_VRAM,
1803 								AMDGPU_CSA_SIZE);
1804 				if (r) {
1805 					DRM_ERROR("allocate CSA failed %d\n", r);
1806 					goto init_failed;
1807 				}
1808 			}
1809 		}
1810 	}
1811 
1812 	r = amdgpu_ib_pool_init(adev);
1813 	if (r) {
1814 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1815 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1816 		goto init_failed;
1817 	}
1818 
1819 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1820 	if (r)
1821 		goto init_failed;
1822 
1823 	r = amdgpu_device_ip_hw_init_phase1(adev);
1824 	if (r)
1825 		goto init_failed;
1826 
1827 	r = amdgpu_device_fw_loading(adev);
1828 	if (r)
1829 		goto init_failed;
1830 
1831 	r = amdgpu_device_ip_hw_init_phase2(adev);
1832 	if (r)
1833 		goto init_failed;
1834 
1835 	if (adev->gmc.xgmi.num_physical_nodes > 1)
1836 		amdgpu_xgmi_add_device(adev);
1837 	amdgpu_amdkfd_device_init(adev);
1838 
1839 init_failed:
1840 	if (amdgpu_sriov_vf(adev)) {
1841 		if (!r)
1842 			amdgpu_virt_init_data_exchange(adev);
1843 		amdgpu_virt_release_full_gpu(adev, true);
1844 	}
1845 
1846 	return r;
1847 }
1848 
1849 /**
1850  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1851  *
1852  * @adev: amdgpu_device pointer
1853  *
1854  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
1855  * this function before a GPU reset.  If the value is retained after a
1856  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
1857  */
1858 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1859 {
1860 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1861 }
1862 
1863 /**
1864  * amdgpu_device_check_vram_lost - check if vram is valid
1865  *
1866  * @adev: amdgpu_device pointer
1867  *
1868  * Checks the reset magic value written to the gart pointer in VRAM.
1869  * The driver calls this after a GPU reset to see if the contents of
1870  * VRAM has been lost or not.
1871  * Returns true if vram is lost, false if not.
1872  */
1873 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1874 {
1875 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
1876 			AMDGPU_RESET_MAGIC_NUM);
1877 }
1878 
1879 /**
1880  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
1881  *
1882  * @adev: amdgpu_device pointer
1883  *
1884  * The list of all the hardware IPs that make up the asic is walked and the
1885  * set_clockgating_state callbacks are run.
1886  * During the late init pass this enables clockgating for the hardware IPs;
1887  * during fini or suspend it disables clockgating.
1888  * Returns 0 on success, negative error code on failure.
1889  */
1891 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1892 						enum amd_clockgating_state state)
1893 {
1894 	int i, j, r;
1895 
1896 	if (amdgpu_emu_mode == 1)
1897 		return 0;
1898 
1899 	for (j = 0; j < adev->num_ip_blocks; j++) {
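		/* gate in the order the IP blocks were added, ungate in reverse order */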
1900 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1901 		if (!adev->ip_blocks[i].status.late_initialized)
1902 			continue;
1903 		/* skip CG for VCE/UVD, it's handled specially */
1904 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1905 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1906 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1907 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1908 			/* enable clockgating to save power */
1909 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1910 										     state);
1911 			if (r) {
1912 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
1913 					  adev->ip_blocks[i].version->funcs->name, r);
1914 				return r;
1915 			}
1916 		}
1917 	}
1918 
1919 	return 0;
1920 }
1921 
1922 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
1923 {
1924 	int i, j, r;
1925 
1926 	if (amdgpu_emu_mode == 1)
1927 		return 0;
1928 
1929 	for (j = 0; j < adev->num_ip_blocks; j++) {
1930 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1931 		if (!adev->ip_blocks[i].status.late_initialized)
1932 			continue;
1933 		/* skip PG for VCE/UVD, it's handled specially */
1934 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1935 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1936 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1937 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
1938 			/* enable powergating to save power */
1939 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1940 											state);
1941 			if (r) {
1942 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1943 					  adev->ip_blocks[i].version->funcs->name, r);
1944 				return r;
1945 			}
1946 		}
1947 	}
1948 	return 0;
1949 }
1950 
1951 static int amdgpu_device_enable_mgpu_fan_boost(void)
1952 {
1953 	struct amdgpu_gpu_instance *gpu_ins;
1954 	struct amdgpu_device *adev;
1955 	int i, ret = 0;
1956 
1957 	mutex_lock(&mgpu_info.mutex);
1958 
1959 	/*
1960 	 * MGPU fan boost feature should be enabled
1961 	 * only when there are two or more dGPUs in
1962 	 * the system
1963 	 */
1964 	if (mgpu_info.num_dgpu < 2)
1965 		goto out;
1966 
1967 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
1968 		gpu_ins = &(mgpu_info.gpu_ins[i]);
1969 		adev = gpu_ins->adev;
1970 		if (!(adev->flags & AMD_IS_APU) &&
1971 		    !gpu_ins->mgpu_fan_enabled &&
1972 		    adev->powerplay.pp_funcs &&
1973 		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1974 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1975 			if (ret)
1976 				break;
1977 
1978 			gpu_ins->mgpu_fan_enabled = 1;
1979 		}
1980 	}
1981 
1982 out:
1983 	mutex_unlock(&mgpu_info.mutex);
1984 
1985 	return ret;
1986 }
1987 
1988 /**
1989  * amdgpu_device_ip_late_init - run late init for hardware IPs
1990  *
1991  * @adev: amdgpu_device pointer
1992  *
1993  * Late initialization pass for hardware IPs.  The list of all the hardware
1994  * IPs that make up the asic is walked and the late_init callbacks are run.
1995  * late_init covers any special initialization that an IP requires
1996  * after all of the IPs have been initialized or something that needs to happen
1997  * late in the init process.
1998  * Returns 0 on success, negative error code on failure.
1999  */
2000 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2001 {
2002 	int i = 0, r;
2003 
2004 	for (i = 0; i < adev->num_ip_blocks; i++) {
2005 		if (!adev->ip_blocks[i].status.hw)
2006 			continue;
2007 		if (adev->ip_blocks[i].version->funcs->late_init) {
2008 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2009 			if (r) {
2010 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2011 					  adev->ip_blocks[i].version->funcs->name, r);
2012 				return r;
2013 			}
2014 		}
2015 		adev->ip_blocks[i].status.late_initialized = true;
2016 	}
2017 
2018 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2019 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2020 
2021 	amdgpu_device_fill_reset_magic(adev);
2022 
2023 	r = amdgpu_device_enable_mgpu_fan_boost();
2024 	if (r)
2025 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2026 
2027 	/* set to low pstate by default */
2028 	amdgpu_xgmi_set_pstate(adev, 0);
2029 
2030 	return 0;
2031 }
2032 
2033 /**
2034  * amdgpu_device_ip_fini - run fini for hardware IPs
2035  *
2036  * @adev: amdgpu_device pointer
2037  *
2038  * Main teardown pass for hardware IPs.  The list of all the hardware
2039  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2040  * are run.  hw_fini tears down the hardware associated with each IP
2041  * and sw_fini tears down any software state associated with each IP.
2042  * Returns 0 on success, negative error code on failure.
2043  */
2044 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2045 {
2046 	int i, r;
2047 
2048 	amdgpu_ras_pre_fini(adev);
2049 
2050 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2051 		amdgpu_xgmi_remove_device(adev);
2052 
2053 	amdgpu_amdkfd_device_fini(adev);
2054 
2055 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2056 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2057 
2058 	/* need to disable SMC first */
2059 	for (i = 0; i < adev->num_ip_blocks; i++) {
2060 		if (!adev->ip_blocks[i].status.hw)
2061 			continue;
2062 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2063 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2064 			/* XXX handle errors */
2065 			if (r) {
2066 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2067 					  adev->ip_blocks[i].version->funcs->name, r);
2068 			}
2069 			adev->ip_blocks[i].status.hw = false;
2070 			break;
2071 		}
2072 	}
2073 
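	/* hw_fini the remaining blocks in reverse init order */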
2074 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2075 		if (!adev->ip_blocks[i].status.hw)
2076 			continue;
2077 
2078 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2079 		/* XXX handle errors */
2080 		if (r) {
2081 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2082 				  adev->ip_blocks[i].version->funcs->name, r);
2083 		}
2084 
2085 		adev->ip_blocks[i].status.hw = false;
2086 	}
2087 
2088 
2089 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2090 		if (!adev->ip_blocks[i].status.sw)
2091 			continue;
2092 
2093 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2094 			amdgpu_ucode_free_bo(adev);
2095 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2096 			amdgpu_device_wb_fini(adev);
2097 			amdgpu_device_vram_scratch_fini(adev);
2098 			amdgpu_ib_pool_fini(adev);
2099 		}
2100 
2101 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2102 		/* XXX handle errors */
2103 		if (r) {
2104 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2105 				  adev->ip_blocks[i].version->funcs->name, r);
2106 		}
2107 		adev->ip_blocks[i].status.sw = false;
2108 		adev->ip_blocks[i].status.valid = false;
2109 	}
2110 
2111 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2112 		if (!adev->ip_blocks[i].status.late_initialized)
2113 			continue;
2114 		if (adev->ip_blocks[i].version->funcs->late_fini)
2115 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2116 		adev->ip_blocks[i].status.late_initialized = false;
2117 	}
2118 
2119 	amdgpu_ras_fini(adev);
2120 
2121 	if (amdgpu_sriov_vf(adev))
2122 		if (amdgpu_virt_release_full_gpu(adev, false))
2123 			DRM_ERROR("failed to release exclusive mode on fini\n");
2124 
2125 	return 0;
2126 }
2127 
2128 /**
2129  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2130  *
2131  * @work: work_struct.
2132  */
2133 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2134 {
2135 	struct amdgpu_device *adev =
2136 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2137 	int r;
2138 
2139 	r = amdgpu_ib_ring_tests(adev);
2140 	if (r)
2141 		DRM_ERROR("ib ring test failed (%d).\n", r);
2142 }
2143 
2144 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2145 {
2146 	struct amdgpu_device *adev =
2147 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2148 
2149 	mutex_lock(&adev->gfx.gfx_off_mutex);
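	/*
	 * only enter GFXOFF when it is not already enabled and no client
	 * is still holding GFX on (gfx_off_req_count == 0)
	 */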
2150 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2151 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2152 			adev->gfx.gfx_off_state = true;
2153 	}
2154 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2155 }
2156 
2157 /**
2158  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2159  *
2160  * @adev: amdgpu_device pointer
2161  *
2162  * First suspend function for hardware IPs.  Clockgating and powergating are
2163  * disabled and the suspend callbacks are run for the display (DCE) blocks
2164  * only.  suspend puts the hardware and software state in each IP into a
2165  * state suitable for suspend.
2166  * Returns 0 on success, negative error code on failure.
2167  */
2168 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2169 {
2170 	int i, r;
2171 
2172 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2173 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2174 
2175 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2176 		if (!adev->ip_blocks[i].status.valid)
2177 			continue;
2178 		/* displays are handled separately */
2179 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2181 			r = adev->ip_blocks[i].version->funcs->suspend(adev);
2182 			/* XXX handle errors */
2183 			if (r) {
2184 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2185 					  adev->ip_blocks[i].version->funcs->name, r);
2186 				return r;
2187 			}
2188 			adev->ip_blocks[i].status.hw = false;
2189 		}
2190 	}
2191 
2192 	return 0;
2193 }
2194 
2195 /**
2196  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2197  *
2198  * @adev: amdgpu_device pointer
2199  *
2200  * Second suspend function for hardware IPs.  The list of all the hardware
2201  * IPs that make up the asic is walked and the suspend callbacks are run for
2202  * all blocks except the display (DCE) blocks handled in phase 1.  suspend
2203  * puts the hardware and software state in each IP into a suspend-ready state.
2204  * Returns 0 on success, negative error code on failure.
2205  */
2206 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2207 {
2208 	int i, r;
2209 
2210 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2211 		if (!adev->ip_blocks[i].status.valid)
2212 			continue;
2213 		/* displays are handled in phase1 */
2214 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2215 			continue;
2217 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2218 		/* XXX handle errors */
2219 		if (r) {
2220 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2221 				  adev->ip_blocks[i].version->funcs->name, r);
2222 		}
2223 		adev->ip_blocks[i].status.hw = false;
2224 		/* handle putting the SMC in the appropriate state */
2225 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2226 			if (is_support_sw_smu(adev)) {
2227 				/* todo */
2228 			} else if (adev->powerplay.pp_funcs &&
2229 					   adev->powerplay.pp_funcs->set_mp1_state) {
2230 				r = adev->powerplay.pp_funcs->set_mp1_state(
2231 					adev->powerplay.pp_handle,
2232 					adev->mp1_state);
2233 				if (r) {
2234 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2235 						  adev->mp1_state, r);
2236 					return r;
2237 				}
2238 			}
2239 		}
2242 	}
2243 
2244 	return 0;
2245 }
2246 
2247 /**
2248  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2249  *
2250  * @adev: amdgpu_device pointer
2251  *
2252  * Main suspend function for hardware IPs.  The list of all the hardware
2253  * IPs that make up the asic is walked, clockgating is disabled and the
2254  * suspend callbacks are run.  suspend puts the hardware and software state
2255  * in each IP into a state suitable for suspend.
2256  * Returns 0 on success, negative error code on failure.
2257  */
2258 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2259 {
2260 	int r;
2261 
2262 	if (amdgpu_sriov_vf(adev))
2263 		amdgpu_virt_request_full_gpu(adev, false);
2264 
2265 	r = amdgpu_device_ip_suspend_phase1(adev);
2266 	if (r)
2267 		return r;
2268 	r = amdgpu_device_ip_suspend_phase2(adev);
2269 
2270 	if (amdgpu_sriov_vf(adev))
2271 		amdgpu_virt_release_full_gpu(adev, false);
2272 
2273 	return r;
2274 }
2275 
2276 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2277 {
2278 	int i, r;
2279 
2280 	static enum amd_ip_block_type ip_order[] = {
2281 		AMD_IP_BLOCK_TYPE_GMC,
2282 		AMD_IP_BLOCK_TYPE_COMMON,
2283 		AMD_IP_BLOCK_TYPE_PSP,
2284 		AMD_IP_BLOCK_TYPE_IH,
2285 	};
2286 
2287 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2288 		int j;
2289 		struct amdgpu_ip_block *block;
2290 
2291 		for (j = 0; j < adev->num_ip_blocks; j++) {
2292 			block = &adev->ip_blocks[j];
2293 
2294 			block->status.hw = false;
2295 			if (block->version->type != ip_order[i] ||
2296 				!block->status.valid)
2297 				continue;
2298 
2299 			r = block->version->funcs->hw_init(adev);
2300 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2301 			if (r)
2302 				return r;
2303 			block->status.hw = true;
2304 		}
2305 	}
2306 
2307 	return 0;
2308 }
2309 
2310 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2311 {
2312 	int i, r;
2313 
2314 	static enum amd_ip_block_type ip_order[] = {
2315 		AMD_IP_BLOCK_TYPE_SMC,
2316 		AMD_IP_BLOCK_TYPE_DCE,
2317 		AMD_IP_BLOCK_TYPE_GFX,
2318 		AMD_IP_BLOCK_TYPE_SDMA,
2319 		AMD_IP_BLOCK_TYPE_UVD,
2320 		AMD_IP_BLOCK_TYPE_VCE
2321 	};
2322 
2323 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2324 		int j;
2325 		struct amdgpu_ip_block *block;
2326 
2327 		for (j = 0; j < adev->num_ip_blocks; j++) {
2328 			block = &adev->ip_blocks[j];
2329 
2330 			if (block->version->type != ip_order[i] ||
2331 				!block->status.valid ||
2332 				block->status.hw)
2333 				continue;
2334 
2335 			r = block->version->funcs->hw_init(adev);
2336 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2337 			if (r)
2338 				return r;
2339 			block->status.hw = true;
2340 		}
2341 	}
2342 
2343 	return 0;
2344 }
2345 
2346 /**
2347  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2348  *
2349  * @adev: amdgpu_device pointer
2350  *
2351  * First resume function for hardware IPs.  The list of all the hardware
2352  * IPs that make up the asic is walked and the resume callbacks are run for
2353  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2354  * after a suspend and updates the software state as necessary.  This
2355  * function is also used for restoring the GPU after a GPU reset.
2356  * Returns 0 on success, negative error code on failure.
2357  */
2358 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2359 {
2360 	int i, r;
2361 
2362 	for (i = 0; i < adev->num_ip_blocks; i++) {
2363 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2364 			continue;
2365 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2366 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2367 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2368 
2369 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2370 			if (r) {
2371 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2372 					  adev->ip_blocks[i].version->funcs->name, r);
2373 				return r;
2374 			}
2375 			adev->ip_blocks[i].status.hw = true;
2376 		}
2377 	}
2378 
2379 	return 0;
2380 }
2381 
2382 /**
2383  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2384  *
2385  * @adev: amdgpu_device pointer
2386  *
2387  * Second resume function for hardware IPs.  The list of all the hardware
2388  * IPs that make up the asic is walked and the resume callbacks are run for
2389  * all blocks except COMMON, GMC, IH and PSP.  resume puts the hardware into a
2390  * functional state after a suspend and updates the software state as
2391  * necessary.  This function is also used for restoring the GPU after a GPU
2392  * reset.
2393  * Returns 0 on success, negative error code on failure.
2394  */
2395 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2396 {
2397 	int i, r;
2398 
2399 	for (i = 0; i < adev->num_ip_blocks; i++) {
2400 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2401 			continue;
2402 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2403 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2404 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2405 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2406 			continue;
2407 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2408 		if (r) {
2409 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2410 				  adev->ip_blocks[i].version->funcs->name, r);
2411 			return r;
2412 		}
2413 		adev->ip_blocks[i].status.hw = true;
2414 	}
2415 
2416 	return 0;
2417 }
2418 
2419 /**
2420  * amdgpu_device_ip_resume - run resume for hardware IPs
2421  *
2422  * @adev: amdgpu_device pointer
2423  *
2424  * Main resume function for hardware IPs.  The hardware IPs
2425  * are split into two resume functions because they are
2426  * also used in recovering from a GPU reset and some additional
2427  * steps need to be taken between them.  In this case (S3/S4) they are
2428  * run sequentially.
2429  * Returns 0 on success, negative error code on failure.
2430  */
2431 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2432 {
2433 	int r;
2434 
2435 	r = amdgpu_device_ip_resume_phase1(adev);
2436 	if (r)
2437 		return r;
2438 
2439 	r = amdgpu_device_fw_loading(adev);
2440 	if (r)
2441 		return r;
2442 
2443 	r = amdgpu_device_ip_resume_phase2(adev);
2444 
2445 	return r;
2446 }
2447 
2448 /**
2449  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2450  *
2451  * @adev: amdgpu_device pointer
2452  *
2453  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2454  */
2455 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2456 {
2457 	if (amdgpu_sriov_vf(adev)) {
2458 		if (adev->is_atom_fw) {
2459 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2460 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2461 		} else {
2462 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2463 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2464 		}
2465 
2466 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2467 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2468 	}
2469 }
2470 
2471 /**
2472  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2473  *
2474  * @asic_type: AMD asic type
2475  *
2476  * Check if there is DC (new modesetting infrastructure) support for an asic.
2477  * Returns true if DC has support, false if not.
2478  */
2479 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2480 {
2481 	switch (asic_type) {
2482 #if defined(CONFIG_DRM_AMD_DC)
2483 	case CHIP_BONAIRE:
2484 	case CHIP_KAVERI:
2485 	case CHIP_KABINI:
2486 	case CHIP_MULLINS:
2487 		/*
2488 		 * We have systems in the wild with these ASICs that require
2489 		 * LVDS and VGA support which is not supported with DC.
2490 		 *
2491 		 * Fallback to the non-DC driver here by default so as not to
2492 		 * cause regressions.
2493 		 */
2494 		return amdgpu_dc > 0;
2495 	case CHIP_HAWAII:
2496 	case CHIP_CARRIZO:
2497 	case CHIP_STONEY:
2498 	case CHIP_POLARIS10:
2499 	case CHIP_POLARIS11:
2500 	case CHIP_POLARIS12:
2501 	case CHIP_VEGAM:
2502 	case CHIP_TONGA:
2503 	case CHIP_FIJI:
2504 	case CHIP_VEGA10:
2505 	case CHIP_VEGA12:
2506 	case CHIP_VEGA20:
2507 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
2508 	case CHIP_RAVEN:
2509 #endif
2510 #if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2511 	case CHIP_NAVI10:
2512 	case CHIP_NAVI14:
2513 	case CHIP_NAVI12:
2514 #endif
2515 		return amdgpu_dc != 0;
2516 #endif
2517 	default:
2518 		return false;
2519 	}
2520 }
2521 
2522 /**
2523  * amdgpu_device_has_dc_support - check if dc is supported
2524  *
2525  * @adev: amdgpu_device pointer
2526  *
2527  * Returns true for supported, false for not supported
2528  */
2529 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2530 {
2531 	if (amdgpu_sriov_vf(adev))
2532 		return false;
2533 
2534 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2535 }
2536 
2537 
2538 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2539 {
2540 	struct amdgpu_device *adev =
2541 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2542 
2543 	adev->asic_reset_res =  amdgpu_asic_reset(adev);
2544 	if (adev->asic_reset_res)
2545 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2546 			 adev->asic_reset_res, adev->ddev->unique);
2547 }
2548 
2549 
2550 /**
2551  * amdgpu_device_init - initialize the driver
2552  *
2553  * @adev: amdgpu_device pointer
2554  * @ddev: drm dev pointer
2555  * @pdev: pci dev pointer
2556  * @flags: driver flags
2557  *
2558  * Initializes the driver info and hw (all asics).
2559  * Returns 0 for success or an error on failure.
2560  * Called at driver startup.
2561  */
2562 int amdgpu_device_init(struct amdgpu_device *adev,
2563 		       struct drm_device *ddev,
2564 		       struct pci_dev *pdev,
2565 		       uint32_t flags)
2566 {
2567 	int r, i;
2568 	bool runtime = false;
2569 	u32 max_MBps;
2570 
2571 	adev->shutdown = false;
2572 	adev->dev = &pdev->dev;
2573 	adev->ddev = ddev;
2574 	adev->pdev = pdev;
2575 	adev->flags = flags;
2576 	adev->asic_type = flags & AMD_ASIC_MASK;
2577 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2578 	if (amdgpu_emu_mode == 1)
2579 		adev->usec_timeout *= 2;
2580 	adev->gmc.gart_size = 512 * 1024 * 1024;
2581 	adev->accel_working = false;
2582 	adev->num_rings = 0;
2583 	adev->mman.buffer_funcs = NULL;
2584 	adev->mman.buffer_funcs_ring = NULL;
2585 	adev->vm_manager.vm_pte_funcs = NULL;
2586 	adev->vm_manager.vm_pte_num_rqs = 0;
2587 	adev->gmc.gmc_funcs = NULL;
2588 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2589 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2590 
2591 	adev->smc_rreg = &amdgpu_invalid_rreg;
2592 	adev->smc_wreg = &amdgpu_invalid_wreg;
2593 	adev->pcie_rreg = &amdgpu_invalid_rreg;
2594 	adev->pcie_wreg = &amdgpu_invalid_wreg;
2595 	adev->pciep_rreg = &amdgpu_invalid_rreg;
2596 	adev->pciep_wreg = &amdgpu_invalid_wreg;
2597 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2598 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2599 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2600 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2601 	adev->didt_rreg = &amdgpu_invalid_rreg;
2602 	adev->didt_wreg = &amdgpu_invalid_wreg;
2603 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2604 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2605 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2606 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2607 
2608 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2609 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2610 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2611 
2612 	/* mutex initialization is all done here so we
2613 	 * can recall functions without having locking issues */
2614 	atomic_set(&adev->irq.ih.lock, 0);
2615 	mutex_init(&adev->firmware.mutex);
2616 	mutex_init(&adev->pm.mutex);
2617 	mutex_init(&adev->gfx.gpu_clock_mutex);
2618 	mutex_init(&adev->srbm_mutex);
2619 	mutex_init(&adev->gfx.pipe_reserve_mutex);
2620 	mutex_init(&adev->gfx.gfx_off_mutex);
2621 	mutex_init(&adev->grbm_idx_mutex);
2622 	mutex_init(&adev->mn_lock);
2623 	mutex_init(&adev->virt.vf_errors.lock);
2624 	hash_init(adev->mn_hash);
2625 	mutex_init(&adev->lock_reset);
2626 	mutex_init(&adev->virt.dpm_mutex);
2627 	mutex_init(&adev->psp.mutex);
2628 
2629 	r = amdgpu_device_check_arguments(adev);
2630 	if (r)
2631 		return r;
2632 
2633 	spin_lock_init(&adev->mmio_idx_lock);
2634 	spin_lock_init(&adev->smc_idx_lock);
2635 	spin_lock_init(&adev->pcie_idx_lock);
2636 	spin_lock_init(&adev->uvd_ctx_idx_lock);
2637 	spin_lock_init(&adev->didt_idx_lock);
2638 	spin_lock_init(&adev->gc_cac_idx_lock);
2639 	spin_lock_init(&adev->se_cac_idx_lock);
2640 	spin_lock_init(&adev->audio_endpt_idx_lock);
2641 	spin_lock_init(&adev->mm_stats.lock);
2642 
2643 	INIT_LIST_HEAD(&adev->shadow_list);
2644 	mutex_init(&adev->shadow_list_lock);
2645 
2646 	INIT_LIST_HEAD(&adev->ring_lru_list);
2647 	spin_lock_init(&adev->ring_lru_list_lock);
2648 
2649 	INIT_DELAYED_WORK(&adev->delayed_init_work,
2650 			  amdgpu_device_delayed_init_work_handler);
2651 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2652 			  amdgpu_device_delay_enable_gfx_off);
2653 
2654 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2655 
2656 	adev->gfx.gfx_off_req_count = 1;
2657 	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2658 
2659 	/* Registers mapping */
2660 	/* TODO: block userspace mapping of io register */
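	/*
	 * the register aperture is BAR 5 on CIK and newer asics,
	 * BAR 2 on the older SI parts
	 */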
2661 	if (adev->asic_type >= CHIP_BONAIRE) {
2662 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2663 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2664 	} else {
2665 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2666 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2667 	}
2668 
2669 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2670 	if (adev->rmmio == NULL) {
2671 		return -ENOMEM;
2672 	}
2673 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2674 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2675 
2676 	/* io port mapping */
2677 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2678 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2679 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2680 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2681 			break;
2682 		}
2683 	}
2684 	if (adev->rio_mem == NULL)
2685 		DRM_INFO("PCI I/O BAR is not found.\n");
2686 
2687 	/* enable PCIE atomic ops */
2688 	r = pci_enable_atomic_ops_to_root(adev->pdev,
2689 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2690 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2691 	if (r) {
2692 		adev->have_atomics_support = false;
2693 		DRM_INFO("PCIe atomic ops are not supported\n");
2694 	} else {
2695 		adev->have_atomics_support = true;
2696 	}
2697 
2698 	amdgpu_device_get_pcie_info(adev);
2699 
2700 	if (amdgpu_mcbp)
2701 		DRM_INFO("MCBP is enabled\n");
2702 
2703 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2704 		adev->enable_mes = true;
2705 
2706 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
2707 		r = amdgpu_discovery_init(adev);
2708 		if (r) {
2709 			dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2710 			return r;
2711 		}
2712 	}
2713 
2714 	/* early init functions */
2715 	r = amdgpu_device_ip_early_init(adev);
2716 	if (r)
2717 		return r;
2718 
2719 	/* doorbell bar mapping and doorbell index init */
2720 	amdgpu_device_doorbell_init(adev);
2721 
2722 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2723 	/* this will fail for cards that aren't VGA class devices, just
2724 	 * ignore it */
2725 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
2726 
2727 	if (amdgpu_device_is_px(ddev))
2728 		runtime = true;
2729 	if (!pci_is_thunderbolt_attached(adev->pdev))
2730 		vga_switcheroo_register_client(adev->pdev,
2731 					       &amdgpu_switcheroo_ops, runtime);
2732 	if (runtime)
2733 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2734 
2735 	if (amdgpu_emu_mode == 1) {
2736 		/* post the asic on emulation mode */
2737 		emu_soc_asic_init(adev);
2738 		goto fence_driver_init;
2739 	}
2740 
2741 	/* detect whether we have an SR-IOV vBIOS */
2742 	amdgpu_device_detect_sriov_bios(adev);
2743 
2744 	/* check if we need to reset the asic
2745 	 *  E.g., driver was not cleanly unloaded previously, etc.
2746 	 */
2747 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
2748 		r = amdgpu_asic_reset(adev);
2749 		if (r) {
2750 			dev_err(adev->dev, "asic reset on init failed\n");
2751 			goto failed;
2752 		}
2753 	}
2754 
2755 	/* Post card if necessary */
2756 	if (amdgpu_device_need_post(adev)) {
2757 		if (!adev->bios) {
2758 			dev_err(adev->dev, "no vBIOS found\n");
2759 			r = -EINVAL;
2760 			goto failed;
2761 		}
2762 		DRM_INFO("GPU posting now...\n");
2763 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2764 		if (r) {
2765 			dev_err(adev->dev, "gpu post error!\n");
2766 			goto failed;
2767 		}
2768 	}
2769 
2770 	if (adev->is_atom_fw) {
2771 		/* Initialize clocks */
2772 		r = amdgpu_atomfirmware_get_clock_info(adev);
2773 		if (r) {
2774 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
2775 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2776 			goto failed;
2777 		}
2778 	} else {
2779 		/* Initialize clocks */
2780 		r = amdgpu_atombios_get_clock_info(adev);
2781 		if (r) {
2782 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
2783 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
2784 			goto failed;
2785 		}
2786 		/* init i2c buses */
2787 		if (!amdgpu_device_has_dc_support(adev))
2788 			amdgpu_atombios_i2c_init(adev);
2789 	}
2790 
2791 fence_driver_init:
2792 	/* Fence driver */
2793 	r = amdgpu_fence_driver_init(adev);
2794 	if (r) {
2795 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
2796 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
2797 		goto failed;
2798 	}
2799 
2800 	/* init the mode config */
2801 	drm_mode_config_init(adev->ddev);
2802 
2803 	r = amdgpu_device_ip_init(adev);
2804 	if (r) {
2805 		/* failed in exclusive mode due to timeout */
2806 		if (amdgpu_sriov_vf(adev) &&
2807 		    !amdgpu_sriov_runtime(adev) &&
2808 		    amdgpu_virt_mmio_blocked(adev) &&
2809 		    !amdgpu_virt_wait_reset(adev)) {
2810 			dev_err(adev->dev, "VF exclusive mode timeout\n");
2811 			/* Don't send request since VF is inactive. */
2812 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2813 			adev->virt.ops = NULL;
2814 			r = -EAGAIN;
2815 			goto failed;
2816 		}
2817 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
2818 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
2819 		if (amdgpu_virt_request_full_gpu(adev, false))
2820 			amdgpu_virt_release_full_gpu(adev, false);
2821 		goto failed;
2822 	}
2823 
2824 	adev->accel_working = true;
2825 
2826 	amdgpu_vm_check_compute_bug(adev);
2827 
2828 	/* Initialize the buffer migration limit. */
2829 	if (amdgpu_moverate >= 0)
2830 		max_MBps = amdgpu_moverate;
2831 	else
2832 		max_MBps = 8; /* Allow 8 MB/s. */
2833 	/* Get a log2 for easy divisions. */
2834 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2835 
2836 	amdgpu_fbdev_init(adev);
2837 
2838 	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2839 		amdgpu_pm_virt_sysfs_init(adev);
2840 
2841 	r = amdgpu_pm_sysfs_init(adev);
2842 	if (r)
2843 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2844 
2845 	r = amdgpu_ucode_sysfs_init(adev);
2846 	if (r)
2847 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2848 
2849 	r = amdgpu_debugfs_gem_init(adev);
2850 	if (r)
2851 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
2852 
2853 	r = amdgpu_debugfs_regs_init(adev);
2854 	if (r)
2855 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
2856 
2857 	r = amdgpu_debugfs_firmware_init(adev);
2858 	if (r)
2859 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
2860 
2861 	r = amdgpu_debugfs_init(adev);
2862 	if (r)
2863 		DRM_ERROR("Creating debugfs files failed (%d).\n", r);
2864 
2865 	if (amdgpu_testing & 1) {
2866 		if (adev->accel_working)
2867 			amdgpu_test_moves(adev);
2868 		else
2869 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2870 	}
2871 	if (amdgpu_benchmarking) {
2872 		if (adev->accel_working)
2873 			amdgpu_benchmark(adev, amdgpu_benchmarking);
2874 		else
2875 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2876 	}
2877 
2878 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
2879 	 * explicit gating rather than handling it automatically.
2880 	 */
2881 	r = amdgpu_device_ip_late_init(adev);
2882 	if (r) {
2883 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
2884 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
2885 		goto failed;
2886 	}
2887 
2888 	/* must succeed. */
2889 	amdgpu_ras_resume(adev);
2890 
2891 	queue_delayed_work(system_wq, &adev->delayed_init_work,
2892 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
2893 
2894 	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2895 	if (r) {
2896 		dev_err(adev->dev, "Could not create pcie_replay_count\n");
2897 		return r;
2898 	}
2899 
2900 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
2901 		r = amdgpu_pmu_init(adev);
2902 	if (r)
2903 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2904 
2905 	return 0;
2906 
2907 failed:
2908 	amdgpu_vf_error_trans_all(adev);
2909 	if (runtime)
2910 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
2911 
2912 	return r;
2913 }
2914 
2915 /**
2916  * amdgpu_device_fini - tear down the driver
2917  *
2918  * @adev: amdgpu_device pointer
2919  *
2920  * Tear down the driver info (all asics).
2921  * Called at driver shutdown.
2922  */
2923 void amdgpu_device_fini(struct amdgpu_device *adev)
2924 {
2925 	int r;
2926 
2927 	DRM_INFO("amdgpu: finishing device.\n");
2928 	adev->shutdown = true;
2929 	/* disable all interrupts */
2930 	amdgpu_irq_disable_all(adev);
2931 	if (adev->mode_info.mode_config_initialized){
2932 		if (!amdgpu_device_has_dc_support(adev))
2933 			drm_helper_force_disable_all(adev->ddev);
2934 		else
2935 			drm_atomic_helper_shutdown(adev->ddev);
2936 	}
2937 	amdgpu_fence_driver_fini(adev);
2938 	amdgpu_pm_sysfs_fini(adev);
2939 	amdgpu_fbdev_fini(adev);
2940 	r = amdgpu_device_ip_fini(adev);
2941 	if (adev->firmware.gpu_info_fw) {
2942 		release_firmware(adev->firmware.gpu_info_fw);
2943 		adev->firmware.gpu_info_fw = NULL;
2944 	}
2945 	adev->accel_working = false;
2946 	cancel_delayed_work_sync(&adev->delayed_init_work);
2947 	/* free i2c buses */
2948 	if (!amdgpu_device_has_dc_support(adev))
2949 		amdgpu_i2c_fini(adev);
2950 
2951 	if (amdgpu_emu_mode != 1)
2952 		amdgpu_atombios_fini(adev);
2953 
2954 	kfree(adev->bios);
2955 	adev->bios = NULL;
2956 	if (!pci_is_thunderbolt_attached(adev->pdev))
2957 		vga_switcheroo_unregister_client(adev->pdev);
2958 	if (adev->flags & AMD_IS_PX)
2959 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
2960 	vga_client_register(adev->pdev, NULL, NULL, NULL);
2961 	if (adev->rio_mem)
2962 		pci_iounmap(adev->pdev, adev->rio_mem);
2963 	adev->rio_mem = NULL;
2964 	iounmap(adev->rmmio);
2965 	adev->rmmio = NULL;
2966 	amdgpu_device_doorbell_fini(adev);
2967 	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2968 		amdgpu_pm_virt_sysfs_fini(adev);
2969 
2970 	amdgpu_debugfs_regs_cleanup(adev);
2971 	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
2972 	amdgpu_ucode_sysfs_fini(adev);
2973 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
2974 		amdgpu_pmu_fini(adev);
2975 	amdgpu_debugfs_preempt_cleanup(adev);
2976 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
2977 		amdgpu_discovery_fini(adev);
2978 }
2979 
2980 
2981 /*
2982  * Suspend & resume.
2983  */
2984 /**
2985  * amdgpu_device_suspend - initiate device suspend
2986  *
2987  * @dev: drm dev pointer
2988  * @suspend: suspend state
2989  * @fbcon : notify the fbdev of suspend
2990  *
2991  * Puts the hw in the suspend state (all asics).
2992  * Returns 0 for success or an error on failure.
2993  * Called at driver suspend.
2994  */
2995 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
2996 {
2997 	struct amdgpu_device *adev;
2998 	struct drm_crtc *crtc;
2999 	struct drm_connector *connector;
3000 	int r;
3001 
3002 	if (dev == NULL || dev->dev_private == NULL) {
3003 		return -ENODEV;
3004 	}
3005 
3006 	adev = dev->dev_private;
3007 
3008 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3009 		return 0;
3010 
3011 	adev->in_suspend = true;
3012 	drm_kms_helper_poll_disable(dev);
3013 
3014 	if (fbcon)
3015 		amdgpu_fbdev_set_suspend(adev, 1);
3016 
3017 	cancel_delayed_work_sync(&adev->delayed_init_work);
3018 
3019 	if (!amdgpu_device_has_dc_support(adev)) {
3020 		/* turn off display hw */
3021 		drm_modeset_lock_all(dev);
3022 		list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3023 			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3024 		}
3025 		drm_modeset_unlock_all(dev);
3026 		/* unpin the front buffers and cursors */
3027 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3028 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3029 			struct drm_framebuffer *fb = crtc->primary->fb;
3030 			struct amdgpu_bo *robj;
3031 
3032 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3033 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3034 				r = amdgpu_bo_reserve(aobj, true);
3035 				if (r == 0) {
3036 					amdgpu_bo_unpin(aobj);
3037 					amdgpu_bo_unreserve(aobj);
3038 				}
3039 			}
3040 
3041 			if (fb == NULL || fb->obj[0] == NULL) {
3042 				continue;
3043 			}
3044 			robj = gem_to_amdgpu_bo(fb->obj[0]);
3045 			/* don't unpin kernel fb objects */
3046 			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3047 				r = amdgpu_bo_reserve(robj, true);
3048 				if (r == 0) {
3049 					amdgpu_bo_unpin(robj);
3050 					amdgpu_bo_unreserve(robj);
3051 				}
3052 			}
3053 		}
3054 	}
3055 
3056 	amdgpu_amdkfd_suspend(adev);
3057 
3058 	amdgpu_ras_suspend(adev);
3059 
3060 	r = amdgpu_device_ip_suspend_phase1(adev);
3061 
3062 	/* evict vram memory */
3063 	amdgpu_bo_evict_vram(adev);
3064 
3065 	amdgpu_fence_driver_suspend(adev);
3066 
3067 	r = amdgpu_device_ip_suspend_phase2(adev);
3068 
3069 	/* evict remaining vram memory
3070 	 * This second call to evict vram is to evict the gart page table
3071 	 * using the CPU.
3072 	 */
3073 	amdgpu_bo_evict_vram(adev);
3074 
3075 	pci_save_state(dev->pdev);
3076 	if (suspend) {
3077 		/* Shut down the device */
3078 		pci_disable_device(dev->pdev);
3079 		pci_set_power_state(dev->pdev, PCI_D3hot);
3080 	} else {
3081 		r = amdgpu_asic_reset(adev);
3082 		if (r)
3083 			DRM_ERROR("amdgpu asic reset failed\n");
3084 	}
3085 
3086 	return 0;
3087 }
3088 
3089 /**
3090  * amdgpu_device_resume - initiate device resume
3091  *
3092  * @dev: drm dev pointer
3093  * @resume: resume state
3094  * @fbcon : notify the fbdev of resume
3095  *
3096  * Bring the hw back to operating state (all asics).
3097  * Returns 0 for success or an error on failure.
3098  * Called at driver resume.
3099  */
3100 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
3101 {
3102 	struct drm_connector *connector;
3103 	struct amdgpu_device *adev = dev->dev_private;
3104 	struct drm_crtc *crtc;
3105 	int r = 0;
3106 
3107 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3108 		return 0;
3109 
3110 	if (resume) {
3111 		pci_set_power_state(dev->pdev, PCI_D0);
3112 		pci_restore_state(dev->pdev);
3113 		r = pci_enable_device(dev->pdev);
3114 		if (r)
3115 			return r;
3116 	}
3117 
3118 	/* post card */
3119 	if (amdgpu_device_need_post(adev)) {
3120 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3121 		if (r)
3122 			DRM_ERROR("amdgpu asic init failed\n");
3123 	}
3124 
3125 	r = amdgpu_device_ip_resume(adev);
3126 	if (r) {
3127 		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3128 		return r;
3129 	}
3130 	amdgpu_fence_driver_resume(adev);
3131 
3132 
3133 	r = amdgpu_device_ip_late_init(adev);
3134 	if (r)
3135 		return r;
3136 
3137 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3138 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3139 
3140 	if (!amdgpu_device_has_dc_support(adev)) {
3141 		/* pin cursors */
3142 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3143 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3144 
3145 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3146 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3147 				r = amdgpu_bo_reserve(aobj, true);
3148 				if (r == 0) {
3149 					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3150 					if (r != 0)
3151 						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3152 					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3153 					amdgpu_bo_unreserve(aobj);
3154 				}
3155 			}
3156 		}
3157 	}
3158 	r = amdgpu_amdkfd_resume(adev);
3159 	if (r)
3160 		return r;
3161 
3162 	/* Make sure IB tests flushed */
3163 	flush_delayed_work(&adev->delayed_init_work);
3164 
3165 	/* blat the mode back in */
3166 	if (fbcon) {
3167 		if (!amdgpu_device_has_dc_support(adev)) {
3168 			/* pre DCE11 */
3169 			drm_helper_resume_force_mode(dev);
3170 
3171 			/* turn on display hw */
3172 			drm_modeset_lock_all(dev);
3173 			list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3174 				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3175 			}
3176 			drm_modeset_unlock_all(dev);
3177 		}
3178 		amdgpu_fbdev_set_suspend(adev, 0);
3179 	}
3180 
3181 	drm_kms_helper_poll_enable(dev);
3182 
3183 	amdgpu_ras_resume(adev);
3184 
3185 	/*
3186 	 * Most of the connector probing functions try to acquire runtime pm
3187 	 * refs to ensure that the GPU is powered on when connector polling is
3188 	 * performed. Since we're calling this from a runtime PM callback,
3189 	 * trying to acquire rpm refs will cause us to deadlock.
3190 	 *
3191 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3192 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3193 	 */
3194 #ifdef CONFIG_PM
3195 	dev->dev->power.disable_depth++;
3196 #endif
3197 	if (!amdgpu_device_has_dc_support(adev))
3198 		drm_helper_hpd_irq_event(dev);
3199 	else
3200 		drm_kms_helper_hotplug_event(dev);
3201 #ifdef CONFIG_PM
3202 	dev->dev->power.disable_depth--;
3203 #endif
3204 	adev->in_suspend = false;
3205 
3206 	return 0;
3207 }
3208 
3209 /**
3210  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3211  *
3212  * @adev: amdgpu_device pointer
3213  *
3214  * The list of all the hardware IPs that make up the asic is walked and
3215  * the check_soft_reset callbacks are run.  check_soft_reset determines
3216  * if the asic is still hung or not.
3217  * Returns true if any of the IPs are still in a hung state, false if not.
3218  */
3219 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3220 {
3221 	int i;
3222 	bool asic_hang = false;
3223 
3224 	if (amdgpu_sriov_vf(adev))
3225 		return true;
3226 
3227 	if (amdgpu_asic_need_full_reset(adev))
3228 		return true;
3229 
3230 	for (i = 0; i < adev->num_ip_blocks; i++) {
3231 		if (!adev->ip_blocks[i].status.valid)
3232 			continue;
3233 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3234 			adev->ip_blocks[i].status.hang =
3235 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3236 		if (adev->ip_blocks[i].status.hang) {
3237 			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3238 			asic_hang = true;
3239 		}
3240 	}
3241 	return asic_hang;
3242 }
3243 
3244 /**
3245  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3246  *
3247  * @adev: amdgpu_device pointer
3248  *
3249  * The list of all the hardware IPs that make up the asic is walked and the
3250  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3251  * handles any IP specific hardware or software state changes that are
3252  * necessary for a soft reset to succeed.
3253  * Returns 0 on success, negative error code on failure.
3254  */
3255 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3256 {
3257 	int i, r = 0;
3258 
3259 	for (i = 0; i < adev->num_ip_blocks; i++) {
3260 		if (!adev->ip_blocks[i].status.valid)
3261 			continue;
3262 		if (adev->ip_blocks[i].status.hang &&
3263 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3264 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3265 			if (r)
3266 				return r;
3267 		}
3268 	}
3269 
3270 	return 0;
3271 }
3272 
3273 /**
3274  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3275  *
3276  * @adev: amdgpu_device pointer
3277  *
3278  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3279  * reset is necessary to recover.
3280  * Returns true if a full asic reset is required, false if not.
3281  */
3282 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3283 {
3284 	int i;
3285 
3286 	if (amdgpu_asic_need_full_reset(adev))
3287 		return true;
3288 
3289 	for (i = 0; i < adev->num_ip_blocks; i++) {
3290 		if (!adev->ip_blocks[i].status.valid)
3291 			continue;
3292 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3293 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3294 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3295 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3296 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3297 			if (adev->ip_blocks[i].status.hang) {
3298 				DRM_INFO("Some blocks need full reset!\n");
3299 				return true;
3300 			}
3301 		}
3302 	}
3303 	return false;
3304 }
3305 
3306 /**
3307  * amdgpu_device_ip_soft_reset - do a soft reset
3308  *
3309  * @adev: amdgpu_device pointer
3310  *
3311  * The list of all the hardware IPs that make up the asic is walked and the
3312  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3313  * IP specific hardware or software state changes that are necessary to soft
3314  * reset the IP.
3315  * Returns 0 on success, negative error code on failure.
3316  */
3317 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3318 {
3319 	int i, r = 0;
3320 
3321 	for (i = 0; i < adev->num_ip_blocks; i++) {
3322 		if (!adev->ip_blocks[i].status.valid)
3323 			continue;
3324 		if (adev->ip_blocks[i].status.hang &&
3325 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3326 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3327 			if (r)
3328 				return r;
3329 		}
3330 	}
3331 
3332 	return 0;
3333 }
3334 
3335 /**
3336  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3337  *
3338  * @adev: amdgpu_device pointer
3339  *
3340  * The list of all the hardware IPs that make up the asic is walked and the
3341  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3342  * handles any IP specific hardware or software state changes that are
3343  * necessary after the IP has been soft reset.
3344  * Returns 0 on success, negative error code on failure.
3345  */
3346 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3347 {
3348 	int i, r = 0;
3349 
3350 	for (i = 0; i < adev->num_ip_blocks; i++) {
3351 		if (!adev->ip_blocks[i].status.valid)
3352 			continue;
3353 		if (adev->ip_blocks[i].status.hang &&
3354 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3355 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3356 		if (r)
3357 			return r;
3358 	}
3359 
3360 	return 0;
3361 }
3362 
3363 /**
3364  * amdgpu_device_recover_vram - Recover some VRAM contents
3365  *
3366  * @adev: amdgpu_device pointer
3367  *
3368  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3369  * restore things like GPUVM page tables after a GPU reset where
3370  * the contents of VRAM might be lost.
3371  *
3372  * Returns:
3373  * 0 on success, negative error code on failure.
3374  */
3375 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3376 {
3377 	struct dma_fence *fence = NULL, *next = NULL;
3378 	struct amdgpu_bo *shadow;
3379 	long r = 1, tmo;
3380 
3381 	if (amdgpu_sriov_runtime(adev))
3382 		tmo = msecs_to_jiffies(8000);
3383 	else
3384 		tmo = msecs_to_jiffies(100);
3385 
3386 	DRM_INFO("recover vram bo from shadow start\n");
3387 	mutex_lock(&adev->shadow_list_lock);
3388 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3389 
3390 		/* No need to recover an evicted BO */
3391 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3392 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3393 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3394 			continue;
3395 
3396 		r = amdgpu_bo_restore_shadow(shadow, &next);
3397 		if (r)
3398 			break;
3399 
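		/*
		 * pipeline the restores: the copy for this BO was just queued
		 * (fence returned in 'next'), so wait on the previous BO's
		 * fence while the new copy runs
		 */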
3400 		if (fence) {
3401 			tmo = dma_fence_wait_timeout(fence, false, tmo);
3402 			dma_fence_put(fence);
3403 			fence = next;
3404 			if (tmo == 0) {
3405 				r = -ETIMEDOUT;
3406 				break;
3407 			} else if (tmo < 0) {
3408 				r = tmo;
3409 				break;
3410 			}
3411 		} else {
3412 			fence = next;
3413 		}
3414 	}
3415 	mutex_unlock(&adev->shadow_list_lock);
3416 
3417 	if (fence)
3418 		tmo = dma_fence_wait_timeout(fence, false, tmo);
3419 	dma_fence_put(fence);
3420 
3421 	if (r < 0 || tmo <= 0) {
3422 		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3423 		return -EIO;
3424 	}
3425 
3426 	DRM_INFO("recover vram bo from shadow done\n");
3427 	return 0;
3428 }
3429 
3430 
3431 /**
3432  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3433  *
3434  * @adev: amdgpu device pointer
3435  * @from_hypervisor: request from hypervisor
3436  *
3437  * Do a VF FLR and reinitialize the ASIC.
3438  * Returns 0 on success, negative error code on failure.
3439  */
3440 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3441 				     bool from_hypervisor)
3442 {
3443 	int r;
3444 
3445 	if (from_hypervisor)
3446 		r = amdgpu_virt_request_full_gpu(adev, true);
3447 	else
3448 		r = amdgpu_virt_reset_gpu(adev);
3449 	if (r)
3450 		return r;
3451 
3452 	amdgpu_amdkfd_pre_reset(adev);
3453 
3454 	/* Resume IP prior to SMC */
3455 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3456 	if (r)
3457 		goto error;
3458 
3459 	/* we need to recover the GART before resuming SMC/CP/SDMA */
3460 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3461 
3462 	r = amdgpu_device_fw_loading(adev);
3463 	if (r)
3464 		return r;
3465 
3466 	/* now we are okay to resume SMC/CP/SDMA */
3467 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3468 	if (r)
3469 		goto error;
3470 
3471 	amdgpu_irq_gpu_reset_resume_helper(adev);
3472 	r = amdgpu_ib_ring_tests(adev);
3473 	amdgpu_amdkfd_post_reset(adev);
3474 
3475 error:
3476 	amdgpu_virt_init_data_exchange(adev);
3477 	amdgpu_virt_release_full_gpu(adev, true);
3478 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3479 		atomic_inc(&adev->vram_lost_counter);
3480 		r = amdgpu_device_recover_vram(adev);
3481 	}
3482 
3483 	return r;
3484 }
3485 
3486 /**
3487  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3488  *
3489  * @adev: amdgpu device pointer
3490  *
3491  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3492  * a hung GPU.
3493  */
3494 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3495 {
3496 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3497 		DRM_INFO("Timeout, but no hardware hang detected.\n");
3498 		return false;
3499 	}
3500 
3501 	if (amdgpu_gpu_recovery == 0)
3502 		goto disabled;
3503 
3504 	if (amdgpu_sriov_vf(adev))
3505 		return true;
3506 
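	/*
	 * auto (-1): only attempt recovery on the ASICs listed below,
	 * everything else falls through to "disabled"
	 */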
3507 	if (amdgpu_gpu_recovery == -1) {
3508 		switch (adev->asic_type) {
3509 		case CHIP_BONAIRE:
3510 		case CHIP_HAWAII:
3511 		case CHIP_TOPAZ:
3512 		case CHIP_TONGA:
3513 		case CHIP_FIJI:
3514 		case CHIP_POLARIS10:
3515 		case CHIP_POLARIS11:
3516 		case CHIP_POLARIS12:
3517 		case CHIP_VEGAM:
3518 		case CHIP_VEGA20:
3519 		case CHIP_VEGA10:
3520 		case CHIP_VEGA12:
3521 			break;
3522 		default:
3523 			goto disabled;
3524 		}
3525 	}
3526 
3527 	return true;
3528 
3529 disabled:
3530 	DRM_INFO("GPU recovery disabled.\n");
3531 	return false;
3532 }
3533 
3534 
3535 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3536 					struct amdgpu_job *job,
3537 					bool *need_full_reset_arg)
3538 {
3539 	int i, r = 0;
3540 	bool need_full_reset  = *need_full_reset_arg;
3541 
3542 	/* block all schedulers and reset given job's ring */
3543 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3544 		struct amdgpu_ring *ring = adev->rings[i];
3545 
3546 		if (!ring || !ring->sched.thread)
3547 			continue;
3548 
3549 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3550 		amdgpu_fence_driver_force_completion(ring);
3551 	}
3552 
3553 	if (job)
3554 		drm_sched_increase_karma(&job->base);
3555 
3556 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3557 	if (!amdgpu_sriov_vf(adev)) {
3558 
3559 		if (!need_full_reset)
3560 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3561 
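		/*
		 * try a per-IP soft reset first; fall back to a full ASIC
		 * reset if any block is still hung afterwards
		 */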
3562 		if (!need_full_reset) {
3563 			amdgpu_device_ip_pre_soft_reset(adev);
3564 			r = amdgpu_device_ip_soft_reset(adev);
3565 			amdgpu_device_ip_post_soft_reset(adev);
3566 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3567 				DRM_INFO("soft reset failed, will fallback to full reset!\n");
3568 				need_full_reset = true;
3569 			}
3570 		}
3571 
3572 		if (need_full_reset)
3573 			r = amdgpu_device_ip_suspend(adev);
3574 
3575 		*need_full_reset_arg = need_full_reset;
3576 	}
3577 
3578 	return r;
3579 }
3580 
3581 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3582 			       struct list_head *device_list_handle,
3583 			       bool *need_full_reset_arg)
3584 {
3585 	struct amdgpu_device *tmp_adev = NULL;
3586 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3587 	int r = 0;
3588 
3589 	/*
3590 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
3591 	 * to allow proper links negotiation in FW (within 1 sec)
3592 	 */
3593 	if (need_full_reset) {
3594 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3595 			/* For XGMI run all resets in parallel to speed up the process */
3596 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
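				/* queue_work() returns false if the work was already queued */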
3597 				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3598 					r = -EALREADY;
3599 			} else
3600 				r = amdgpu_asic_reset(tmp_adev);
3601 
3602 			if (r) {
3603 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3604 					 r, tmp_adev->ddev->unique);
3605 				break;
3606 			}
3607 		}
3608 
3609 		/* For XGMI wait for all PSP resets to complete before proceeding */
3610 		if (!r) {
3611 			list_for_each_entry(tmp_adev, device_list_handle,
3612 					    gmc.xgmi.head) {
3613 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3614 					flush_work(&tmp_adev->xgmi_reset_work);
3615 					r = tmp_adev->asic_reset_res;
3616 					if (r)
3617 						break;
3618 				}
3619 			}
3620 
3621 			list_for_each_entry(tmp_adev, device_list_handle,
3622 					gmc.xgmi.head) {
3623 				amdgpu_ras_reserve_bad_pages(tmp_adev);
3624 			}
3625 		}
3626 	}
3627 
3628 
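	/* with the ASIC(s) reset, bring every device in the list back up */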
3629 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3630 		if (need_full_reset) {
3631 			/* post card */
3632 			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3633 				DRM_WARN("asic atom init failed!");
3634 
3635 			if (!r) {
3636 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3637 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
3638 				if (r)
3639 					goto out;
3640 
3641 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3642 				if (vram_lost) {
3643 					DRM_INFO("VRAM is lost due to GPU reset!\n");
3644 					atomic_inc(&tmp_adev->vram_lost_counter);
3645 				}
3646 
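				/* recover the GART before firmware loading and phase-2 resume */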
3647 				r = amdgpu_gtt_mgr_recover(
3648 					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
3649 				if (r)
3650 					goto out;
3651 
3652 				r = amdgpu_device_fw_loading(tmp_adev);
3653 				if (r)
3654 					return r;
3655 
3656 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
3657 				if (r)
3658 					goto out;
3659 
3660 				if (vram_lost)
3661 					amdgpu_device_fill_reset_magic(tmp_adev);
3662 
3663 				/*
3664 				 * Add this ASIC back as tracked since the
3665 				 * reset has already completed successfully.
3666 				 */
3667 				amdgpu_register_gpu_instance(tmp_adev);
3668 
3669 				r = amdgpu_device_ip_late_init(tmp_adev);
3670 				if (r)
3671 					goto out;
3672 
3673 				/* must succeed. */
3674 				amdgpu_ras_resume(tmp_adev);
3675 
3676 				/* Update PSP FW topology after reset */
3677 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3678 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3679 			}
3680 		}
3681 
3682 
3683 out:
3684 		if (!r) {
3685 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3686 			r = amdgpu_ib_ring_tests(tmp_adev);
3687 			if (r) {
3688 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3689 				r = amdgpu_device_ip_suspend(tmp_adev);
3690 				need_full_reset = true;
3691 				r = -EAGAIN;
3692 				goto end;
3693 			}
3694 		}
3695 
3696 		if (!r)
3697 			r = amdgpu_device_recover_vram(tmp_adev);
3698 		else
3699 			tmp_adev->asic_reset_res = r;
3700 	}
3701 
3702 end:
3703 	*need_full_reset_arg = need_full_reset;
3704 	return r;
3705 }
3706 
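/*
 * Serialize GPU recovery on this device: take lock_reset, bump the reset
 * counter, record the MP1 state matching the chosen reset method and, on
 * bare metal, quiesce KFD before the reset starts.
 */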
3707 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
3708 {
3709 	if (trylock) {
3710 		if (!mutex_trylock(&adev->lock_reset))
3711 			return false;
3712 	} else
3713 		mutex_lock(&adev->lock_reset);
3714 
3715 	atomic_inc(&adev->gpu_reset_counter);
3716 	adev->in_gpu_reset = 1;
3717 	switch (amdgpu_asic_reset_method(adev)) {
3718 	case AMD_RESET_METHOD_MODE1:
3719 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3720 		break;
3721 	case AMD_RESET_METHOD_MODE2:
3722 		adev->mp1_state = PP_MP1_STATE_RESET;
3723 		break;
3724 	default:
3725 		adev->mp1_state = PP_MP1_STATE_NONE;
3726 		break;
3727 	}
3728 	/* Block kfd: SRIOV would do it separately */
3729 	if (!amdgpu_sriov_vf(adev))
3730 		amdgpu_amdkfd_pre_reset(adev);
3731 
3732 	return true;
3733 }
3734 
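/* Counterpart of amdgpu_device_lock_adev(): let KFD resume and drop lock_reset. */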
3735 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3736 {
3737 	/* unlock kfd: SRIOV would do it separately */
3738 	if (!amdgpu_sriov_vf(adev))
3739 		amdgpu_amdkfd_post_reset(adev);
3740 	amdgpu_vf_error_trans_all(adev);
3741 	adev->mp1_state = PP_MP1_STATE_NONE;
3742 	adev->in_gpu_reset = 0;
3743 	mutex_unlock(&adev->lock_reset);
3744 }
3745 
3746 
3747 /**
3748  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3749  *
3750  * @adev: amdgpu device pointer
3751  * @job: which job triggered the hang
3752  *
3753  * Attempt to reset the GPU if it has hung (all ASICs).
3754  * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
3755  * Returns 0 for success or an error on failure.
3756  */
3757 
3758 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3759 			      struct amdgpu_job *job)
3760 {
3761 	struct list_head device_list, *device_list_handle =  NULL;
3762 	bool need_full_reset, job_signaled;
3763 	struct amdgpu_hive_info *hive = NULL;
3764 	struct amdgpu_device *tmp_adev = NULL;
3765 	int i, r = 0;
3766 
3767 	need_full_reset = job_signaled = false;
3768 	INIT_LIST_HEAD(&device_list);
3769 
3770 	dev_info(adev->dev, "GPU reset begin!\n");
3771 
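	/* make sure the delayed init work is not running concurrently with the reset */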
3772 	cancel_delayed_work_sync(&adev->delayed_init_work);
3773 
3774 	hive = amdgpu_get_xgmi_hive(adev, false);
3775 
3776 	/*
3777 	 * Here we trylock to avoid a chain of resets executing while this TO
3778 	 * handler is running, triggered either by jobs on different adevs in an
3779 	 * XGMI hive or by jobs on different schedulers for the same device.
3780 	 * We always reset all schedulers for a device and all devices in an
3781 	 * XGMI hive, so that should take care of them too.
3782 	 */
3783 
3784 	if (hive && !mutex_trylock(&hive->reset_lock)) {
3785 		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3786 			 job ? job->base.id : -1, hive->hive_id);
3787 		return 0;
3788 	}
3789 
3790 	/* Start with adev pre asic reset first for soft reset check. */
3791 	if (!amdgpu_device_lock_adev(adev, !hive)) {
3792 		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3793 					 job ? job->base.id : -1);
3794 		return 0;
3795 	}
3796 
3797 	/* Build list of devices to reset */
3798 	if  (adev->gmc.xgmi.num_physical_nodes > 1) {
3799 		if (!hive) {
3800 			amdgpu_device_unlock_adev(adev);
3801 			return -ENODEV;
3802 		}
3803 
3804 		/*
3805 		 * In XGMI hive mode the device reset is done for all nodes in the
3806 		 * hive in order to retrain all XGMI links, so the reset sequence
3807 		 * is executed in a loop over all nodes.
3808 		 */
3809 		device_list_handle = &hive->device_list;
3810 	} else {
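		/* single device: reuse the xgmi list head to build a one-entry list */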
3811 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
3812 		device_list_handle = &device_list;
3813 	}
3814 
3815 	/*
3816 	 * Mark these ASICs to be reset as untracked first,
3817 	 * and add them back after the reset completes.
3818 	 */
3819 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
3820 		amdgpu_unregister_gpu_instance(tmp_adev);
3821 
3822 	/* block all schedulers and reset given job's ring */
3823 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3824 		/* disable ras on ALL IPs */
3825 		if (amdgpu_device_ip_need_full_reset(tmp_adev))
3826 			amdgpu_ras_suspend(tmp_adev);
3827 
3828 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3829 			struct amdgpu_ring *ring = tmp_adev->rings[i];
3830 
3831 			if (!ring || !ring->sched.thread)
3832 				continue;
3833 
3834 			drm_sched_stop(&ring->sched, &job->base);
3835 		}
3836 	}
3837 
3838 
3839 	/*
3840 	 * Must check guilty signal here since after this point all old
3841 	 * HW fences are force signaled.
3842 	 *
3843 	 * job->base holds a reference to the parent fence
3844 	 */
3845 	if (job && job->base.s_fence->parent &&
3846 	    dma_fence_is_signaled(job->base.s_fence->parent))
3847 		job_signaled = true;
3848 
3849 	if (!amdgpu_device_ip_need_full_reset(adev))
3850 		device_list_handle = &device_list;
3851 
3852 	if (job_signaled) {
3853 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3854 		goto skip_hw_reset;
3855 	}
3856 
3857 
3858 	/* Guilty job will be freed after this */
3859 	r = amdgpu_device_pre_asic_reset(adev,
3860 					 job,
3861 					 &need_full_reset);
3862 	if (r) {
3863 		/* TODO: Should we stop? */
3864 		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3865 			  r, adev->ddev->unique);
3866 		adev->asic_reset_res = r;
3867 	}
3868 
3869 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
3870 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3871 
3872 		if (tmp_adev == adev)
3873 			continue;
3874 
3875 		amdgpu_device_lock_adev(tmp_adev, false);
3876 		r = amdgpu_device_pre_asic_reset(tmp_adev,
3877 						 NULL,
3878 						 &need_full_reset);
3879 		/* TODO: Should we stop? */
3880 		if (r) {
3881 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3882 				  r, tmp_adev->ddev->unique);
3883 			tmp_adev->asic_reset_res = r;
3884 		}
3885 	}
3886 
3887 	/* Actual ASIC resets if needed. */
3888 	/* TODO Implement XGMI hive reset logic for SRIOV */
3889 	if (amdgpu_sriov_vf(adev)) {
3890 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
3891 		if (r)
3892 			adev->asic_reset_res = r;
3893 	} else {
3894 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3895 		if (r == -EAGAIN)
3896 			goto retry;
3897 	}
3898 
3899 skip_hw_reset:
3900 
3901 	/* Post ASIC reset for all devs. */
3902 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3903 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3904 			struct amdgpu_ring *ring = tmp_adev->rings[i];
3905 
3906 			if (!ring || !ring->sched.thread)
3907 				continue;
3908 
3909 			/* No point in resubmitting jobs if we didn't HW reset */
3910 			if (!tmp_adev->asic_reset_res && !job_signaled)
3911 				drm_sched_resubmit_jobs(&ring->sched);
3912 
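			/* restart the scheduler; do full recovery only if the HW reset succeeded */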
3913 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3914 		}
3915 
3916 		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3917 			drm_helper_resume_force_mode(tmp_adev->ddev);
3918 		}
3919 
3920 		tmp_adev->asic_reset_res = 0;
3921 
3922 		if (r) {
3923 			/* bad news, how to tell it to userspace ? */
3924 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
3925 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3926 		} else {
3927 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3928 		}
3929 
3930 		amdgpu_device_unlock_adev(tmp_adev);
3931 	}
3932 
3933 	if (hive)
3934 		mutex_unlock(&hive->reset_lock);
3935 
3936 	if (r)
3937 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
3938 	return r;
3939 }
3940 
3941 /**
3942  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3943  *
3944  * @adev: amdgpu_device pointer
3945  *
3946  * Fetches and stores in the driver the PCIE capabilities (gen speed
3947  * and lanes) of the slot the device is in. Handles APUs and
3948  * virtualized environments where PCIE config space may not be available.
3949  */
3950 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3951 {
3952 	struct pci_dev *pdev;
3953 	enum pci_bus_speed speed_cap, platform_speed_cap;
3954 	enum pcie_link_width platform_link_width;
3955 
3956 	if (amdgpu_pcie_gen_cap)
3957 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3958 
3959 	if (amdgpu_pcie_lane_cap)
3960 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3961 
3962 	/* covers APUs as well */
3963 	if (pci_is_root_bus(adev->pdev->bus)) {
3964 		if (adev->pm.pcie_gen_mask == 0)
3965 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3966 		if (adev->pm.pcie_mlw_mask == 0)
3967 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3968 		return;
3969 	}
3970 
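	/* both masks already set (e.g. via module parameters), nothing more to do */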
3971 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3972 		return;
3973 
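	/* query the link speed and width actually available from the platform */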
3974 	pcie_bandwidth_available(adev->pdev, NULL,
3975 				 &platform_speed_cap, &platform_link_width);
3976 
3977 	if (adev->pm.pcie_gen_mask == 0) {
3978 		/* asic caps */
3979 		pdev = adev->pdev;
3980 		speed_cap = pcie_get_speed_cap(pdev);
3981 		if (speed_cap == PCI_SPEED_UNKNOWN) {
3982 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3983 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3984 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3985 		} else {
3986 			if (speed_cap == PCIE_SPEED_16_0GT)
3987 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3988 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3989 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3990 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3991 			else if (speed_cap == PCIE_SPEED_8_0GT)
3992 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3993 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3994 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3995 			else if (speed_cap == PCIE_SPEED_5_0GT)
3996 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3997 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3998 			else
3999 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4000 		}
4001 		/* platform caps */
4002 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4003 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4004 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4005 		} else {
4006 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4007 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4008 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4009 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4010 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4011 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4012 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4013 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4014 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4015 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4016 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4017 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4018 			else
4019 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4020 
4021 		}
4022 	}
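	/* derive the supported link width mask from the platform link width */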
4023 	if (adev->pm.pcie_mlw_mask == 0) {
4024 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4025 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4026 		} else {
4027 			switch (platform_link_width) {
4028 			case PCIE_LNK_X32:
4029 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4030 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4031 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4032 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4033 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4034 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4035 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4036 				break;
4037 			case PCIE_LNK_X16:
4038 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4039 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4040 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4041 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4042 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4043 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4044 				break;
4045 			case PCIE_LNK_X12:
4046 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4047 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4048 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4049 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4050 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4051 				break;
4052 			case PCIE_LNK_X8:
4053 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4054 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4055 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4056 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4057 				break;
4058 			case PCIE_LNK_X4:
4059 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4060 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4061 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4062 				break;
4063 			case PCIE_LNK_X2:
4064 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4065 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4066 				break;
4067 			case PCIE_LNK_X1:
4068 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4069 				break;
4070 			default:
4071 				break;
4072 			}
4073 		}
4074 	}
4075 }
4076 
4077