xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision fe8db3bcf2e5f9d9056f923b12c7158d6541e435)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 
34 #include <drm/drm_atomic_helper.h>
35 #include <drm/drm_probe_helper.h>
36 #include <drm/amdgpu_drm.h>
37 #include <linux/vgaarb.h>
38 #include <linux/vga_switcheroo.h>
39 #include <linux/efi.h>
40 #include "amdgpu.h"
41 #include "amdgpu_trace.h"
42 #include "amdgpu_i2c.h"
43 #include "atom.h"
44 #include "amdgpu_atombios.h"
45 #include "amdgpu_atomfirmware.h"
46 #include "amd_pcie.h"
47 #ifdef CONFIG_DRM_AMDGPU_SI
48 #include "si.h"
49 #endif
50 #ifdef CONFIG_DRM_AMDGPU_CIK
51 #include "cik.h"
52 #endif
53 #include "vi.h"
54 #include "soc15.h"
55 #include "nv.h"
56 #include "bif/bif_4_1_d.h"
57 #include <linux/pci.h>
58 #include <linux/firmware.h>
59 #include "amdgpu_vf_error.h"
60 
61 #include "amdgpu_amdkfd.h"
62 #include "amdgpu_pm.h"
63 
64 #include "amdgpu_xgmi.h"
65 #include "amdgpu_ras.h"
66 #include "amdgpu_pmu.h"
67 #include "amdgpu_fru_eeprom.h"
68 
69 #include <linux/suspend.h>
70 #include <drm/task_barrier.h>
71 
72 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
74 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
75 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
76 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
77 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
78 MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
79 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
80 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
81 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
82 
83 #define AMDGPU_RESUME_MS		2000
84 
85 const char *amdgpu_asic_name[] = {
86 	"TAHITI",
87 	"PITCAIRN",
88 	"VERDE",
89 	"OLAND",
90 	"HAINAN",
91 	"BONAIRE",
92 	"KAVERI",
93 	"KABINI",
94 	"HAWAII",
95 	"MULLINS",
96 	"TOPAZ",
97 	"TONGA",
98 	"FIJI",
99 	"CARRIZO",
100 	"STONEY",
101 	"POLARIS10",
102 	"POLARIS11",
103 	"POLARIS12",
104 	"VEGAM",
105 	"VEGA10",
106 	"VEGA12",
107 	"VEGA20",
108 	"RAVEN",
109 	"ARCTURUS",
110 	"RENOIR",
111 	"NAVI10",
112 	"NAVI14",
113 	"NAVI12",
114 	"LAST",
115 };
116 
117 /**
118  * DOC: pcie_replay_count
119  *
120  * The amdgpu driver provides a sysfs API for reporting the total number
121  * of PCIe replays (NAKs).
122  * The file pcie_replay_count is used for this and returns the total
123  * number of replays as a sum of the NAKs generated and NAKs received.
124  */
125 
126 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
127 		struct device_attribute *attr, char *buf)
128 {
129 	struct drm_device *ddev = dev_get_drvdata(dev);
130 	struct amdgpu_device *adev = ddev->dev_private;
131 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
132 
133 	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
134 }
135 
136 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
137 		amdgpu_device_get_pcie_replay_count, NULL);
138 
139 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
140 
141 /**
142  * DOC: product_name
143  *
144  * The amdgpu driver provides a sysfs API for reporting the product name
145  * for the device.
146  * The file product_name is used for this and returns the product name
147  * as returned from the FRU.
148  * NOTE: This is only available for certain server cards
149  */
150 
151 static ssize_t amdgpu_device_get_product_name(struct device *dev,
152 		struct device_attribute *attr, char *buf)
153 {
154 	struct drm_device *ddev = dev_get_drvdata(dev);
155 	struct amdgpu_device *adev = ddev->dev_private;
156 
157 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
158 }
159 
160 static DEVICE_ATTR(product_name, S_IRUGO,
161 		amdgpu_device_get_product_name, NULL);
162 
163 /**
164  * DOC: product_number
165  *
166  * The amdgpu driver provides a sysfs API for reporting the part number
167  * for the device.
168  * The file product_number is used for this and returns the part number
169  * as returned from the FRU.
170  * NOTE: This is only available for certain server cards
171  */
172 
173 static ssize_t amdgpu_device_get_product_number(struct device *dev,
174 		struct device_attribute *attr, char *buf)
175 {
176 	struct drm_device *ddev = dev_get_drvdata(dev);
177 	struct amdgpu_device *adev = ddev->dev_private;
178 
179 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
180 }
181 
182 static DEVICE_ATTR(product_number, S_IRUGO,
183 		amdgpu_device_get_product_number, NULL);
184 
185 /**
186  * DOC: serial_number
187  *
188  * The amdgpu driver provides a sysfs API for reporting the serial number
189  * for the device.
190  * The file serial_number is used for this and returns the serial number
191  * as returned from the FRU.
192  * NOTE: This is only available for certain server cards
193  */
194 
195 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
196 		struct device_attribute *attr, char *buf)
197 {
198 	struct drm_device *ddev = dev_get_drvdata(dev);
199 	struct amdgpu_device *adev = ddev->dev_private;
200 
201 	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
202 }
203 
204 static DEVICE_ATTR(serial_number, S_IRUGO,
205 		amdgpu_device_get_serial_number, NULL);
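
/*
 * Note (illustrative, not part of the original sources): the product_name,
 * product_number and serial_number attributes above, like pcie_replay_count,
 * are exposed through sysfs in the GPU's device directory, typically
 * /sys/bus/pci/devices/<bdf>/ where <bdf> is a placeholder for the PCI
 * bus/device/function of the card.
 */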
206 
207 /**
208  * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
209  *
210  * @dev: drm_device pointer
211  *
212  * Returns true if the device is a dGPU with HG/PX power control,
213  * otherwise returns false.
214  */
215 bool amdgpu_device_supports_boco(struct drm_device *dev)
216 {
217 	struct amdgpu_device *adev = dev->dev_private;
218 
219 	if (adev->flags & AMD_IS_PX)
220 		return true;
221 	return false;
222 }
223 
224 /**
225  * amdgpu_device_supports_baco - Does the device support BACO
226  *
227  * @dev: drm_device pointer
228  *
229  * Returns true if the device supports BACO,
230  * otherwise returns false.
231  */
232 bool amdgpu_device_supports_baco(struct drm_device *dev)
233 {
234 	struct amdgpu_device *adev = dev->dev_private;
235 
236 	return amdgpu_asic_supports_baco(adev);
237 }
238 
239 /**
240  * VRAM access helper functions.
241  *
242  * amdgpu_device_vram_access - read/write a buffer in vram
243  *
244  * @adev: amdgpu_device pointer
245  * @pos: offset of the buffer in vram
246  * @buf: virtual address of the buffer in system memory
247  * @size: read/write size in bytes, the allocated size of @buf must be >= @size
248  * @write: true - write to vram, otherwise - read from vram
249  */
250 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
251 			       uint32_t *buf, size_t size, bool write)
252 {
253 	unsigned long flags;
254 	uint32_t hi = ~0;
255 	uint64_t last;
256 
257 
258 #ifdef CONFIG_64BIT
259 	last = min(pos + size, adev->gmc.visible_vram_size);
260 	if (last > pos) {
261 		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
262 		size_t count = last - pos;
263 
264 		if (write) {
265 			memcpy_toio(addr, buf, count);
266 			mb();
267 			amdgpu_asic_flush_hdp(adev, NULL);
268 		} else {
269 			amdgpu_asic_invalidate_hdp(adev, NULL);
270 			mb();
271 			memcpy_fromio(buf, addr, count);
272 		}
273 
274 		if (count == size)
275 			return;
276 
277 		pos += count;
278 		buf += count / 4;
279 		size -= count;
280 	}
281 #endif
282 
283 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
284 	for (last = pos + size; pos < last; pos += 4) {
285 		uint32_t tmp = pos >> 31;
286 
287 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
288 		if (tmp != hi) {
289 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
290 			hi = tmp;
291 		}
292 		if (write)
293 			WREG32_NO_KIQ(mmMM_DATA, *buf++);
294 		else
295 			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
296 	}
297 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
298 }
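
/*
 * Illustrative sketch (not part of the driver): reading a single dword from
 * the start of VRAM into a local buffer using the helper above.
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0, &val, sizeof(val), false);
 */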
299 
300 /*
301  * MMIO register access helper functions.
302  */
303 /**
304  * amdgpu_mm_rreg - read a memory mapped IO register
305  *
306  * @adev: amdgpu_device pointer
307  * @reg: dword aligned register offset
308  * @acc_flags: access flags which require special behavior
309  *
310  * Returns the 32 bit value from the offset specified.
311  */
312 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
313 			uint32_t acc_flags)
314 {
315 	uint32_t ret;
316 
317 	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
318 		return amdgpu_kiq_rreg(adev, reg);
319 
320 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
321 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
322 	else {
323 		unsigned long flags;
324 
325 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
327 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
328 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
329 	}
330 	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
331 	return ret;
332 }
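
/*
 * Note (illustrative): driver code does not usually call amdgpu_mm_rreg()
 * and amdgpu_mm_wreg() directly; register access typically goes through
 * wrapper macros such as RREG32()/WREG32() (and their *_NO_KIQ variants),
 * which expand to these helpers with the appropriate acc_flags, e.g.
 *
 *	tmp = RREG32(mmSOME_REG);
 *	WREG32(mmSOME_REG, tmp | 0x1);
 *
 * mmSOME_REG is a placeholder register offset, not a real register name.
 */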
333 
334 /*
335  * MMIO register byte read helper function
336  * @offset: byte offset from MMIO start
337  *
338  */
339 
340 /**
341  * amdgpu_mm_rreg8 - read a memory mapped IO register
342  *
343  * @adev: amdgpu_device pointer
344  * @offset: byte aligned register offset
345  *
346  * Returns the 8 bit value from the offset specified.
347  */
348 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
349 	if (offset < adev->rmmio_size)
350 		return (readb(adev->rmmio + offset));
351 	BUG();
352 }
353 
354 /*
355  * MMIO register byte write helper function
356  * @offset: byte offset from MMIO start
357  * @value: the value to be written to the register
358  *
359  */
360 /**
361  * amdgpu_mm_wreg8 - write to a memory mapped IO register
362  *
363  * @adev: amdgpu_device pointer
364  * @offset: byte aligned register offset
365  * @value: 8 bit value to write
366  *
367  * Writes the value specified to the offset specified.
368  */
369 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
370 	if (offset < adev->rmmio_size)
371 		writeb(value, adev->rmmio + offset);
372 	else
373 		BUG();
374 }
375 
376 static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
377 {
378 	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
379 
380 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
381 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
382 	else {
383 		unsigned long flags;
384 
385 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
386 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
387 		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
388 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
389 	}
390 
391 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
392 		udelay(500);
393 	}
394 }
395 
396 /**
397  * amdgpu_mm_wreg - write to a memory mapped IO register
398  *
399  * @adev: amdgpu_device pointer
400  * @reg: dword aligned register offset
401  * @v: 32 bit value to write to the register
402  * @acc_flags: access flags which require special behavior
403  *
404  * Writes the value specified to the offset specified.
405  */
406 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
407 		    uint32_t acc_flags)
408 {
409 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
410 		adev->last_mm_index = v;
411 	}
412 
413 	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
414 		return amdgpu_kiq_wreg(adev, reg, v);
415 
416 	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
417 }
418 
419 /*
420  * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
421  *
422  * this function is invoked only for debugfs register access
423  */
424 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
425 		    uint32_t acc_flags)
426 {
427 	if (amdgpu_sriov_fullaccess(adev) &&
428 		adev->gfx.rlc.funcs &&
429 		adev->gfx.rlc.funcs->is_rlcg_access_range) {
430 
431 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
432 			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
433 	}
434 
435 	amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
436 }
437 
438 /**
439  * amdgpu_io_rreg - read an IO register
440  *
441  * @adev: amdgpu_device pointer
442  * @reg: dword aligned register offset
443  *
444  * Returns the 32 bit value from the offset specified.
445  */
446 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
447 {
448 	if ((reg * 4) < adev->rio_mem_size)
449 		return ioread32(adev->rio_mem + (reg * 4));
450 	else {
451 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
452 		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
453 	}
454 }
455 
456 /**
457  * amdgpu_io_wreg - write to an IO register
458  *
459  * @adev: amdgpu_device pointer
460  * @reg: dword aligned register offset
461  * @v: 32 bit value to write to the register
462  *
463  * Writes the value specified to the offset specified.
464  */
465 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
466 {
467 	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
468 		adev->last_mm_index = v;
469 	}
470 
471 	if ((reg * 4) < adev->rio_mem_size)
472 		iowrite32(v, adev->rio_mem + (reg * 4));
473 	else {
474 		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
475 		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
476 	}
477 
478 	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
479 		udelay(500);
480 	}
481 }
482 
483 /**
484  * amdgpu_mm_rdoorbell - read a doorbell dword
485  *
486  * @adev: amdgpu_device pointer
487  * @index: doorbell index
488  *
489  * Returns the value in the doorbell aperture at the
490  * requested doorbell index (CIK).
491  */
492 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
493 {
494 	if (index < adev->doorbell.num_doorbells) {
495 		return readl(adev->doorbell.ptr + index);
496 	} else {
497 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
498 		return 0;
499 	}
500 }
501 
502 /**
503  * amdgpu_mm_wdoorbell - write a doorbell dword
504  *
505  * @adev: amdgpu_device pointer
506  * @index: doorbell index
507  * @v: value to write
508  *
509  * Writes @v to the doorbell aperture at the
510  * requested doorbell index (CIK).
511  */
512 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
513 {
514 	if (index < adev->doorbell.num_doorbells) {
515 		writel(v, adev->doorbell.ptr + index);
516 	} else {
517 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
518 	}
519 }
520 
521 /**
522  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
523  *
524  * @adev: amdgpu_device pointer
525  * @index: doorbell index
526  *
527  * Returns the value in the doorbell aperture at the
528  * requested doorbell index (VEGA10+).
529  */
530 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
531 {
532 	if (index < adev->doorbell.num_doorbells) {
533 		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
534 	} else {
535 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
536 		return 0;
537 	}
538 }
539 
540 /**
541  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
542  *
543  * @adev: amdgpu_device pointer
544  * @index: doorbell index
545  * @v: value to write
546  *
547  * Writes @v to the doorbell aperture at the
548  * requested doorbell index (VEGA10+).
549  */
550 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
551 {
552 	if (index < adev->doorbell.num_doorbells) {
553 		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
554 	} else {
555 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
556 	}
557 }
558 
559 /**
560  * amdgpu_invalid_rreg - dummy reg read function
561  *
562  * @adev: amdgpu device pointer
563  * @reg: offset of register
564  *
565  * Dummy register read function.  Used for register blocks
566  * that certain asics don't have (all asics).
567  * Returns the value in the register.
568  */
569 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
570 {
571 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
572 	BUG();
573 	return 0;
574 }
575 
576 /**
577  * amdgpu_invalid_wreg - dummy reg write function
578  *
579  * @adev: amdgpu device pointer
580  * @reg: offset of register
581  * @v: value to write to the register
582  *
583  * Dummy register write function.  Used for register blocks
584  * that certain asics don't have (all asics).
585  */
586 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
587 {
588 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
589 		  reg, v);
590 	BUG();
591 }
592 
593 /**
594  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
595  *
596  * @adev: amdgpu device pointer
597  * @reg: offset of register
598  *
599  * Dummy register read function.  Used for register blocks
600  * that certain asics don't have (all asics).
601  * Returns the value in the register.
602  */
603 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
604 {
605 	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
606 	BUG();
607 	return 0;
608 }
609 
610 /**
611  * amdgpu_invalid_wreg64 - dummy reg write function
612  *
613  * @adev: amdgpu device pointer
614  * @reg: offset of register
615  * @v: value to write to the register
616  *
617  * Dummy register write function.  Used for register blocks
618  * that certain asics don't have (all asics).
619  */
620 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
621 {
622 	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
623 		  reg, v);
624 	BUG();
625 }
626 
627 /**
628  * amdgpu_block_invalid_rreg - dummy reg read function
629  *
630  * @adev: amdgpu device pointer
631  * @block: offset of instance
632  * @reg: offset of register
633  *
634  * Dummy register read function.  Used for register blocks
635  * that certain asics don't have (all asics).
636  * Returns the value in the register.
637  */
638 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
639 					  uint32_t block, uint32_t reg)
640 {
641 	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
642 		  reg, block);
643 	BUG();
644 	return 0;
645 }
646 
647 /**
648  * amdgpu_block_invalid_wreg - dummy reg write function
649  *
650  * @adev: amdgpu device pointer
651  * @block: offset of instance
652  * @reg: offset of register
653  * @v: value to write to the register
654  *
655  * Dummy register write function.  Used for register blocks
656  * that certain asics don't have (all asics).
657  */
658 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
659 				      uint32_t block,
660 				      uint32_t reg, uint32_t v)
661 {
662 	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
663 		  reg, block, v);
664 	BUG();
665 }
666 
667 /**
668  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
669  *
670  * @adev: amdgpu device pointer
671  *
672  * Allocates a scratch page of VRAM for use by various things in the
673  * driver.
674  */
675 static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
676 {
677 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
678 				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
679 				       &adev->vram_scratch.robj,
680 				       &adev->vram_scratch.gpu_addr,
681 				       (void **)&adev->vram_scratch.ptr);
682 }
683 
684 /**
685  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
686  *
687  * @adev: amdgpu device pointer
688  *
689  * Frees the VRAM scratch page.
690  */
691 static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
692 {
693 	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
694 }
695 
696 /**
697  * amdgpu_device_program_register_sequence - program an array of registers.
698  *
699  * @adev: amdgpu_device pointer
700  * @registers: pointer to the register array
701  * @array_size: size of the register array
702  *
703  * Programs an array of registers with AND and OR masks.
704  * This is a helper for setting golden registers.
705  */
706 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
707 					     const u32 *registers,
708 					     const u32 array_size)
709 {
710 	u32 tmp, reg, and_mask, or_mask;
711 	int i;
712 
713 	if (array_size % 3)
714 		return;
715 
716 	for (i = 0; i < array_size; i += 3) {
717 		reg = registers[i + 0];
718 		and_mask = registers[i + 1];
719 		or_mask = registers[i + 2];
720 
721 		if (and_mask == 0xffffffff) {
722 			tmp = or_mask;
723 		} else {
724 			tmp = RREG32(reg);
725 			tmp &= ~and_mask;
726 			if (adev->family >= AMDGPU_FAMILY_AI)
727 				tmp |= (or_mask & and_mask);
728 			else
729 				tmp |= or_mask;
730 		}
731 		WREG32(reg, tmp);
732 	}
733 }
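
/*
 * Illustrative sketch: golden register tables passed to
 * amdgpu_device_program_register_sequence() are flat arrays of
 * {offset, and_mask, or_mask} triplets, e.g.
 *
 *	static const u32 golden_settings_example[] = {
 *		mmSOME_REG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 *
 * mmSOME_REG is a placeholder offset; an and_mask of 0xffffffff means the
 * or_mask value is written verbatim, as the loop above shows.
 */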
734 
735 /**
736  * amdgpu_device_pci_config_reset - reset the GPU
737  *
738  * @adev: amdgpu_device pointer
739  *
740  * Resets the GPU using the pci config reset sequence.
741  * Only applicable to asics prior to vega10.
742  */
743 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
744 {
745 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
746 }
747 
748 /*
749  * GPU doorbell aperture helpers function.
750  */
751 /**
752  * amdgpu_device_doorbell_init - Init doorbell driver information.
753  *
754  * @adev: amdgpu_device pointer
755  *
756  * Init doorbell driver information (CIK)
757  * Returns 0 on success, error on failure.
758  */
759 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
760 {
761 
762 	/* No doorbell on SI hardware generation */
763 	if (adev->asic_type < CHIP_BONAIRE) {
764 		adev->doorbell.base = 0;
765 		adev->doorbell.size = 0;
766 		adev->doorbell.num_doorbells = 0;
767 		adev->doorbell.ptr = NULL;
768 		return 0;
769 	}
770 
771 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
772 		return -EINVAL;
773 
774 	amdgpu_asic_init_doorbell_index(adev);
775 
776 	/* doorbell bar mapping */
777 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
778 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
779 
780 	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
781 					     adev->doorbell_index.max_assignment+1);
782 	if (adev->doorbell.num_doorbells == 0)
783 		return -EINVAL;
784 
785 	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
786 	 * paging queue doorbell uses the second page. The
787 	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
788 	 * doorbells are in the first page. So with the paging queue enabled,
789 	 * the max num_doorbells should be incremented by 1 page (0x400 in dwords)
790 	 */
791 	if (adev->asic_type >= CHIP_VEGA10)
792 		adev->doorbell.num_doorbells += 0x400;
793 
794 	adev->doorbell.ptr = ioremap(adev->doorbell.base,
795 				     adev->doorbell.num_doorbells *
796 				     sizeof(u32));
797 	if (adev->doorbell.ptr == NULL)
798 		return -ENOMEM;
799 
800 	return 0;
801 }
802 
803 /**
804  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
805  *
806  * @adev: amdgpu_device pointer
807  *
808  * Tear down doorbell driver information (CIK)
809  */
810 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
811 {
812 	iounmap(adev->doorbell.ptr);
813 	adev->doorbell.ptr = NULL;
814 }
815 
816 
817 
818 /*
819  * amdgpu_device_wb_*()
820  * Writeback is the method by which the GPU updates special pages in memory
821  * with the status of certain GPU events (fences, ring pointers,etc.).
822  */
823 
824 /**
825  * amdgpu_device_wb_fini - Disable Writeback and free memory
826  *
827  * @adev: amdgpu_device pointer
828  *
829  * Disables Writeback and frees the Writeback memory (all asics).
830  * Used at driver shutdown.
831  */
832 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
833 {
834 	if (adev->wb.wb_obj) {
835 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
836 				      &adev->wb.gpu_addr,
837 				      (void **)&adev->wb.wb);
838 		adev->wb.wb_obj = NULL;
839 	}
840 }
841 
842 /**
843  * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
844  *
845  * @adev: amdgpu_device pointer
846  *
847  * Initializes writeback and allocates writeback memory (all asics).
848  * Used at driver startup.
849  * Returns 0 on success or a negative error code on failure.
850  */
851 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
852 {
853 	int r;
854 
855 	if (adev->wb.wb_obj == NULL) {
856 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
857 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
858 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
859 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
860 					    (void **)&adev->wb.wb);
861 		if (r) {
862 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
863 			return r;
864 		}
865 
866 		adev->wb.num_wb = AMDGPU_MAX_WB;
867 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
868 
869 		/* clear wb memory */
870 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
871 	}
872 
873 	return 0;
874 }
875 
876 /**
877  * amdgpu_device_wb_get - Allocate a wb entry
878  *
879  * @adev: amdgpu_device pointer
880  * @wb: wb index
881  *
882  * Allocate a wb slot for use by the driver (all asics).
883  * Returns 0 on success or -EINVAL on failure.
884  */
885 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
886 {
887 	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
888 
889 	if (offset < adev->wb.num_wb) {
890 		__set_bit(offset, adev->wb.used);
891 		*wb = offset << 3; /* convert to dw offset */
892 		return 0;
893 	} else {
894 		return -EINVAL;
895 	}
896 }
897 
898 /**
899  * amdgpu_device_wb_free - Free a wb entry
900  *
901  * @adev: amdgpu_device pointer
902  * @wb: wb index
903  *
904  * Free a wb slot allocated for use by the driver (all asics)
905  */
906 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
907 {
908 	wb >>= 3;
909 	if (wb < adev->wb.num_wb)
910 		__clear_bit(wb, adev->wb.used);
911 }
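
/*
 * Typical usage pattern (illustrative sketch): a caller allocates a slot,
 * uses the returned dword offset for both the CPU and GPU views of the
 * writeback page, and frees the slot again when done.
 *
 *	u32 wb;
 *	int r = amdgpu_device_wb_get(adev, &wb);
 *
 *	if (r)
 *		return r;
 *	cpu_ptr  = &adev->wb.wb[wb];
 *	gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */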
912 
913 /**
914  * amdgpu_device_resize_fb_bar - try to resize FB BAR
915  *
916  * @adev: amdgpu_device pointer
917  *
918  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
919  * to fail, but if any of the BARs is not accessible after the resize we abort
920  * driver loading by returning -ENODEV.
921  */
922 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
923 {
924 	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
925 	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
926 	struct pci_bus *root;
927 	struct resource *res;
928 	unsigned i;
929 	u16 cmd;
930 	int r;
931 
932 	/* Bypass for VF */
933 	if (amdgpu_sriov_vf(adev))
934 		return 0;
935 
936 	/* Check if the root BUS has 64bit memory resources */
937 	root = adev->pdev->bus;
938 	while (root->parent)
939 		root = root->parent;
940 
941 	pci_bus_for_each_resource(root, res, i) {
942 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
943 		    res->start > 0x100000000ull)
944 			break;
945 	}
946 
947 	/* Trying to resize is pointless without a root hub window above 4GB */
948 	if (!res)
949 		return 0;
950 
951 	/* Disable memory decoding while we change the BAR addresses and size */
952 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
953 	pci_write_config_word(adev->pdev, PCI_COMMAND,
954 			      cmd & ~PCI_COMMAND_MEMORY);
955 
956 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
957 	amdgpu_device_doorbell_fini(adev);
958 	if (adev->asic_type >= CHIP_BONAIRE)
959 		pci_release_resource(adev->pdev, 2);
960 
961 	pci_release_resource(adev->pdev, 0);
962 
963 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
964 	if (r == -ENOSPC)
965 		DRM_INFO("Not enough PCI address space for a large BAR.");
966 	else if (r && r != -ENOTSUPP)
967 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
968 
969 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
970 
971 	/* When the doorbell or fb BAR isn't available we have no chance of
972 	 * using the device.
973 	 */
974 	r = amdgpu_device_doorbell_init(adev);
975 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
976 		return -ENODEV;
977 
978 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
979 
980 	return 0;
981 }
982 
983 /*
984  * GPU helpers function.
985  */
986 /**
987  * amdgpu_device_need_post - check if the hw needs to be posted or not
988  *
989  * @adev: amdgpu_device pointer
990  *
991  * Check if the asic has been initialized (all asics) at driver startup,
992  * or if post is needed because a hw reset was performed.
993  * Returns true if need or false if not.
994  */
995 bool amdgpu_device_need_post(struct amdgpu_device *adev)
996 {
997 	uint32_t reg;
998 
999 	if (amdgpu_sriov_vf(adev))
1000 		return false;
1001 
1002 	if (amdgpu_passthrough(adev)) {
1003 		/* for FIJI: In the whole GPU pass-through virtualization case, after a VM
1004 		 * reboot some old SMC firmware still needs the driver to do a vPost,
1005 		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1006 		 * this flaw, so force vPost for SMC versions below 22.15.
1007 		 */
1008 		if (adev->asic_type == CHIP_FIJI) {
1009 			int err;
1010 			uint32_t fw_ver;
1011 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1012 			/* force vPost if error occurred */
1013 			if (err)
1014 				return true;
1015 
1016 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1017 			if (fw_ver < 0x00160e00)
1018 				return true;
1019 		}
1020 	}
1021 
1022 	if (adev->has_hw_reset) {
1023 		adev->has_hw_reset = false;
1024 		return true;
1025 	}
1026 
1027 	/* bios scratch used on CIK+ */
1028 	if (adev->asic_type >= CHIP_BONAIRE)
1029 		return amdgpu_atombios_scratch_need_asic_init(adev);
1030 
1031 	/* check MEM_SIZE for older asics */
1032 	reg = amdgpu_asic_get_config_memsize(adev);
1033 
1034 	if ((reg != 0) && (reg != 0xffffffff))
1035 		return false;
1036 
1037 	return true;
1038 }
1039 
1040 /* if we get transitioned to only one device, take VGA back */
1041 /**
1042  * amdgpu_device_vga_set_decode - enable/disable vga decode
1043  *
1044  * @cookie: amdgpu_device pointer
1045  * @state: enable/disable vga decode
1046  *
1047  * Enable/disable vga decode (all asics).
1048  * Returns VGA resource flags.
1049  */
1050 static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
1051 {
1052 	struct amdgpu_device *adev = cookie;
1053 	amdgpu_asic_set_vga_state(adev, state);
1054 	if (state)
1055 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1056 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1057 	else
1058 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1059 }
1060 
1061 /**
1062  * amdgpu_device_check_block_size - validate the vm block size
1063  *
1064  * @adev: amdgpu_device pointer
1065  *
1066  * Validates the vm block size specified via module parameter.
1067  * The vm block size defines number of bits in page table versus page directory,
1068  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1069  * page table and the remaining bits are in the page directory.
1070  */
1071 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1072 {
1073 	/* defines number of bits in page table versus page directory,
1074 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1075 	 * page table and the remaining bits are in the page directory */
1076 	if (amdgpu_vm_block_size == -1)
1077 		return;
1078 
1079 	if (amdgpu_vm_block_size < 9) {
1080 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1081 			 amdgpu_vm_block_size);
1082 		amdgpu_vm_block_size = -1;
1083 	}
1084 }
1085 
1086 /**
1087  * amdgpu_device_check_vm_size - validate the vm size
1088  *
1089  * @adev: amdgpu_device pointer
1090  *
1091  * Validates the vm size in GB specified via module parameter.
1092  * The VM size is the size of the GPU virtual memory space in GB.
1093  */
1094 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1095 {
1096 	/* no need to check the default value */
1097 	if (amdgpu_vm_size == -1)
1098 		return;
1099 
1100 	if (amdgpu_vm_size < 1) {
1101 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1102 			 amdgpu_vm_size);
1103 		amdgpu_vm_size = -1;
1104 	}
1105 }
1106 
1107 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1108 {
1109 	struct sysinfo si;
1110 	bool is_os_64 = (sizeof(void *) == 8);
1111 	uint64_t total_memory;
1112 	uint64_t dram_size_seven_GB = 0x1B8000000;
1113 	uint64_t dram_size_three_GB = 0xB8000000;
1114 
1115 	if (amdgpu_smu_memory_pool_size == 0)
1116 		return;
1117 
1118 	if (!is_os_64) {
1119 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1120 		goto def_value;
1121 	}
1122 	si_meminfo(&si);
1123 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1124 
1125 	if ((amdgpu_smu_memory_pool_size == 1) ||
1126 		(amdgpu_smu_memory_pool_size == 2)) {
1127 		if (total_memory < dram_size_three_GB)
1128 			goto def_value1;
1129 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1130 		(amdgpu_smu_memory_pool_size == 8)) {
1131 		if (total_memory < dram_size_seven_GB)
1132 			goto def_value1;
1133 	} else {
1134 		DRM_WARN("Smu memory pool size not supported\n");
1135 		goto def_value;
1136 	}
1137 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1138 
1139 	return;
1140 
1141 def_value1:
1142 	DRM_WARN("No enough system memory\n");
1143 def_value:
1144 	adev->pm.smu_prv_buffer_size = 0;
1145 }
1146 
1147 /**
1148  * amdgpu_device_check_arguments - validate module params
1149  *
1150  * @adev: amdgpu_device pointer
1151  *
1152  * Validates certain module parameters and updates
1153  * the associated values used by the driver (all asics).
1154  */
1155 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1156 {
1157 	if (amdgpu_sched_jobs < 4) {
1158 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1159 			 amdgpu_sched_jobs);
1160 		amdgpu_sched_jobs = 4;
1161 	} else if (!is_power_of_2(amdgpu_sched_jobs)){
1162 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1163 			 amdgpu_sched_jobs);
1164 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1165 	}
1166 
1167 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1168 		/* gart size must be greater or equal to 32M */
1169 		dev_warn(adev->dev, "gart size (%d) too small\n",
1170 			 amdgpu_gart_size);
1171 		amdgpu_gart_size = -1;
1172 	}
1173 
1174 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1175 		/* gtt size must be greater or equal to 32M */
1176 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1177 				 amdgpu_gtt_size);
1178 		amdgpu_gtt_size = -1;
1179 	}
1180 
1181 	/* valid range is between 4 and 9 inclusive */
1182 	if (amdgpu_vm_fragment_size != -1 &&
1183 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1184 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1185 		amdgpu_vm_fragment_size = -1;
1186 	}
1187 
1188 	amdgpu_device_check_smu_prv_buffer_size(adev);
1189 
1190 	amdgpu_device_check_vm_size(adev);
1191 
1192 	amdgpu_device_check_block_size(adev);
1193 
1194 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1195 
1196 	return 0;
1197 }
1198 
1199 /**
1200  * amdgpu_switcheroo_set_state - set switcheroo state
1201  *
1202  * @pdev: pci dev pointer
1203  * @state: vga_switcheroo state
1204  *
1205  * Callback for the switcheroo driver.  Suspends or resumes
1206  * the asics before or after they are powered up using ACPI methods.
1207  */
1208 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1209 {
1210 	struct drm_device *dev = pci_get_drvdata(pdev);
1211 	int r;
1212 
1213 	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
1214 		return;
1215 
1216 	if (state == VGA_SWITCHEROO_ON) {
1217 		pr_info("amdgpu: switched on\n");
1218 		/* don't suspend or resume card normally */
1219 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1220 
1221 		pci_set_power_state(dev->pdev, PCI_D0);
1222 		pci_restore_state(dev->pdev);
1223 		r = pci_enable_device(dev->pdev);
1224 		if (r)
1225 			DRM_WARN("pci_enable_device failed (%d)\n", r);
1226 		amdgpu_device_resume(dev, true);
1227 
1228 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1229 		drm_kms_helper_poll_enable(dev);
1230 	} else {
1231 		pr_info("amdgpu: switched off\n");
1232 		drm_kms_helper_poll_disable(dev);
1233 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1234 		amdgpu_device_suspend(dev, true);
1235 		pci_save_state(dev->pdev);
1236 		/* Shut down the device */
1237 		pci_disable_device(dev->pdev);
1238 		pci_set_power_state(dev->pdev, PCI_D3cold);
1239 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1240 	}
1241 }
1242 
1243 /**
1244  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1245  *
1246  * @pdev: pci dev pointer
1247  *
1248  * Callback for the switcheroo driver.  Check if the switcheroo
1249  * state can be changed.
1250  * Returns true if the state can be changed, false if not.
1251  */
1252 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1253 {
1254 	struct drm_device *dev = pci_get_drvdata(pdev);
1255 
1256 	/*
1257 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1258 	* locking inversion with the driver load path. And the access here is
1259 	* completely racy anyway. So don't bother with locking for now.
1260 	*/
1261 	return atomic_read(&dev->open_count) == 0;
1262 }
1263 
1264 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1265 	.set_gpu_state = amdgpu_switcheroo_set_state,
1266 	.reprobe = NULL,
1267 	.can_switch = amdgpu_switcheroo_can_switch,
1268 };
1269 
1270 /**
1271  * amdgpu_device_ip_set_clockgating_state - set the CG state
1272  *
1273  * @dev: amdgpu_device pointer
1274  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1275  * @state: clockgating state (gate or ungate)
1276  *
1277  * Sets the requested clockgating state for all instances of
1278  * the hardware IP specified.
1279  * Returns the error code from the last instance.
1280  */
1281 int amdgpu_device_ip_set_clockgating_state(void *dev,
1282 					   enum amd_ip_block_type block_type,
1283 					   enum amd_clockgating_state state)
1284 {
1285 	struct amdgpu_device *adev = dev;
1286 	int i, r = 0;
1287 
1288 	for (i = 0; i < adev->num_ip_blocks; i++) {
1289 		if (!adev->ip_blocks[i].status.valid)
1290 			continue;
1291 		if (adev->ip_blocks[i].version->type != block_type)
1292 			continue;
1293 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1294 			continue;
1295 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1296 			(void *)adev, state);
1297 		if (r)
1298 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1299 				  adev->ip_blocks[i].version->funcs->name, r);
1300 	}
1301 	return r;
1302 }
1303 
1304 /**
1305  * amdgpu_device_ip_set_powergating_state - set the PG state
1306  *
1307  * @dev: amdgpu_device pointer
1308  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1309  * @state: powergating state (gate or ungate)
1310  *
1311  * Sets the requested powergating state for all instances of
1312  * the hardware IP specified.
1313  * Returns the error code from the last instance.
1314  */
1315 int amdgpu_device_ip_set_powergating_state(void *dev,
1316 					   enum amd_ip_block_type block_type,
1317 					   enum amd_powergating_state state)
1318 {
1319 	struct amdgpu_device *adev = dev;
1320 	int i, r = 0;
1321 
1322 	for (i = 0; i < adev->num_ip_blocks; i++) {
1323 		if (!adev->ip_blocks[i].status.valid)
1324 			continue;
1325 		if (adev->ip_blocks[i].version->type != block_type)
1326 			continue;
1327 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1328 			continue;
1329 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1330 			(void *)adev, state);
1331 		if (r)
1332 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1333 				  adev->ip_blocks[i].version->funcs->name, r);
1334 	}
1335 	return r;
1336 }
1337 
1338 /**
1339  * amdgpu_device_ip_get_clockgating_state - get the CG state
1340  *
1341  * @adev: amdgpu_device pointer
1342  * @flags: clockgating feature flags
1343  *
1344  * Walks the list of IPs on the device and updates the clockgating
1345  * flags for each IP.
1346  * Updates @flags with the feature flags for each hardware IP where
1347  * clockgating is enabled.
1348  */
1349 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1350 					    u32 *flags)
1351 {
1352 	int i;
1353 
1354 	for (i = 0; i < adev->num_ip_blocks; i++) {
1355 		if (!adev->ip_blocks[i].status.valid)
1356 			continue;
1357 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1358 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1359 	}
1360 }
1361 
1362 /**
1363  * amdgpu_device_ip_wait_for_idle - wait for idle
1364  *
1365  * @adev: amdgpu_device pointer
1366  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1367  *
1368  * Waits for the requested hardware IP to be idle.
1369  * Returns 0 for success or a negative error code on failure.
1370  */
1371 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1372 				   enum amd_ip_block_type block_type)
1373 {
1374 	int i, r;
1375 
1376 	for (i = 0; i < adev->num_ip_blocks; i++) {
1377 		if (!adev->ip_blocks[i].status.valid)
1378 			continue;
1379 		if (adev->ip_blocks[i].version->type == block_type) {
1380 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1381 			if (r)
1382 				return r;
1383 			break;
1384 		}
1385 	}
1386 	return 0;
1387 
1388 }
1389 
1390 /**
1391  * amdgpu_device_ip_is_idle - is the hardware IP idle
1392  *
1393  * @adev: amdgpu_device pointer
1394  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1395  *
1396  * Check if the hardware IP is idle or not.
1397  * Returns true if the IP is idle, false if not.
1398  */
1399 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1400 			      enum amd_ip_block_type block_type)
1401 {
1402 	int i;
1403 
1404 	for (i = 0; i < adev->num_ip_blocks; i++) {
1405 		if (!adev->ip_blocks[i].status.valid)
1406 			continue;
1407 		if (adev->ip_blocks[i].version->type == block_type)
1408 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1409 	}
1410 	return true;
1411 
1412 }
1413 
1414 /**
1415  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1416  *
1417  * @adev: amdgpu_device pointer
1418  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1419  *
1420  * Returns a pointer to the hardware IP block structure
1421  * if it exists for the asic, otherwise NULL.
1422  */
1423 struct amdgpu_ip_block *
1424 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1425 			      enum amd_ip_block_type type)
1426 {
1427 	int i;
1428 
1429 	for (i = 0; i < adev->num_ip_blocks; i++)
1430 		if (adev->ip_blocks[i].version->type == type)
1431 			return &adev->ip_blocks[i];
1432 
1433 	return NULL;
1434 }
1435 
1436 /**
1437  * amdgpu_device_ip_block_version_cmp
1438  *
1439  * @adev: amdgpu_device pointer
1440  * @type: enum amd_ip_block_type
1441  * @major: major version
1442  * @minor: minor version
1443  *
1444  * Returns 0 if the IP block version is equal to or greater than the one requested,
1445  * 1 if it is smaller or the ip_block doesn't exist.
1446  */
1447 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1448 				       enum amd_ip_block_type type,
1449 				       u32 major, u32 minor)
1450 {
1451 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1452 
1453 	if (ip_block && ((ip_block->version->major > major) ||
1454 			((ip_block->version->major == major) &&
1455 			(ip_block->version->minor >= minor))))
1456 		return 0;
1457 
1458 	return 1;
1459 }
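
/*
 * Illustrative sketch: checking that the GFX IP block is at least a given
 * version before relying on a newer feature.
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 9, 0)) {
 *		... GFX IP is version 9.0 or newer ...
 *	}
 */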
1460 
1461 /**
1462  * amdgpu_device_ip_block_add
1463  *
1464  * @adev: amdgpu_device pointer
1465  * @ip_block_version: pointer to the IP to add
1466  *
1467  * Adds the IP block driver information to the collection of IPs
1468  * on the asic.
1469  */
1470 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1471 			       const struct amdgpu_ip_block_version *ip_block_version)
1472 {
1473 	if (!ip_block_version)
1474 		return -EINVAL;
1475 
1476 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1477 		  ip_block_version->funcs->name);
1478 
1479 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1480 
1481 	return 0;
1482 }
1483 
1484 /**
1485  * amdgpu_device_enable_virtual_display - enable virtual display feature
1486  *
1487  * @adev: amdgpu_device pointer
1488  *
1489  * Enables the virtual display feature if the user has enabled it via
1490  * the module parameter virtual_display.  This feature provides a virtual
1491  * display hardware on headless boards or in virtualized environments.
1492  * This function parses and validates the configuration string specified by
1493  * the user and configures the virtual display configuration (number of
1494  * virtual connectors, crtcs, etc.) specified.
1495  */
1496 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1497 {
1498 	adev->enable_virtual_display = false;
1499 
1500 	if (amdgpu_virtual_display) {
1501 		struct drm_device *ddev = adev->ddev;
1502 		const char *pci_address_name = pci_name(ddev->pdev);
1503 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1504 
1505 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1506 		pciaddstr_tmp = pciaddstr;
1507 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1508 			pciaddname = strsep(&pciaddname_tmp, ",");
1509 			if (!strcmp("all", pciaddname)
1510 			    || !strcmp(pci_address_name, pciaddname)) {
1511 				long num_crtc;
1512 				int res = -1;
1513 
1514 				adev->enable_virtual_display = true;
1515 
1516 				if (pciaddname_tmp)
1517 					res = kstrtol(pciaddname_tmp, 10,
1518 						      &num_crtc);
1519 
1520 				if (!res) {
1521 					if (num_crtc < 1)
1522 						num_crtc = 1;
1523 					if (num_crtc > 6)
1524 						num_crtc = 6;
1525 					adev->mode_info.num_crtc = num_crtc;
1526 				} else {
1527 					adev->mode_info.num_crtc = 1;
1528 				}
1529 				break;
1530 			}
1531 		}
1532 
1533 		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1534 			 amdgpu_virtual_display, pci_address_name,
1535 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1536 
1537 		kfree(pciaddstr);
1538 	}
1539 }
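
/*
 * Illustrative examples of the virtual_display module parameter format
 * parsed above (PCI addresses are placeholders): entries are separated by
 * ';', and each entry is a PCI address or "all", optionally followed by
 * ',' and the number of virtual crtcs to create.
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 *	amdgpu.virtual_display=all,1
 */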
1540 
1541 /**
1542  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1543  *
1544  * @adev: amdgpu_device pointer
1545  *
1546  * Parses the asic configuration parameters specified in the gpu info
1547  * firmware and makes them available to the driver for use in configuring
1548  * the asic.
1549  * Returns 0 on success, -EINVAL on failure.
1550  */
1551 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1552 {
1553 	const char *chip_name;
1554 	char fw_name[30];
1555 	int err;
1556 	const struct gpu_info_firmware_header_v1_0 *hdr;
1557 
1558 	adev->firmware.gpu_info_fw = NULL;
1559 
1560 	switch (adev->asic_type) {
1561 	case CHIP_TOPAZ:
1562 	case CHIP_TONGA:
1563 	case CHIP_FIJI:
1564 	case CHIP_POLARIS10:
1565 	case CHIP_POLARIS11:
1566 	case CHIP_POLARIS12:
1567 	case CHIP_VEGAM:
1568 	case CHIP_CARRIZO:
1569 	case CHIP_STONEY:
1570 #ifdef CONFIG_DRM_AMDGPU_SI
1571 	case CHIP_VERDE:
1572 	case CHIP_TAHITI:
1573 	case CHIP_PITCAIRN:
1574 	case CHIP_OLAND:
1575 	case CHIP_HAINAN:
1576 #endif
1577 #ifdef CONFIG_DRM_AMDGPU_CIK
1578 	case CHIP_BONAIRE:
1579 	case CHIP_HAWAII:
1580 	case CHIP_KAVERI:
1581 	case CHIP_KABINI:
1582 	case CHIP_MULLINS:
1583 #endif
1584 	case CHIP_VEGA20:
1585 	default:
1586 		return 0;
1587 	case CHIP_VEGA10:
1588 		chip_name = "vega10";
1589 		break;
1590 	case CHIP_VEGA12:
1591 		chip_name = "vega12";
1592 		break;
1593 	case CHIP_RAVEN:
1594 		if (adev->rev_id >= 8)
1595 			chip_name = "raven2";
1596 		else if (adev->pdev->device == 0x15d8)
1597 			chip_name = "picasso";
1598 		else
1599 			chip_name = "raven";
1600 		break;
1601 	case CHIP_ARCTURUS:
1602 		chip_name = "arcturus";
1603 		break;
1604 	case CHIP_RENOIR:
1605 		chip_name = "renoir";
1606 		break;
1607 	case CHIP_NAVI10:
1608 		chip_name = "navi10";
1609 		break;
1610 	case CHIP_NAVI14:
1611 		chip_name = "navi14";
1612 		break;
1613 	case CHIP_NAVI12:
1614 		chip_name = "navi12";
1615 		break;
1616 	}
1617 
1618 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1619 	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1620 	if (err) {
1621 		dev_err(adev->dev,
1622 			"Failed to load gpu_info firmware \"%s\"\n",
1623 			fw_name);
1624 		goto out;
1625 	}
1626 	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1627 	if (err) {
1628 		dev_err(adev->dev,
1629 			"Failed to validate gpu_info firmware \"%s\"\n",
1630 			fw_name);
1631 		goto out;
1632 	}
1633 
1634 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1635 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1636 
1637 	switch (hdr->version_major) {
1638 	case 1:
1639 	{
1640 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1641 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1642 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1643 
1644 		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1645 			goto parse_soc_bounding_box;
1646 
1647 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1648 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1649 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1650 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1651 		adev->gfx.config.max_texture_channel_caches =
1652 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1653 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1654 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1655 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1656 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1657 		adev->gfx.config.double_offchip_lds_buf =
1658 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1659 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1660 		adev->gfx.cu_info.max_waves_per_simd =
1661 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1662 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1663 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1664 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1665 		if (hdr->version_minor >= 1) {
1666 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1667 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1668 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1669 			adev->gfx.config.num_sc_per_sh =
1670 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1671 			adev->gfx.config.num_packer_per_sc =
1672 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1673 		}
1674 
1675 parse_soc_bounding_box:
1676 		/*
1677 		 * soc bounding box info is not integrated in discovery table,
1678 		 * we always need to parse it from gpu info firmware.
1679 		 */
1680 		if (hdr->version_minor == 2) {
1681 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1682 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1683 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1684 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1685 		}
1686 		break;
1687 	}
1688 	default:
1689 		dev_err(adev->dev,
1690 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1691 		err = -EINVAL;
1692 		goto out;
1693 	}
1694 out:
1695 	return err;
1696 }
1697 
1698 /**
1699  * amdgpu_device_ip_early_init - run early init for hardware IPs
1700  *
1701  * @adev: amdgpu_device pointer
1702  *
1703  * Early initialization pass for hardware IPs.  The hardware IPs that make
1704  * up each asic are discovered and each IP's early_init callback is run.  This
1705  * is the first stage in initializing the asic.
1706  * Returns 0 on success, negative error code on failure.
1707  */
1708 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1709 {
1710 	int i, r;
1711 
1712 	amdgpu_device_enable_virtual_display(adev);
1713 
1714 	switch (adev->asic_type) {
1715 	case CHIP_TOPAZ:
1716 	case CHIP_TONGA:
1717 	case CHIP_FIJI:
1718 	case CHIP_POLARIS10:
1719 	case CHIP_POLARIS11:
1720 	case CHIP_POLARIS12:
1721 	case CHIP_VEGAM:
1722 	case CHIP_CARRIZO:
1723 	case CHIP_STONEY:
1724 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1725 			adev->family = AMDGPU_FAMILY_CZ;
1726 		else
1727 			adev->family = AMDGPU_FAMILY_VI;
1728 
1729 		r = vi_set_ip_blocks(adev);
1730 		if (r)
1731 			return r;
1732 		break;
1733 #ifdef CONFIG_DRM_AMDGPU_SI
1734 	case CHIP_VERDE:
1735 	case CHIP_TAHITI:
1736 	case CHIP_PITCAIRN:
1737 	case CHIP_OLAND:
1738 	case CHIP_HAINAN:
1739 		adev->family = AMDGPU_FAMILY_SI;
1740 		r = si_set_ip_blocks(adev);
1741 		if (r)
1742 			return r;
1743 		break;
1744 #endif
1745 #ifdef CONFIG_DRM_AMDGPU_CIK
1746 	case CHIP_BONAIRE:
1747 	case CHIP_HAWAII:
1748 	case CHIP_KAVERI:
1749 	case CHIP_KABINI:
1750 	case CHIP_MULLINS:
1751 		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1752 			adev->family = AMDGPU_FAMILY_CI;
1753 		else
1754 			adev->family = AMDGPU_FAMILY_KV;
1755 
1756 		r = cik_set_ip_blocks(adev);
1757 		if (r)
1758 			return r;
1759 		break;
1760 #endif
1761 	case CHIP_VEGA10:
1762 	case CHIP_VEGA12:
1763 	case CHIP_VEGA20:
1764 	case CHIP_RAVEN:
1765 	case CHIP_ARCTURUS:
1766 	case CHIP_RENOIR:
1767 		if (adev->asic_type == CHIP_RAVEN ||
1768 		    adev->asic_type == CHIP_RENOIR)
1769 			adev->family = AMDGPU_FAMILY_RV;
1770 		else
1771 			adev->family = AMDGPU_FAMILY_AI;
1772 
1773 		r = soc15_set_ip_blocks(adev);
1774 		if (r)
1775 			return r;
1776 		break;
1777 	case  CHIP_NAVI10:
1778 	case  CHIP_NAVI14:
1779 	case  CHIP_NAVI12:
1780 		adev->family = AMDGPU_FAMILY_NV;
1781 
1782 		r = nv_set_ip_blocks(adev);
1783 		if (r)
1784 			return r;
1785 		break;
1786 	default:
1787 		/* FIXME: not supported yet */
1788 		return -EINVAL;
1789 	}
1790 
1791 	r = amdgpu_device_parse_gpu_info_fw(adev);
1792 	if (r)
1793 		return r;
1794 
1795 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1796 		amdgpu_discovery_get_gfx_info(adev);
1797 
1798 	amdgpu_amdkfd_device_probe(adev);
1799 
1800 	if (amdgpu_sriov_vf(adev)) {
1801 		/* handle vbios stuff prior full access mode for new handshake */
1802 		if (adev->virt.req_init_data_ver == 1) {
1803 			if (!amdgpu_get_bios(adev)) {
1804 				DRM_ERROR("failed to get vbios\n");
1805 				return -EINVAL;
1806 			}
1807 
1808 			r = amdgpu_atombios_init(adev);
1809 			if (r) {
1810 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1811 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1812 				return r;
1813 			}
1814 		}
1815 	}
1816 
1817 	/* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
1818 	 * will not be prepared by the host for this VF */
1819 	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
1820 		r = amdgpu_virt_request_full_gpu(adev, true);
1821 		if (r)
1822 			return r;
1823 	}
1824 
1825 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1826 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
1827 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1828 
1829 	for (i = 0; i < adev->num_ip_blocks; i++) {
1830 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1831 			DRM_ERROR("disabled ip block: %d <%s>\n",
1832 				  i, adev->ip_blocks[i].version->funcs->name);
1833 			adev->ip_blocks[i].status.valid = false;
1834 		} else {
1835 			if (adev->ip_blocks[i].version->funcs->early_init) {
1836 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1837 				if (r == -ENOENT) {
1838 					adev->ip_blocks[i].status.valid = false;
1839 				} else if (r) {
1840 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1841 						  adev->ip_blocks[i].version->funcs->name, r);
1842 					return r;
1843 				} else {
1844 					adev->ip_blocks[i].status.valid = true;
1845 				}
1846 			} else {
1847 				adev->ip_blocks[i].status.valid = true;
1848 			}
1849 		}
1850 		/* get the vbios after the asic_funcs are set up */
1851 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1852 			/* skip vbios handling for new handshake */
1853 			if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1854 				continue;
1855 
1856 			/* Read BIOS */
1857 			if (!amdgpu_get_bios(adev))
1858 				return -EINVAL;
1859 
1860 			r = amdgpu_atombios_init(adev);
1861 			if (r) {
1862 				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1863 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1864 				return r;
1865 			}
1866 		}
1867 	}
1868 
1869 	adev->cg_flags &= amdgpu_cg_mask;
1870 	adev->pg_flags &= amdgpu_pg_mask;
1871 
1872 	return 0;
1873 }
1874 
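/*
 * Hardware init phase 1: run hw_init only for the COMMON and IH blocks
 * (and PSP when running as an SR-IOV VF) ahead of everything else; the
 * remaining blocks are brought up in amdgpu_device_ip_hw_init_phase2().
 */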
1875 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1876 {
1877 	int i, r;
1878 
1879 	for (i = 0; i < adev->num_ip_blocks; i++) {
1880 		if (!adev->ip_blocks[i].status.sw)
1881 			continue;
1882 		if (adev->ip_blocks[i].status.hw)
1883 			continue;
1884 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1885 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1886 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1887 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1888 			if (r) {
1889 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1890 					  adev->ip_blocks[i].version->funcs->name, r);
1891 				return r;
1892 			}
1893 			adev->ip_blocks[i].status.hw = true;
1894 		}
1895 	}
1896 
1897 	return 0;
1898 }
1899 
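/*
 * Hardware init phase 2: run hw_init for every remaining IP block that has
 * completed sw_init but was not already brought up in phase 1.
 */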
1900 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1901 {
1902 	int i, r;
1903 
1904 	for (i = 0; i < adev->num_ip_blocks; i++) {
1905 		if (!adev->ip_blocks[i].status.sw)
1906 			continue;
1907 		if (adev->ip_blocks[i].status.hw)
1908 			continue;
1909 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1910 		if (r) {
1911 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1912 				  adev->ip_blocks[i].version->funcs->name, r);
1913 			return r;
1914 		}
1915 		adev->ip_blocks[i].status.hw = true;
1916 	}
1917 
1918 	return 0;
1919 }
1920 
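/*
 * Firmware loading step between the two hw_init phases: on VEGA10 and newer
 * the PSP block is hw_init'ed (or resumed during suspend/reset) here, then
 * the SMU firmware is loaded, except on SR-IOV VFs other than Tonga.
 */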
1921 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1922 {
1923 	int r = 0;
1924 	int i;
1925 	uint32_t smu_version;
1926 
1927 	if (adev->asic_type >= CHIP_VEGA10) {
1928 		for (i = 0; i < adev->num_ip_blocks; i++) {
1929 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1930 				continue;
1931 
1932 			/* no need to do the fw loading again if already done */
1933 			if (adev->ip_blocks[i].status.hw == true)
1934 				break;
1935 
1936 			if (adev->in_gpu_reset || adev->in_suspend) {
1937 				r = adev->ip_blocks[i].version->funcs->resume(adev);
1938 				if (r) {
1939 					DRM_ERROR("resume of IP block <%s> failed %d\n",
1940 							  adev->ip_blocks[i].version->funcs->name, r);
1941 					return r;
1942 				}
1943 			} else {
1944 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1945 				if (r) {
1946 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1947 							  adev->ip_blocks[i].version->funcs->name, r);
1948 					return r;
1949 				}
1950 			}
1951 
1952 			adev->ip_blocks[i].status.hw = true;
1953 			break;
1954 		}
1955 	}
1956 
1957 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1958 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1959 
1960 	return r;
1961 }
1962 
1963 /**
1964  * amdgpu_device_ip_init - run init for hardware IPs
1965  *
1966  * @adev: amdgpu_device pointer
1967  *
1968  * Main initialization pass for hardware IPs.  The list of all the hardware
1969  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1970  * are run.  sw_init initializes the software state associated with each IP
1971  * and hw_init initializes the hardware associated with each IP.
1972  * Returns 0 on success, negative error code on failure.
1973  */
1974 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1975 {
1976 	int i, r;
1977 
1978 	r = amdgpu_ras_init(adev);
1979 	if (r)
1980 		return r;
1981 
1982 	if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1983 		r = amdgpu_virt_request_full_gpu(adev, true);
1984 		if (r)
1985 			return -EAGAIN;
1986 	}
1987 
1988 	for (i = 0; i < adev->num_ip_blocks; i++) {
1989 		if (!adev->ip_blocks[i].status.valid)
1990 			continue;
1991 		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1992 		if (r) {
1993 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1994 				  adev->ip_blocks[i].version->funcs->name, r);
1995 			goto init_failed;
1996 		}
1997 		adev->ip_blocks[i].status.sw = true;
1998 
1999 		/* need to do gmc hw init early so we can allocate gpu mem */
2000 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2001 			r = amdgpu_device_vram_scratch_init(adev);
2002 			if (r) {
2003 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2004 				goto init_failed;
2005 			}
2006 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2007 			if (r) {
2008 				DRM_ERROR("hw_init %d failed %d\n", i, r);
2009 				goto init_failed;
2010 			}
2011 			r = amdgpu_device_wb_init(adev);
2012 			if (r) {
2013 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2014 				goto init_failed;
2015 			}
2016 			adev->ip_blocks[i].status.hw = true;
2017 
2018 			/* right after GMC hw init, we create CSA */
2019 			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2020 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2021 								AMDGPU_GEM_DOMAIN_VRAM,
2022 								AMDGPU_CSA_SIZE);
2023 				if (r) {
2024 					DRM_ERROR("allocate CSA failed %d\n", r);
2025 					goto init_failed;
2026 				}
2027 			}
2028 		}
2029 	}
2030 
2031 	if (amdgpu_sriov_vf(adev))
2032 		amdgpu_virt_init_data_exchange(adev);
2033 
2034 	r = amdgpu_ib_pool_init(adev);
2035 	if (r) {
2036 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2037 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2038 		goto init_failed;
2039 	}
2040 
2041 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
2042 	if (r)
2043 		goto init_failed;
2044 
2045 	r = amdgpu_device_ip_hw_init_phase1(adev);
2046 	if (r)
2047 		goto init_failed;
2048 
2049 	r = amdgpu_device_fw_loading(adev);
2050 	if (r)
2051 		goto init_failed;
2052 
2053 	r = amdgpu_device_ip_hw_init_phase2(adev);
2054 	if (r)
2055 		goto init_failed;
2056 
2057 	/*
2058 	 * Retired pages will be loaded from eeprom and reserved here;
2059 	 * this must be called after amdgpu_device_ip_hw_init_phase2 since
2060 	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2061 	 * functional for I2C communication, which is only true at this point.
2062 	 * recovery_init may fail, but it can free all resources allocated by
2063 	 * itself and its failure should not stop the amdgpu init process.
2064 	 *
2065 	 * Note: theoretically, this should be called before all vram allocations
2066 	 * to protect retired pages from abuse.
2067 	 */
2068 	amdgpu_ras_recovery_init(adev);
2069 
2070 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2071 		amdgpu_xgmi_add_device(adev);
2072 	amdgpu_amdkfd_device_init(adev);
2073 
2074 	amdgpu_fru_get_product_info(adev);
2075 
2076 init_failed:
2077 	if (amdgpu_sriov_vf(adev))
2078 		amdgpu_virt_release_full_gpu(adev, true);
2079 
2080 	return r;
2081 }
2082 
2083 /**
2084  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2085  *
2086  * @adev: amdgpu_device pointer
2087  *
2088  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2089  * this function before a GPU reset.  If the value is retained after a
2090  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2091  */
2092 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2093 {
2094 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2095 }
2096 
2097 /**
2098  * amdgpu_device_check_vram_lost - check if vram is valid
2099  *
2100  * @adev: amdgpu_device pointer
2101  *
2102  * Checks the reset magic value written to the gart pointer in VRAM.
2103  * The driver calls this after a GPU reset to see if the contents of
2104  * VRAM have been lost or not.
2105  * Returns true if vram is lost, false if not.
2106  */
2107 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2108 {
2109 	return !!memcmp(adev->gart.ptr, adev->reset_magic,
2110 			AMDGPU_RESET_MAGIC_NUM);
2111 }
2112 
2113 /**
2114  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2115  *
2116  * @adev: amdgpu_device pointer
2117  * @state: clockgating state (gate or ungate)
2118  *
2119  * The list of all the hardware IPs that make up the asic is walked and the
2120  * set_clockgating_state callbacks are run.
2121  * The late init pass enables clockgating for hardware IPs; the fini and
2122  * suspend paths disable clockgating for hardware IPs.
2123  * Returns 0 on success, negative error code on failure.
2124  */
2125 
2126 static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2127 						enum amd_clockgating_state state)
2128 {
2129 	int i, j, r;
2130 
2131 	if (amdgpu_emu_mode == 1)
2132 		return 0;
2133 
2134 	for (j = 0; j < adev->num_ip_blocks; j++) {
2135 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2136 		if (!adev->ip_blocks[i].status.late_initialized)
2137 			continue;
2138 		/* skip CG for VCE/UVD, it's handled specially */
2139 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2140 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2141 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2142 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2143 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2144 			/* enable clockgating to save power */
2145 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2146 										     state);
2147 			if (r) {
2148 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2149 					  adev->ip_blocks[i].version->funcs->name, r);
2150 				return r;
2151 			}
2152 		}
2153 	}
2154 
2155 	return 0;
2156 }
2157 
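/*
 * Powergating counterpart of amdgpu_device_set_cg_state(): walk the IP blocks
 * (forward when gating, in reverse when ungating) and call their
 * set_powergating_state callbacks, skipping UVD/VCE/VCN/JPEG which are
 * handled separately.
 */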
2158 static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
2159 {
2160 	int i, j, r;
2161 
2162 	if (amdgpu_emu_mode == 1)
2163 		return 0;
2164 
2165 	for (j = 0; j < adev->num_ip_blocks; j++) {
2166 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2167 		if (!adev->ip_blocks[i].status.late_initialized)
2168 			continue;
2169 		/* skip PG for VCE/UVD, it's handled specially */
2170 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2171 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2172 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2173 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2174 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2175 			/* enable powergating to save power */
2176 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2177 											state);
2178 			if (r) {
2179 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2180 					  adev->ip_blocks[i].version->funcs->name, r);
2181 				return r;
2182 			}
2183 		}
2184 	}
2185 	return 0;
2186 }
2187 
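/*
 * Enable the mgpu fan boost feature on every registered dGPU whose powerplay
 * implementation supports it; see the check below for the two-dGPU minimum.
 */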
2188 static int amdgpu_device_enable_mgpu_fan_boost(void)
2189 {
2190 	struct amdgpu_gpu_instance *gpu_ins;
2191 	struct amdgpu_device *adev;
2192 	int i, ret = 0;
2193 
2194 	mutex_lock(&mgpu_info.mutex);
2195 
2196 	/*
2197 	 * MGPU fan boost feature should be enabled
2198 	 * only when there are two or more dGPUs in
2199 	 * the system
2200 	 */
2201 	if (mgpu_info.num_dgpu < 2)
2202 		goto out;
2203 
2204 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2205 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2206 		adev = gpu_ins->adev;
2207 		if (!(adev->flags & AMD_IS_APU) &&
2208 		    !gpu_ins->mgpu_fan_enabled &&
2209 		    adev->powerplay.pp_funcs &&
2210 		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2211 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2212 			if (ret)
2213 				break;
2214 
2215 			gpu_ins->mgpu_fan_enabled = 1;
2216 		}
2217 	}
2218 
2219 out:
2220 	mutex_unlock(&mgpu_info.mutex);
2221 
2222 	return ret;
2223 }
2224 
2225 /**
2226  * amdgpu_device_ip_late_init - run late init for hardware IPs
2227  *
2228  * @adev: amdgpu_device pointer
2229  *
2230  * Late initialization pass for hardware IPs.  The list of all the hardware
2231  * IPs that make up the asic is walked and the late_init callbacks are run.
2232  * late_init covers any special initialization that an IP requires
2233  * after all of the IPs have been initialized or something that needs to happen
2234  * late in the init process.
2235  * Returns 0 on success, negative error code on failure.
2236  */
2237 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2238 {
2239 	struct amdgpu_gpu_instance *gpu_instance;
2240 	int i = 0, r;
2241 
2242 	for (i = 0; i < adev->num_ip_blocks; i++) {
2243 		if (!adev->ip_blocks[i].status.hw)
2244 			continue;
2245 		if (adev->ip_blocks[i].version->funcs->late_init) {
2246 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2247 			if (r) {
2248 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2249 					  adev->ip_blocks[i].version->funcs->name, r);
2250 				return r;
2251 			}
2252 		}
2253 		adev->ip_blocks[i].status.late_initialized = true;
2254 	}
2255 
2256 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2257 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2258 
2259 	amdgpu_device_fill_reset_magic(adev);
2260 
2261 	r = amdgpu_device_enable_mgpu_fan_boost();
2262 	if (r)
2263 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2264 
2265 
2266 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2267 		mutex_lock(&mgpu_info.mutex);
2268 
2269 		/*
2270 		 * Reset device p-state to low as this was booted with high.
2271 		 *
2272 		 * This should be performed only after all devices from the same
2273 		 * hive get initialized.
2274 		 *
2275 		 * However, the number of devices in the hive is not known in
2276 		 * advance, as it is counted one by one during device initialization.
2277 		 *
2278 		 * So, we wait until all XGMI interlinked devices have initialized.
2279 		 * This may bring some delays as those devices may come from
2280 		 * different hives. But that should be OK.
2281 		 */
2282 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2283 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2284 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2285 				if (gpu_instance->adev->flags & AMD_IS_APU)
2286 					continue;
2287 
2288 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2289 				if (r) {
2290 					DRM_ERROR("pstate setting failed (%d).\n", r);
2291 					break;
2292 				}
2293 			}
2294 		}
2295 
2296 		mutex_unlock(&mgpu_info.mutex);
2297 	}
2298 
2299 	return 0;
2300 }
2301 
2302 /**
2303  * amdgpu_device_ip_fini - run fini for hardware IPs
2304  *
2305  * @adev: amdgpu_device pointer
2306  *
2307  * Main teardown pass for hardware IPs.  The list of all the hardware
2308  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2309  * are run.  hw_fini tears down the hardware associated with each IP
2310  * and sw_fini tears down any software state associated with each IP.
2311  * Returns 0 on success, negative error code on failure.
2312  */
2313 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2314 {
2315 	int i, r;
2316 
2317 	amdgpu_ras_pre_fini(adev);
2318 
2319 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2320 		amdgpu_xgmi_remove_device(adev);
2321 
2322 	amdgpu_amdkfd_device_fini(adev);
2323 
2324 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2325 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2326 
2327 	/* need to disable SMC first */
2328 	for (i = 0; i < adev->num_ip_blocks; i++) {
2329 		if (!adev->ip_blocks[i].status.hw)
2330 			continue;
2331 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2332 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2333 			/* XXX handle errors */
2334 			if (r) {
2335 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2336 					  adev->ip_blocks[i].version->funcs->name, r);
2337 			}
2338 			adev->ip_blocks[i].status.hw = false;
2339 			break;
2340 		}
2341 	}
2342 
2343 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2344 		if (!adev->ip_blocks[i].status.hw)
2345 			continue;
2346 
2347 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2348 		/* XXX handle errors */
2349 		if (r) {
2350 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2351 				  adev->ip_blocks[i].version->funcs->name, r);
2352 		}
2353 
2354 		adev->ip_blocks[i].status.hw = false;
2355 	}
2356 
2357 
2358 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2359 		if (!adev->ip_blocks[i].status.sw)
2360 			continue;
2361 
2362 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2363 			amdgpu_ucode_free_bo(adev);
2364 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2365 			amdgpu_device_wb_fini(adev);
2366 			amdgpu_device_vram_scratch_fini(adev);
2367 			amdgpu_ib_pool_fini(adev);
2368 		}
2369 
2370 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2371 		/* XXX handle errors */
2372 		if (r) {
2373 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2374 				  adev->ip_blocks[i].version->funcs->name, r);
2375 		}
2376 		adev->ip_blocks[i].status.sw = false;
2377 		adev->ip_blocks[i].status.valid = false;
2378 	}
2379 
2380 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2381 		if (!adev->ip_blocks[i].status.late_initialized)
2382 			continue;
2383 		if (adev->ip_blocks[i].version->funcs->late_fini)
2384 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2385 		adev->ip_blocks[i].status.late_initialized = false;
2386 	}
2387 
2388 	amdgpu_ras_fini(adev);
2389 
2390 	if (amdgpu_sriov_vf(adev))
2391 		if (amdgpu_virt_release_full_gpu(adev, false))
2392 			DRM_ERROR("failed to release exclusive mode on fini\n");
2393 
2394 	return 0;
2395 }
2396 
2397 /**
2398  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2399  *
2400  * @work: work_struct.
2401  */
2402 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2403 {
2404 	struct amdgpu_device *adev =
2405 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2406 	int r;
2407 
2408 	r = amdgpu_ib_ring_tests(adev);
2409 	if (r)
2410 		DRM_ERROR("ib ring test failed (%d).\n", r);
2411 }
2412 
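/*
 * Delayed work handler that asks the SMU to power gate the GFX block (GFXOFF)
 * once no requests to keep GFX powered are outstanding.
 */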
2413 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2414 {
2415 	struct amdgpu_device *adev =
2416 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2417 
2418 	mutex_lock(&adev->gfx.gfx_off_mutex);
2419 	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2420 		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2421 			adev->gfx.gfx_off_state = true;
2422 	}
2423 	mutex_unlock(&adev->gfx.gfx_off_mutex);
2424 }
2425 
2426 /**
2427  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2428  *
2429  * @adev: amdgpu_device pointer
2430  *
2431  * Main suspend function for hardware IPs.  The list of all the hardware
2432  * IPs that make up the asic is walked, clockgating is disabled and the
2433  * suspend callbacks are run.  suspend puts the hardware and software state
2434  * in each IP into a state suitable for suspend.
2435  * Returns 0 on success, negative error code on failure.
2436  */
2437 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2438 {
2439 	int i, r;
2440 
2441 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2442 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2443 
2444 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2445 		if (!adev->ip_blocks[i].status.valid)
2446 			continue;
2447 		/* displays are handled separately */
2448 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2449 			/* XXX handle errors */
2450 			r = adev->ip_blocks[i].version->funcs->suspend(adev);
2451 			/* XXX handle errors */
2452 			if (r) {
2453 				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2454 					  adev->ip_blocks[i].version->funcs->name, r);
2455 				return r;
2456 			}
2457 			adev->ip_blocks[i].status.hw = false;
2458 		}
2459 	}
2460 
2461 	return 0;
2462 }
2463 
2464 /**
2465  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2466  *
2467  * @adev: amdgpu_device pointer
2468  *
2469  * Main suspend function for hardware IPs.  The list of all the hardware
2470  * IPs that make up the asic is walked, clockgating is disabled and the
2471  * suspend callbacks are run.  suspend puts the hardware and software state
2472  * in each IP into a state suitable for suspend.
2473  * Returns 0 on success, negative error code on failure.
2474  */
2475 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2476 {
2477 	int i, r;
2478 
2479 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2480 		if (!adev->ip_blocks[i].status.valid)
2481 			continue;
2482 		/* displays are handled in phase1 */
2483 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2484 			continue;
2485 		/* PSP lost connection when err_event_athub occurs */
2486 		if (amdgpu_ras_intr_triggered() &&
2487 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2488 			adev->ip_blocks[i].status.hw = false;
2489 			continue;
2490 		}
2491 		/* XXX handle errors */
2492 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2493 		/* XXX handle errors */
2494 		if (r) {
2495 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2496 				  adev->ip_blocks[i].version->funcs->name, r);
2497 		}
2498 		adev->ip_blocks[i].status.hw = false;
2499 		/* handle putting the SMC in the appropriate state */
2500 		if (!amdgpu_sriov_vf(adev)) {
2501 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2502 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2503 				if (r) {
2504 					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2505 							adev->mp1_state, r);
2506 					return r;
2507 				}
2508 			}
2509 		}
2510 		adev->ip_blocks[i].status.hw = false;
2511 	}
2512 
2513 	return 0;
2514 }
2515 
2516 /**
2517  * amdgpu_device_ip_suspend - run suspend for hardware IPs
2518  *
2519  * @adev: amdgpu_device pointer
2520  *
2521  * Main suspend function for hardware IPs.  The list of all the hardware
2522  * IPs that make up the asic is walked, clockgating is disabled and the
2523  * suspend callbacks are run.  suspend puts the hardware and software state
2524  * in each IP into a state suitable for suspend.
2525  * Returns 0 on success, negative error code on failure.
2526  */
2527 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2528 {
2529 	int r;
2530 
2531 	if (amdgpu_sriov_vf(adev))
2532 		amdgpu_virt_request_full_gpu(adev, false);
2533 
2534 	r = amdgpu_device_ip_suspend_phase1(adev);
2535 	if (r)
2536 		return r;
2537 	r = amdgpu_device_ip_suspend_phase2(adev);
2538 
2539 	if (amdgpu_sriov_vf(adev))
2540 		amdgpu_virt_release_full_gpu(adev, false);
2541 
2542 	return r;
2543 }
2544 
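/*
 * SR-IOV re-init after reset, early phase: re-run hw_init for the GMC,
 * COMMON, PSP and IH blocks in that fixed order.
 */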
2545 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2546 {
2547 	int i, r;
2548 
2549 	static enum amd_ip_block_type ip_order[] = {
2550 		AMD_IP_BLOCK_TYPE_GMC,
2551 		AMD_IP_BLOCK_TYPE_COMMON,
2552 		AMD_IP_BLOCK_TYPE_PSP,
2553 		AMD_IP_BLOCK_TYPE_IH,
2554 	};
2555 
2556 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2557 		int j;
2558 		struct amdgpu_ip_block *block;
2559 
2560 		for (j = 0; j < adev->num_ip_blocks; j++) {
2561 			block = &adev->ip_blocks[j];
2562 
2563 			block->status.hw = false;
2564 			if (block->version->type != ip_order[i] ||
2565 				!block->status.valid)
2566 				continue;
2567 
2568 			r = block->version->funcs->hw_init(adev);
2569 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2570 			if (r)
2571 				return r;
2572 			block->status.hw = true;
2573 		}
2574 	}
2575 
2576 	return 0;
2577 }
2578 
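/*
 * SR-IOV re-init after reset, late phase: bring up the remaining blocks in a
 * fixed order (SMC, DCE, GFX, SDMA, UVD, VCE, VCN); SMC is resumed rather
 * than re-initialized.
 */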
2579 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2580 {
2581 	int i, r;
2582 
2583 	static enum amd_ip_block_type ip_order[] = {
2584 		AMD_IP_BLOCK_TYPE_SMC,
2585 		AMD_IP_BLOCK_TYPE_DCE,
2586 		AMD_IP_BLOCK_TYPE_GFX,
2587 		AMD_IP_BLOCK_TYPE_SDMA,
2588 		AMD_IP_BLOCK_TYPE_UVD,
2589 		AMD_IP_BLOCK_TYPE_VCE,
2590 		AMD_IP_BLOCK_TYPE_VCN
2591 	};
2592 
2593 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2594 		int j;
2595 		struct amdgpu_ip_block *block;
2596 
2597 		for (j = 0; j < adev->num_ip_blocks; j++) {
2598 			block = &adev->ip_blocks[j];
2599 
2600 			if (block->version->type != ip_order[i] ||
2601 				!block->status.valid ||
2602 				block->status.hw)
2603 				continue;
2604 
2605 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2606 				r = block->version->funcs->resume(adev);
2607 			else
2608 				r = block->version->funcs->hw_init(adev);
2609 
2610 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
2611 			if (r)
2612 				return r;
2613 			block->status.hw = true;
2614 		}
2615 	}
2616 
2617 	return 0;
2618 }
2619 
2620 /**
2621  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2622  *
2623  * @adev: amdgpu_device pointer
2624  *
2625  * First resume function for hardware IPs.  The list of all the hardware
2626  * IPs that make up the asic is walked and the resume callbacks are run for
2627  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2628  * after a suspend and updates the software state as necessary.  This
2629  * function is also used for restoring the GPU after a GPU reset.
2630  * Returns 0 on success, negative error code on failure.
2631  */
2632 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2633 {
2634 	int i, r;
2635 
2636 	for (i = 0; i < adev->num_ip_blocks; i++) {
2637 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2638 			continue;
2639 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2640 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2641 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2642 
2643 			r = adev->ip_blocks[i].version->funcs->resume(adev);
2644 			if (r) {
2645 				DRM_ERROR("resume of IP block <%s> failed %d\n",
2646 					  adev->ip_blocks[i].version->funcs->name, r);
2647 				return r;
2648 			}
2649 			adev->ip_blocks[i].status.hw = true;
2650 		}
2651 	}
2652 
2653 	return 0;
2654 }
2655 
2656 /**
2657  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2658  *
2659  * @adev: amdgpu_device pointer
2660  *
2661  * Second resume function for hardware IPs.  The list of all the hardware
2662  * IPs that make up the asic is walked and the resume callbacks are run for
2663  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2664  * functional state after a suspend and updates the software state as
2665  * necessary.  This function is also used for restoring the GPU after a GPU
2666  * reset.
2667  * Returns 0 on success, negative error code on failure.
2668  */
2669 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2670 {
2671 	int i, r;
2672 
2673 	for (i = 0; i < adev->num_ip_blocks; i++) {
2674 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2675 			continue;
2676 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2677 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2678 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2679 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2680 			continue;
2681 		r = adev->ip_blocks[i].version->funcs->resume(adev);
2682 		if (r) {
2683 			DRM_ERROR("resume of IP block <%s> failed %d\n",
2684 				  adev->ip_blocks[i].version->funcs->name, r);
2685 			return r;
2686 		}
2687 		adev->ip_blocks[i].status.hw = true;
2688 	}
2689 
2690 	return 0;
2691 }
2692 
2693 /**
2694  * amdgpu_device_ip_resume - run resume for hardware IPs
2695  *
2696  * @adev: amdgpu_device pointer
2697  *
2698  * Main resume function for hardware IPs.  The hardware IPs
2699  * are split into two resume functions because they are
2700  * also used in recovering from a GPU reset and some additional
2701  * steps need to be taken between them.  In this case (S3/S4) they are
2702  * run sequentially.
2703  * Returns 0 on success, negative error code on failure.
2704  */
2705 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2706 {
2707 	int r;
2708 
2709 	r = amdgpu_device_ip_resume_phase1(adev);
2710 	if (r)
2711 		return r;
2712 
2713 	r = amdgpu_device_fw_loading(adev);
2714 	if (r)
2715 		return r;
2716 
2717 	r = amdgpu_device_ip_resume_phase2(adev);
2718 
2719 	return r;
2720 }
2721 
2722 /**
2723  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2724  *
2725  * @adev: amdgpu_device pointer
2726  *
2727  * Query the VBIOS data tables to determine if the board supports SR-IOV.
2728  */
2729 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2730 {
2731 	if (amdgpu_sriov_vf(adev)) {
2732 		if (adev->is_atom_fw) {
2733 			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2734 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2735 		} else {
2736 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2737 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2738 		}
2739 
2740 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2741 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2742 	}
2743 }
2744 
2745 /**
2746  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2747  *
2748  * @asic_type: AMD asic type
2749  *
2750  * Check if there is DC (new modesetting infrastructure) support for an asic.
2751  * returns true if DC has support, false if not.
2752  */
2753 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2754 {
2755 	switch (asic_type) {
2756 #if defined(CONFIG_DRM_AMD_DC)
2757 	case CHIP_BONAIRE:
2758 	case CHIP_KAVERI:
2759 	case CHIP_KABINI:
2760 	case CHIP_MULLINS:
2761 		/*
2762 		 * We have systems in the wild with these ASICs that require
2763 		 * LVDS and VGA support which is not supported with DC.
2764 		 *
2765 		 * Fallback to the non-DC driver here by default so as not to
2766 		 * cause regressions.
2767 		 */
2768 		return amdgpu_dc > 0;
2769 	case CHIP_HAWAII:
2770 	case CHIP_CARRIZO:
2771 	case CHIP_STONEY:
2772 	case CHIP_POLARIS10:
2773 	case CHIP_POLARIS11:
2774 	case CHIP_POLARIS12:
2775 	case CHIP_VEGAM:
2776 	case CHIP_TONGA:
2777 	case CHIP_FIJI:
2778 	case CHIP_VEGA10:
2779 	case CHIP_VEGA12:
2780 	case CHIP_VEGA20:
2781 #if defined(CONFIG_DRM_AMD_DC_DCN)
2782 	case CHIP_RAVEN:
2783 	case CHIP_NAVI10:
2784 	case CHIP_NAVI14:
2785 	case CHIP_NAVI12:
2786 	case CHIP_RENOIR:
2787 #endif
2788 		return amdgpu_dc != 0;
2789 #endif
2790 	default:
2791 		if (amdgpu_dc > 0)
2792 			DRM_INFO("Display Core has been requested via kernel parameter "
2793 					 "but isn't supported by ASIC, ignoring\n");
2794 		return false;
2795 	}
2796 }
2797 
2798 /**
2799  * amdgpu_device_has_dc_support - check if dc is supported
2800  *
2801  * @adev: amdgpu_device pointer
2802  *
2803  * Returns true for supported, false for not supported
2804  */
2805 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2806 {
2807 	if (amdgpu_sriov_vf(adev))
2808 		return false;
2809 
2810 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2811 }
2812 
2813 
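/*
 * Work handler that resets one device of an XGMI hive; BACO enter/exit is
 * synchronized with the other hive members via the task barrier below.
 */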
2814 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2815 {
2816 	struct amdgpu_device *adev =
2817 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2818 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
2819 
2820 	/* It's a bug to not have a hive within this function */
2821 	if (WARN_ON(!hive))
2822 		return;
2823 
2824 	/*
2825 	 * Use task barrier to synchronize all xgmi reset works across the
2826 	 * hive. task_barrier_enter and task_barrier_exit will block
2827 	 * until all the threads running the xgmi reset works reach
2828 	 * those points. task_barrier_full will do both blocks.
2829 	 */
2830 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2831 
2832 		task_barrier_enter(&hive->tb);
2833 		adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2834 
2835 		if (adev->asic_reset_res)
2836 			goto fail;
2837 
2838 		task_barrier_exit(&hive->tb);
2839 		adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2840 
2841 		if (adev->asic_reset_res)
2842 			goto fail;
2843 
2844 		if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2845 			adev->mmhub.funcs->reset_ras_error_count(adev);
2846 	} else {
2847 
2848 		task_barrier_full(&hive->tb);
2849 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
2850 	}
2851 
2852 fail:
2853 	if (adev->asic_reset_res)
2854 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2855 			 adev->asic_reset_res, adev->ddev->unique);
2856 }
2857 
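/*
 * Parse the amdgpu_lockup_timeout module parameter: a comma separated list of
 * timeouts in ms applied in the order gfx, compute, sdma, video.  A value of
 * 0 keeps the default, a negative value disables the timeout, and a single
 * value applies to all non-compute jobs.
 */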
2858 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2859 {
2860 	char *input = amdgpu_lockup_timeout;
2861 	char *timeout_setting = NULL;
2862 	int index = 0;
2863 	long timeout;
2864 	int ret = 0;
2865 
2866 	/*
2867 	 * By default the timeout for non-compute jobs is 10000 ms
2868 	 * and there is no timeout enforced on compute jobs.
2869 	 * In SR-IOV or passthrough mode, the timeout for compute
2870 	 * jobs is 60000 ms by default.
2871 	 */
2872 	adev->gfx_timeout = msecs_to_jiffies(10000);
2873 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2874 	if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2875 		adev->compute_timeout =  msecs_to_jiffies(60000);
2876 	else
2877 		adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2878 
2879 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2880 		while ((timeout_setting = strsep(&input, ",")) &&
2881 				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2882 			ret = kstrtol(timeout_setting, 0, &timeout);
2883 			if (ret)
2884 				return ret;
2885 
2886 			if (timeout == 0) {
2887 				index++;
2888 				continue;
2889 			} else if (timeout < 0) {
2890 				timeout = MAX_SCHEDULE_TIMEOUT;
2891 			} else {
2892 				timeout = msecs_to_jiffies(timeout);
2893 			}
2894 
2895 			switch (index++) {
2896 			case 0:
2897 				adev->gfx_timeout = timeout;
2898 				break;
2899 			case 1:
2900 				adev->compute_timeout = timeout;
2901 				break;
2902 			case 2:
2903 				adev->sdma_timeout = timeout;
2904 				break;
2905 			case 3:
2906 				adev->video_timeout = timeout;
2907 				break;
2908 			default:
2909 				break;
2910 			}
2911 		}
2912 		/*
2913 		 * There is only one value specified and
2914 		 * it should apply to all non-compute jobs.
2915 		 */
2916 		if (index == 1) {
2917 			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2918 			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2919 				adev->compute_timeout = adev->gfx_timeout;
2920 		}
2921 	}
2922 
2923 	return ret;
2924 }
2925 
2926 /**
2927  * amdgpu_device_init - initialize the driver
2928  *
2929  * @adev: amdgpu_device pointer
2930  * @ddev: drm dev pointer
2931  * @pdev: pci dev pointer
2932  * @flags: driver flags
2933  *
2934  * Initializes the driver info and hw (all asics).
2935  * Returns 0 for success or an error on failure.
2936  * Called at driver startup.
2937  */
2938 int amdgpu_device_init(struct amdgpu_device *adev,
2939 		       struct drm_device *ddev,
2940 		       struct pci_dev *pdev,
2941 		       uint32_t flags)
2942 {
2943 	int r, i;
2944 	bool boco = false;
2945 	u32 max_MBps;
2946 
2947 	adev->shutdown = false;
2948 	adev->dev = &pdev->dev;
2949 	adev->ddev = ddev;
2950 	adev->pdev = pdev;
2951 	adev->flags = flags;
2952 
2953 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2954 		adev->asic_type = amdgpu_force_asic_type;
2955 	else
2956 		adev->asic_type = flags & AMD_ASIC_MASK;
2957 
2958 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2959 	if (amdgpu_emu_mode == 1)
2960 		adev->usec_timeout *= 10;
2961 	adev->gmc.gart_size = 512 * 1024 * 1024;
2962 	adev->accel_working = false;
2963 	adev->num_rings = 0;
2964 	adev->mman.buffer_funcs = NULL;
2965 	adev->mman.buffer_funcs_ring = NULL;
2966 	adev->vm_manager.vm_pte_funcs = NULL;
2967 	adev->vm_manager.vm_pte_num_scheds = 0;
2968 	adev->gmc.gmc_funcs = NULL;
2969 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2970 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2971 
2972 	adev->smc_rreg = &amdgpu_invalid_rreg;
2973 	adev->smc_wreg = &amdgpu_invalid_wreg;
2974 	adev->pcie_rreg = &amdgpu_invalid_rreg;
2975 	adev->pcie_wreg = &amdgpu_invalid_wreg;
2976 	adev->pciep_rreg = &amdgpu_invalid_rreg;
2977 	adev->pciep_wreg = &amdgpu_invalid_wreg;
2978 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2979 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2980 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2981 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2982 	adev->didt_rreg = &amdgpu_invalid_rreg;
2983 	adev->didt_wreg = &amdgpu_invalid_wreg;
2984 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2985 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2986 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2987 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2988 
2989 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2990 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2991 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2992 
2993 	/* mutex initializations are all done here so we
2994 	 * can recall functions without having locking issues */
2995 	atomic_set(&adev->irq.ih.lock, 0);
2996 	mutex_init(&adev->firmware.mutex);
2997 	mutex_init(&adev->pm.mutex);
2998 	mutex_init(&adev->gfx.gpu_clock_mutex);
2999 	mutex_init(&adev->srbm_mutex);
3000 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3001 	mutex_init(&adev->gfx.gfx_off_mutex);
3002 	mutex_init(&adev->grbm_idx_mutex);
3003 	mutex_init(&adev->mn_lock);
3004 	mutex_init(&adev->virt.vf_errors.lock);
3005 	hash_init(adev->mn_hash);
3006 	mutex_init(&adev->lock_reset);
3007 	mutex_init(&adev->psp.mutex);
3008 	mutex_init(&adev->notifier_lock);
3009 
3010 	r = amdgpu_device_check_arguments(adev);
3011 	if (r)
3012 		return r;
3013 
3014 	spin_lock_init(&adev->mmio_idx_lock);
3015 	spin_lock_init(&adev->smc_idx_lock);
3016 	spin_lock_init(&adev->pcie_idx_lock);
3017 	spin_lock_init(&adev->uvd_ctx_idx_lock);
3018 	spin_lock_init(&adev->didt_idx_lock);
3019 	spin_lock_init(&adev->gc_cac_idx_lock);
3020 	spin_lock_init(&adev->se_cac_idx_lock);
3021 	spin_lock_init(&adev->audio_endpt_idx_lock);
3022 	spin_lock_init(&adev->mm_stats.lock);
3023 
3024 	INIT_LIST_HEAD(&adev->shadow_list);
3025 	mutex_init(&adev->shadow_list_lock);
3026 
3027 	INIT_LIST_HEAD(&adev->ring_lru_list);
3028 	spin_lock_init(&adev->ring_lru_list_lock);
3029 
3030 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3031 			  amdgpu_device_delayed_init_work_handler);
3032 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3033 			  amdgpu_device_delay_enable_gfx_off);
3034 
3035 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3036 
3037 	adev->gfx.gfx_off_req_count = 1;
3038 	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
3039 
3040 	/* Registers mapping */
3041 	/* TODO: block userspace mapping of io register */
3042 	if (adev->asic_type >= CHIP_BONAIRE) {
3043 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3044 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3045 	} else {
3046 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3047 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3048 	}
3049 
3050 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3051 	if (adev->rmmio == NULL) {
3052 		return -ENOMEM;
3053 	}
3054 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3055 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3056 
3057 	/* io port mapping */
3058 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3059 		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3060 			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3061 			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3062 			break;
3063 		}
3064 	}
3065 	if (adev->rio_mem == NULL)
3066 		DRM_INFO("PCI I/O BAR is not found.\n");
3067 
3068 	/* enable PCIE atomic ops */
3069 	r = pci_enable_atomic_ops_to_root(adev->pdev,
3070 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3071 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3072 	if (r) {
3073 		adev->have_atomics_support = false;
3074 		DRM_INFO("PCIE atomic ops are not supported\n");
3075 	} else {
3076 		adev->have_atomics_support = true;
3077 	}
3078 
3079 	amdgpu_device_get_pcie_info(adev);
3080 
3081 	if (amdgpu_mcbp)
3082 		DRM_INFO("MCBP is enabled\n");
3083 
3084 	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3085 		adev->enable_mes = true;
3086 
3087 	/* detect hw virtualization here */
3088 	amdgpu_detect_virtualization(adev);
3089 
3090 	r = amdgpu_device_get_job_timeout_settings(adev);
3091 	if (r) {
3092 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3093 		return r;
3094 	}
3095 
3096 	/* early init functions */
3097 	r = amdgpu_device_ip_early_init(adev);
3098 	if (r)
3099 		return r;
3100 
3101 	/* doorbell bar mapping and doorbell index init */
3102 	amdgpu_device_doorbell_init(adev);
3103 
3104 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3105 	/* this will fail for cards that aren't VGA class devices, just
3106 	 * ignore it */
3107 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3108 
3109 	if (amdgpu_device_supports_boco(ddev))
3110 		boco = true;
3111 	if (amdgpu_has_atpx() &&
3112 	    (amdgpu_is_atpx_hybrid() ||
3113 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3114 	    !pci_is_thunderbolt_attached(adev->pdev))
3115 		vga_switcheroo_register_client(adev->pdev,
3116 					       &amdgpu_switcheroo_ops, boco);
3117 	if (boco)
3118 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3119 
3120 	if (amdgpu_emu_mode == 1) {
3121 		/* post the asic on emulation mode */
3122 		emu_soc_asic_init(adev);
3123 		goto fence_driver_init;
3124 	}
3125 
3126 	/* detect if we are with an SRIOV vbios */
3127 	amdgpu_device_detect_sriov_bios(adev);
3128 
3129 	/* check if we need to reset the asic
3130 	 *  E.g., driver was not cleanly unloaded previously, etc.
3131 	 */
3132 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3133 		r = amdgpu_asic_reset(adev);
3134 		if (r) {
3135 			dev_err(adev->dev, "asic reset on init failed\n");
3136 			goto failed;
3137 		}
3138 	}
3139 
3140 	/* Post card if necessary */
3141 	if (amdgpu_device_need_post(adev)) {
3142 		if (!adev->bios) {
3143 			dev_err(adev->dev, "no vBIOS found\n");
3144 			r = -EINVAL;
3145 			goto failed;
3146 		}
3147 		DRM_INFO("GPU posting now...\n");
3148 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3149 		if (r) {
3150 			dev_err(adev->dev, "gpu post error!\n");
3151 			goto failed;
3152 		}
3153 	}
3154 
3155 	if (adev->is_atom_fw) {
3156 		/* Initialize clocks */
3157 		r = amdgpu_atomfirmware_get_clock_info(adev);
3158 		if (r) {
3159 			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3160 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3161 			goto failed;
3162 		}
3163 	} else {
3164 		/* Initialize clocks */
3165 		r = amdgpu_atombios_get_clock_info(adev);
3166 		if (r) {
3167 			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3168 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3169 			goto failed;
3170 		}
3171 		/* init i2c buses */
3172 		if (!amdgpu_device_has_dc_support(adev))
3173 			amdgpu_atombios_i2c_init(adev);
3174 	}
3175 
3176 fence_driver_init:
3177 	/* Fence driver */
3178 	r = amdgpu_fence_driver_init(adev);
3179 	if (r) {
3180 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
3181 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3182 		goto failed;
3183 	}
3184 
3185 	/* init the mode config */
3186 	drm_mode_config_init(adev->ddev);
3187 
3188 	r = amdgpu_device_ip_init(adev);
3189 	if (r) {
3190 		/* failed in exclusive mode due to timeout */
3191 		if (amdgpu_sriov_vf(adev) &&
3192 		    !amdgpu_sriov_runtime(adev) &&
3193 		    amdgpu_virt_mmio_blocked(adev) &&
3194 		    !amdgpu_virt_wait_reset(adev)) {
3195 			dev_err(adev->dev, "VF exclusive mode timeout\n");
3196 			/* Don't send request since VF is inactive. */
3197 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3198 			adev->virt.ops = NULL;
3199 			r = -EAGAIN;
3200 			goto failed;
3201 		}
3202 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3203 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3204 		goto failed;
3205 	}
3206 
3207 	DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3208 			adev->gfx.config.max_shader_engines,
3209 			adev->gfx.config.max_sh_per_se,
3210 			adev->gfx.config.max_cu_per_sh,
3211 			adev->gfx.cu_info.number);
3212 
3213 	adev->accel_working = true;
3214 
3215 	amdgpu_vm_check_compute_bug(adev);
3216 
3217 	/* Initialize the buffer migration limit. */
3218 	if (amdgpu_moverate >= 0)
3219 		max_MBps = amdgpu_moverate;
3220 	else
3221 		max_MBps = 8; /* Allow 8 MB/s. */
3222 	/* Get a log2 for easy divisions. */
3223 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3224 
3225 	amdgpu_fbdev_init(adev);
3226 
3227 	r = amdgpu_pm_sysfs_init(adev);
3228 	if (r) {
3229 		adev->pm_sysfs_en = false;
3230 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3231 	} else
3232 		adev->pm_sysfs_en = true;
3233 
3234 	r = amdgpu_ucode_sysfs_init(adev);
3235 	if (r) {
3236 		adev->ucode_sysfs_en = false;
3237 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3238 	} else
3239 		adev->ucode_sysfs_en = true;
3240 
3241 	if ((amdgpu_testing & 1)) {
3242 		if (adev->accel_working)
3243 			amdgpu_test_moves(adev);
3244 		else
3245 			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3246 	}
3247 	if (amdgpu_benchmarking) {
3248 		if (adev->accel_working)
3249 			amdgpu_benchmark(adev, amdgpu_benchmarking);
3250 		else
3251 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3252 	}
3253 
3254 	/*
3255 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3256 	 * Otherwise the mgpu fan boost feature will be skipped because the
3257 	 * gpu instance count would be too low.
3258 	 */
3259 	amdgpu_register_gpu_instance(adev);
3260 
3261 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
3262 	 * explicit gating rather than handling it automatically.
3263 	 */
3264 	r = amdgpu_device_ip_late_init(adev);
3265 	if (r) {
3266 		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3267 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3268 		goto failed;
3269 	}
3270 
3271 	/* must succeed. */
3272 	amdgpu_ras_resume(adev);
3273 
3274 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3275 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3276 
3277 	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3278 	if (r) {
3279 		dev_err(adev->dev, "Could not create pcie_replay_count");
3280 		return r;
3281 	}
3282 
3283 	r = device_create_file(adev->dev, &dev_attr_product_name);
3284 	if (r) {
3285 		dev_err(adev->dev, "Could not create product_name");
3286 		return r;
3287 	}
3288 
3289 	r = device_create_file(adev->dev, &dev_attr_product_number);
3290 	if (r) {
3291 		dev_err(adev->dev, "Could not create product_number");
3292 		return r;
3293 	}
3294 
3295 	r = device_create_file(adev->dev, &dev_attr_serial_number);
3296 	if (r) {
3297 		dev_err(adev->dev, "Could not create serial_number");
3298 		return r;
3299 	}
3300 
3301 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3302 		r = amdgpu_pmu_init(adev);
3303 	if (r)
3304 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3305 
3306 	return 0;
3307 
3308 failed:
3309 	amdgpu_vf_error_trans_all(adev);
3310 	if (boco)
3311 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3312 
3313 	return r;
3314 }
3315 
3316 /**
3317  * amdgpu_device_fini - tear down the driver
3318  *
3319  * @adev: amdgpu_device pointer
3320  *
3321  * Tear down the driver info (all asics).
3322  * Called at driver shutdown.
3323  */
3324 void amdgpu_device_fini(struct amdgpu_device *adev)
3325 {
3326 	int r;
3327 
3328 	DRM_INFO("amdgpu: finishing device.\n");
3329 	flush_delayed_work(&adev->delayed_init_work);
3330 	adev->shutdown = true;
3331 
3332 	/* make sure IB tests have finished before entering exclusive mode
3333 	 * to avoid preemption during the IB tests
3334 	 */
3335 	if (amdgpu_sriov_vf(adev))
3336 		amdgpu_virt_request_full_gpu(adev, false);
3337 
3338 	/* disable all interrupts */
3339 	amdgpu_irq_disable_all(adev);
3340 	if (adev->mode_info.mode_config_initialized) {
3341 		if (!amdgpu_device_has_dc_support(adev))
3342 			drm_helper_force_disable_all(adev->ddev);
3343 		else
3344 			drm_atomic_helper_shutdown(adev->ddev);
3345 	}
3346 	amdgpu_fence_driver_fini(adev);
3347 	if (adev->pm_sysfs_en)
3348 		amdgpu_pm_sysfs_fini(adev);
3349 	amdgpu_fbdev_fini(adev);
3350 	r = amdgpu_device_ip_fini(adev);
3351 	if (adev->firmware.gpu_info_fw) {
3352 		release_firmware(adev->firmware.gpu_info_fw);
3353 		adev->firmware.gpu_info_fw = NULL;
3354 	}
3355 	adev->accel_working = false;
3356 	/* free i2c buses */
3357 	if (!amdgpu_device_has_dc_support(adev))
3358 		amdgpu_i2c_fini(adev);
3359 
3360 	if (amdgpu_emu_mode != 1)
3361 		amdgpu_atombios_fini(adev);
3362 
3363 	kfree(adev->bios);
3364 	adev->bios = NULL;
3365 	if (amdgpu_has_atpx() &&
3366 	    (amdgpu_is_atpx_hybrid() ||
3367 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
3368 	    !pci_is_thunderbolt_attached(adev->pdev))
3369 		vga_switcheroo_unregister_client(adev->pdev);
3370 	if (amdgpu_device_supports_boco(adev->ddev))
3371 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
3372 	vga_client_register(adev->pdev, NULL, NULL, NULL);
3373 	if (adev->rio_mem)
3374 		pci_iounmap(adev->pdev, adev->rio_mem);
3375 	adev->rio_mem = NULL;
3376 	iounmap(adev->rmmio);
3377 	adev->rmmio = NULL;
3378 	amdgpu_device_doorbell_fini(adev);
3379 
3380 	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
3381 	if (adev->ucode_sysfs_en)
3382 		amdgpu_ucode_sysfs_fini(adev);
3383 	device_remove_file(adev->dev, &dev_attr_product_name);
3384 	device_remove_file(adev->dev, &dev_attr_product_number);
3385 	device_remove_file(adev->dev, &dev_attr_serial_number);
3386 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
3387 		amdgpu_pmu_fini(adev);
3388 	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
3389 		amdgpu_discovery_fini(adev);
3390 }
3391 
3392 
3393 /*
3394  * Suspend & resume.
3395  */
3396 /**
3397  * amdgpu_device_suspend - initiate device suspend
3398  *
3399  * @dev: drm dev pointer
3401  * @fbcon: notify the fbdev of suspend
3402  *
3403  * Puts the hw in the suspend state (all asics).
3404  * Returns 0 for success or an error on failure.
3405  * Called at driver suspend.
3406  */
3407 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3408 {
3409 	struct amdgpu_device *adev;
3410 	struct drm_crtc *crtc;
3411 	struct drm_connector *connector;
3412 	struct drm_connector_list_iter iter;
3413 	int r;
3414 
3415 	if (dev == NULL || dev->dev_private == NULL) {
3416 		return -ENODEV;
3417 	}
3418 
3419 	adev = dev->dev_private;
3420 
3421 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3422 		return 0;
3423 
3424 	adev->in_suspend = true;
3425 	drm_kms_helper_poll_disable(dev);
3426 
3427 	if (fbcon)
3428 		amdgpu_fbdev_set_suspend(adev, 1);
3429 
3430 	cancel_delayed_work_sync(&adev->delayed_init_work);
3431 
3432 	if (!amdgpu_device_has_dc_support(adev)) {
3433 		/* turn off display hw */
3434 		drm_modeset_lock_all(dev);
3435 		drm_connector_list_iter_begin(dev, &iter);
3436 		drm_for_each_connector_iter(connector, &iter)
3437 			drm_helper_connector_dpms(connector,
3438 						  DRM_MODE_DPMS_OFF);
3439 		drm_connector_list_iter_end(&iter);
3440 		drm_modeset_unlock_all(dev);
3441 		/* unpin the front buffers and cursors */
3442 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3443 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3444 			struct drm_framebuffer *fb = crtc->primary->fb;
3445 			struct amdgpu_bo *robj;
3446 
3447 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3448 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3449 				r = amdgpu_bo_reserve(aobj, true);
3450 				if (r == 0) {
3451 					amdgpu_bo_unpin(aobj);
3452 					amdgpu_bo_unreserve(aobj);
3453 				}
3454 			}
3455 
3456 			if (fb == NULL || fb->obj[0] == NULL) {
3457 				continue;
3458 			}
3459 			robj = gem_to_amdgpu_bo(fb->obj[0]);
3460 			/* don't unpin kernel fb objects */
3461 			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3462 				r = amdgpu_bo_reserve(robj, true);
3463 				if (r == 0) {
3464 					amdgpu_bo_unpin(robj);
3465 					amdgpu_bo_unreserve(robj);
3466 				}
3467 			}
3468 		}
3469 	}
3470 
3471 	amdgpu_amdkfd_suspend(adev, !fbcon);
3472 
3473 	amdgpu_ras_suspend(adev);
3474 
3475 	r = amdgpu_device_ip_suspend_phase1(adev);
3476 
3477 	/* evict vram memory */
3478 	amdgpu_bo_evict_vram(adev);
3479 
3480 	amdgpu_fence_driver_suspend(adev);
3481 
3482 	r = amdgpu_device_ip_suspend_phase2(adev);
3483 
3484 	/* evict remaining vram memory
3485 	 * This second call to evict vram is to evict the gart page table
3486 	 * using the CPU.
3487 	 */
3488 	amdgpu_bo_evict_vram(adev);
3489 
3490 	return 0;
3491 }
3492 
3493 /**
3494  * amdgpu_device_resume - initiate device resume
3495  *
3496  * @dev: drm dev pointer
3498  * @fbcon: notify the fbdev of resume
3499  *
3500  * Bring the hw back to operating state (all asics).
3501  * Returns 0 for success or an error on failure.
3502  * Called at driver resume.
3503  */
3504 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3505 {
3506 	struct drm_connector *connector;
3507 	struct drm_connector_list_iter iter;
3508 	struct amdgpu_device *adev = dev->dev_private;
3509 	struct drm_crtc *crtc;
3510 	int r = 0;
3511 
3512 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3513 		return 0;
3514 
3515 	/* post card */
3516 	if (amdgpu_device_need_post(adev)) {
3517 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3518 		if (r)
3519 			DRM_ERROR("amdgpu asic init failed\n");
3520 	}
3521 
3522 	r = amdgpu_device_ip_resume(adev);
3523 	if (r) {
3524 		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3525 		return r;
3526 	}
3527 	amdgpu_fence_driver_resume(adev);
3528 
3529 
3530 	r = amdgpu_device_ip_late_init(adev);
3531 	if (r)
3532 		return r;
3533 
3534 	queue_delayed_work(system_wq, &adev->delayed_init_work,
3535 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3536 
3537 	if (!amdgpu_device_has_dc_support(adev)) {
3538 		/* pin cursors */
3539 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3540 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3541 
3542 			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3543 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3544 				r = amdgpu_bo_reserve(aobj, true);
3545 				if (r == 0) {
3546 					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3547 					if (r != 0)
3548 						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3549 					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3550 					amdgpu_bo_unreserve(aobj);
3551 				}
3552 			}
3553 		}
3554 	}
3555 	r = amdgpu_amdkfd_resume(adev, !fbcon);
3556 	if (r)
3557 		return r;
3558 
3559 	/* Make sure IB tests flushed */
3560 	flush_delayed_work(&adev->delayed_init_work);
3561 
3562 	/* blat the mode back in */
3563 	if (fbcon) {
3564 		if (!amdgpu_device_has_dc_support(adev)) {
3565 			/* pre DCE11 */
3566 			drm_helper_resume_force_mode(dev);
3567 
3568 			/* turn on display hw */
3569 			drm_modeset_lock_all(dev);
3570 
3571 			drm_connector_list_iter_begin(dev, &iter);
3572 			drm_for_each_connector_iter(connector, &iter)
3573 				drm_helper_connector_dpms(connector,
3574 							  DRM_MODE_DPMS_ON);
3575 			drm_connector_list_iter_end(&iter);
3576 
3577 			drm_modeset_unlock_all(dev);
3578 		}
3579 		amdgpu_fbdev_set_suspend(adev, 0);
3580 	}
3581 
3582 	drm_kms_helper_poll_enable(dev);
3583 
3584 	amdgpu_ras_resume(adev);
3585 
3586 	/*
3587 	 * Most of the connector probing functions try to acquire runtime pm
3588 	 * refs to ensure that the GPU is powered on when connector polling is
3589 	 * performed. Since we're calling this from a runtime PM callback,
3590 	 * trying to acquire rpm refs will cause us to deadlock.
3591 	 *
3592 	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3593 	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3594 	 */
3595 #ifdef CONFIG_PM
3596 	dev->dev->power.disable_depth++;
3597 #endif
3598 	if (!amdgpu_device_has_dc_support(adev))
3599 		drm_helper_hpd_irq_event(dev);
3600 	else
3601 		drm_kms_helper_hotplug_event(dev);
3602 #ifdef CONFIG_PM
3603 	dev->dev->power.disable_depth--;
3604 #endif
3605 	adev->in_suspend = false;
3606 
3607 	return 0;
3608 }
3609 
3610 /**
3611  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3612  *
3613  * @adev: amdgpu_device pointer
3614  *
3615  * The list of all the hardware IPs that make up the asic is walked and
3616  * the check_soft_reset callbacks are run.  check_soft_reset determines
3617  * if the asic is still hung or not.
3618  * Returns true if any of the IPs are still in a hung state, false if not.
3619  */
3620 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3621 {
3622 	int i;
3623 	bool asic_hang = false;
3624 
3625 	if (amdgpu_sriov_vf(adev))
3626 		return true;
3627 
3628 	if (amdgpu_asic_need_full_reset(adev))
3629 		return true;
3630 
3631 	for (i = 0; i < adev->num_ip_blocks; i++) {
3632 		if (!adev->ip_blocks[i].status.valid)
3633 			continue;
3634 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3635 			adev->ip_blocks[i].status.hang =
3636 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3637 		if (adev->ip_blocks[i].status.hang) {
3638 			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3639 			asic_hang = true;
3640 		}
3641 	}
3642 	return asic_hang;
3643 }
3644 
3645 /**
3646  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3647  *
3648  * @adev: amdgpu_device pointer
3649  *
3650  * The list of all the hardware IPs that make up the asic is walked and the
3651  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3652  * handles any IP specific hardware or software state changes that are
3653  * necessary for a soft reset to succeed.
3654  * Returns 0 on success, negative error code on failure.
3655  */
3656 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3657 {
3658 	int i, r = 0;
3659 
3660 	for (i = 0; i < adev->num_ip_blocks; i++) {
3661 		if (!adev->ip_blocks[i].status.valid)
3662 			continue;
3663 		if (adev->ip_blocks[i].status.hang &&
3664 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3665 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3666 			if (r)
3667 				return r;
3668 		}
3669 	}
3670 
3671 	return 0;
3672 }
3673 
3674 /**
3675  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3676  *
3677  * @adev: amdgpu_device pointer
3678  *
3679  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3680  * reset is necessary to recover.
3681  * Returns true if a full asic reset is required, false if not.
3682  */
3683 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3684 {
3685 	int i;
3686 
3687 	if (amdgpu_asic_need_full_reset(adev))
3688 		return true;
3689 
3690 	for (i = 0; i < adev->num_ip_blocks; i++) {
3691 		if (!adev->ip_blocks[i].status.valid)
3692 			continue;
3693 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3694 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3695 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3696 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3697 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3698 			if (adev->ip_blocks[i].status.hang) {
3699 				DRM_INFO("Some block needs full reset!\n");
3700 				return true;
3701 			}
3702 		}
3703 	}
3704 	return false;
3705 }
3706 
3707 /**
3708  * amdgpu_device_ip_soft_reset - do a soft reset
3709  *
3710  * @adev: amdgpu_device pointer
3711  *
3712  * The list of all the hardware IPs that make up the asic is walked and the
3713  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3714  * IP specific hardware or software state changes that are necessary to soft
3715  * reset the IP.
3716  * Returns 0 on success, negative error code on failure.
3717  */
3718 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3719 {
3720 	int i, r = 0;
3721 
3722 	for (i = 0; i < adev->num_ip_blocks; i++) {
3723 		if (!adev->ip_blocks[i].status.valid)
3724 			continue;
3725 		if (adev->ip_blocks[i].status.hang &&
3726 		    adev->ip_blocks[i].version->funcs->soft_reset) {
3727 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3728 			if (r)
3729 				return r;
3730 		}
3731 	}
3732 
3733 	return 0;
3734 }
3735 
3736 /**
3737  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3738  *
3739  * @adev: amdgpu_device pointer
3740  *
3741  * The list of all the hardware IPs that make up the asic is walked and the
3742  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3743  * handles any IP specific hardware or software state changes that are
3744  * necessary after the IP has been soft reset.
3745  * Returns 0 on success, negative error code on failure.
3746  */
3747 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3748 {
3749 	int i, r = 0;
3750 
3751 	for (i = 0; i < adev->num_ip_blocks; i++) {
3752 		if (!adev->ip_blocks[i].status.valid)
3753 			continue;
3754 		if (adev->ip_blocks[i].status.hang &&
3755 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3756 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3757 		if (r)
3758 			return r;
3759 	}
3760 
3761 	return 0;
3762 }
3763 
3764 /**
3765  * amdgpu_device_recover_vram - Recover some VRAM contents
3766  *
3767  * @adev: amdgpu_device pointer
3768  *
3769  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3770  * restore things like GPUVM page tables after a GPU reset where
3771  * the contents of VRAM might be lost.
3772  *
3773  * Returns:
3774  * 0 on success, negative error code on failure.
3775  */
3776 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3777 {
3778 	struct dma_fence *fence = NULL, *next = NULL;
3779 	struct amdgpu_bo *shadow;
3780 	long r = 1, tmo;
3781 
3782 	if (amdgpu_sriov_runtime(adev))
3783 		tmo = msecs_to_jiffies(8000);
3784 	else
3785 		tmo = msecs_to_jiffies(100);
3786 
3787 	DRM_INFO("recover vram bo from shadow start\n");
3788 	mutex_lock(&adev->shadow_list_lock);
3789 	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3790 
3791 		/* No need to recover an evicted BO */
3792 		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3793 		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3794 		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3795 			continue;
3796 
3797 		r = amdgpu_bo_restore_shadow(shadow, &next);
3798 		if (r)
3799 			break;
3800 
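		/*
		 * Pipeline the recovery: the copy for this shadow was issued
		 * above; here we only wait on the fence of the previous
		 * restore, so copies and waits overlap across iterations.
		 */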
3801 		if (fence) {
3802 			tmo = dma_fence_wait_timeout(fence, false, tmo);
3803 			dma_fence_put(fence);
3804 			fence = next;
3805 			if (tmo == 0) {
3806 				r = -ETIMEDOUT;
3807 				break;
3808 			} else if (tmo < 0) {
3809 				r = tmo;
3810 				break;
3811 			}
3812 		} else {
3813 			fence = next;
3814 		}
3815 	}
3816 	mutex_unlock(&adev->shadow_list_lock);
3817 
3818 	if (fence)
3819 		tmo = dma_fence_wait_timeout(fence, false, tmo);
3820 	dma_fence_put(fence);
3821 
3822 	if (r < 0 || tmo <= 0) {
3823 		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3824 		return -EIO;
3825 	}
3826 
3827 	DRM_INFO("recover vram bo from shadow done\n");
3828 	return 0;
3829 }
3830 
3831 
3832 /**
3833  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3834  *
3835  * @adev: amdgpu device pointer
3836  * @from_hypervisor: request from hypervisor
3837  *
3838  * Do a VF FLR and reinitialize the ASIC.
3839  * Returns 0 on success, negative error code on failure.
3840  */
3841 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3842 				     bool from_hypervisor)
3843 {
3844 	int r;
3845 
3846 	if (from_hypervisor)
3847 		r = amdgpu_virt_request_full_gpu(adev, true);
3848 	else
3849 		r = amdgpu_virt_reset_gpu(adev);
3850 	if (r)
3851 		return r;
3852 
3853 	amdgpu_amdkfd_pre_reset(adev);
3854 
3855 	/* Resume IP prior to SMC */
3856 	r = amdgpu_device_ip_reinit_early_sriov(adev);
3857 	if (r)
3858 		goto error;
3859 
3860 	amdgpu_virt_init_data_exchange(adev);
3861 	/* we need to recover the GART prior to resuming SMC/CP/SDMA */
3862 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3863 
3864 	r = amdgpu_device_fw_loading(adev);
3865 	if (r)
3866 		return r;
3867 
3868 	/* now we are okay to resume SMC/CP/SDMA */
3869 	r = amdgpu_device_ip_reinit_late_sriov(adev);
3870 	if (r)
3871 		goto error;
3872 
3873 	amdgpu_irq_gpu_reset_resume_helper(adev);
3874 	r = amdgpu_ib_ring_tests(adev);
3875 	amdgpu_amdkfd_post_reset(adev);
3876 
3877 error:
3878 	amdgpu_virt_release_full_gpu(adev, true);
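	/* if the host GIM reports VRAM lost across the FLR, restore BOs from their GTT shadows */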
3879 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3880 		amdgpu_inc_vram_lost(adev);
3881 		r = amdgpu_device_recover_vram(adev);
3882 	}
3883 
3884 	return r;
3885 }
3886 
3887 /**
3888  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3889  *
3890  * @adev: amdgpu device pointer
3891  *
3892  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3893  * a hung GPU.
3894  */
3895 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3896 {
3897 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3898 		DRM_INFO("Timeout, but no hardware hang detected.\n");
3899 		return false;
3900 	}
3901 
3902 	if (amdgpu_gpu_recovery == 0)
3903 		goto disabled;
3904 
3905 	if (amdgpu_sriov_vf(adev))
3906 		return true;
3907 
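	/* -1 (auto): only enable recovery on ASICs where it is known to work */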
3908 	if (amdgpu_gpu_recovery == -1) {
3909 		switch (adev->asic_type) {
3910 		case CHIP_BONAIRE:
3911 		case CHIP_HAWAII:
3912 		case CHIP_TOPAZ:
3913 		case CHIP_TONGA:
3914 		case CHIP_FIJI:
3915 		case CHIP_POLARIS10:
3916 		case CHIP_POLARIS11:
3917 		case CHIP_POLARIS12:
3918 		case CHIP_VEGAM:
3919 		case CHIP_VEGA20:
3920 		case CHIP_VEGA10:
3921 		case CHIP_VEGA12:
3922 		case CHIP_RAVEN:
3923 		case CHIP_ARCTURUS:
3924 		case CHIP_RENOIR:
3925 		case CHIP_NAVI10:
3926 		case CHIP_NAVI14:
3927 		case CHIP_NAVI12:
3928 			break;
3929 		default:
3930 			goto disabled;
3931 		}
3932 	}
3933 
3934 	return true;
3935 
3936 disabled:
3937 	DRM_INFO("GPU recovery disabled.\n");
3938 	return false;
3939 }
3940 
3941 
3942 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3943 					struct amdgpu_job *job,
3944 					bool *need_full_reset_arg)
3945 {
3946 	int i, r = 0;
3947 	bool need_full_reset = *need_full_reset_arg;
3948 
3949 	/* block all schedulers and reset given job's ring */
3950 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3951 		struct amdgpu_ring *ring = adev->rings[i];
3952 
3953 		if (!ring || !ring->sched.thread)
3954 			continue;
3955 
3956 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3957 		amdgpu_fence_driver_force_completion(ring);
3958 	}
3959 
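	/* bump the offending job's karma so the scheduler can mark it guilty */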
3960 	if (job)
3961 		drm_sched_increase_karma(&job->base);
3962 
3963 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3964 	if (!amdgpu_sriov_vf(adev)) {
3965 
3966 		if (!need_full_reset)
3967 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3968 
3969 		if (!need_full_reset) {
3970 			amdgpu_device_ip_pre_soft_reset(adev);
3971 			r = amdgpu_device_ip_soft_reset(adev);
3972 			amdgpu_device_ip_post_soft_reset(adev);
3973 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3974 				DRM_INFO("soft reset failed, will fallback to full reset!\n");
3975 				need_full_reset = true;
3976 			}
3977 		}
3978 
3979 		if (need_full_reset)
3980 			r = amdgpu_device_ip_suspend(adev);
3981 
3982 		*need_full_reset_arg = need_full_reset;
3983 	}
3984 
3985 	return r;
3986 }
3987 
3988 static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3989 			       struct list_head *device_list_handle,
3990 			       bool *need_full_reset_arg)
3991 {
3992 	struct amdgpu_device *tmp_adev = NULL;
3993 	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3994 	int r = 0;
3995 
3996 	/*
3997 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
3998 	 * to allow proper link negotiation in the FW (within 1 sec)
3999 	 */
4000 	if (need_full_reset) {
4001 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4002 			/* For XGMI run all resets in parallel to speed up the process */
4003 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
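				/* queue_work() fails only if the reset work is already pending */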
4004 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4005 					r = -EALREADY;
4006 			} else
4007 				r = amdgpu_asic_reset(tmp_adev);
4008 
4009 			if (r) {
4010 				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4011 					 r, tmp_adev->ddev->unique);
4012 				break;
4013 			}
4014 		}
4015 
4016 		/* For XGMI wait for all resets to complete before proceed */
4017 		/* For XGMI wait for all resets to complete before proceeding */
4018 			list_for_each_entry(tmp_adev, device_list_handle,
4019 					    gmc.xgmi.head) {
4020 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4021 					flush_work(&tmp_adev->xgmi_reset_work);
4022 					r = tmp_adev->asic_reset_res;
4023 					if (r)
4024 						break;
4025 				}
4026 			}
4027 		}
4028 	}
4029 
4030 	if (!r && amdgpu_ras_intr_triggered()) {
4031 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4032 			if (tmp_adev->mmhub.funcs &&
4033 			    tmp_adev->mmhub.funcs->reset_ras_error_count)
4034 				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4035 		}
4036 
4037 		amdgpu_ras_intr_cleared();
4038 	}
4039 
4040 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4041 		if (need_full_reset) {
4042 			/* post card */
4043 			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4044 				DRM_WARN("asic atom init failed!");
4045 
4046 			if (!r) {
4047 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4048 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
4049 				if (r)
4050 					goto out;
4051 
4052 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4053 				if (vram_lost) {
4054 					DRM_INFO("VRAM is lost due to GPU reset!\n");
4055 					amdgpu_inc_vram_lost(tmp_adev);
4056 				}
4057 
4058 				r = amdgpu_gtt_mgr_recover(
4059 					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
4060 				if (r)
4061 					goto out;
4062 
4063 				r = amdgpu_device_fw_loading(tmp_adev);
4064 				if (r)
4065 					return r;
4066 
4067 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
4068 				if (r)
4069 					goto out;
4070 
4071 				if (vram_lost)
4072 					amdgpu_device_fill_reset_magic(tmp_adev);
4073 
4074 				/*
4075 				 * Add this ASIC back to the tracked list since the
4076 				 * reset has already completed successfully.
4077 				 */
4078 				amdgpu_register_gpu_instance(tmp_adev);
4079 
4080 				r = amdgpu_device_ip_late_init(tmp_adev);
4081 				if (r)
4082 					goto out;
4083 
4084 				amdgpu_fbdev_set_suspend(tmp_adev, 0);
4085 
4086 				/* must succeed. */
4087 				amdgpu_ras_resume(tmp_adev);
4088 
4089 				/* Update PSP FW topology after reset */
4090 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4091 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4092 			}
4093 		}
4094 
4095 
4096 out:
4097 		if (!r) {
4098 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4099 			r = amdgpu_ib_ring_tests(tmp_adev);
4100 			if (r) {
4101 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4102 				r = amdgpu_device_ip_suspend(tmp_adev);
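				/*
				 * Return -EAGAIN so the caller retries the
				 * whole sequence with a full reset this time.
				 */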
4103 				need_full_reset = true;
4104 				r = -EAGAIN;
4105 				goto end;
4106 			}
4107 		}
4108 
4109 		if (!r)
4110 			r = amdgpu_device_recover_vram(tmp_adev);
4111 		else
4112 			tmp_adev->asic_reset_res = r;
4113 	}
4114 
4115 end:
4116 	*need_full_reset_arg = need_full_reset;
4117 	return r;
4118 }
4119 
4120 static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
4121 {
4122 	if (trylock) {
4123 		if (!mutex_trylock(&adev->lock_reset))
4124 			return false;
4125 	} else
4126 		mutex_lock(&adev->lock_reset);
4127 
4128 	atomic_inc(&adev->gpu_reset_counter);
4129 	adev->in_gpu_reset = true;
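	/* record the MP1 (SMU) state expected for the chosen reset method */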
4130 	switch (amdgpu_asic_reset_method(adev)) {
4131 	case AMD_RESET_METHOD_MODE1:
4132 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4133 		break;
4134 	case AMD_RESET_METHOD_MODE2:
4135 		adev->mp1_state = PP_MP1_STATE_RESET;
4136 		break;
4137 	default:
4138 		adev->mp1_state = PP_MP1_STATE_NONE;
4139 		break;
4140 	}
4141 
4142 	return true;
4143 }
4144 
4145 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4146 {
4147 	amdgpu_vf_error_trans_all(adev);
4148 	adev->mp1_state = PP_MP1_STATE_NONE;
4149 	adev->in_gpu_reset = false;
4150 	mutex_unlock(&adev->lock_reset);
4151 }
4152 
4153 /**
4154  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4155  *
4156  * @adev: amdgpu device pointer
4157  * @job: which job triggered the hang
4158  *
4159  * Attempt to reset the GPU if it has hung (all asics).
4160  * Attempt to do soft-reset or full-reset and reinitialize Asic
4161  * Attempt to do a soft reset or full reset and reinitialize the ASIC.
4162  */
4163 
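/*
 * Typical call site (a sketch, not verbatim code): a ring's job timeout
 * handler does roughly
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *
 * so recovery only runs when the soft-reset checks report a real hang.
 */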
4164 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4165 			      struct amdgpu_job *job)
4166 {
4167 	struct list_head device_list, *device_list_handle =  NULL;
4168 	bool need_full_reset, job_signaled;
4169 	struct amdgpu_hive_info *hive = NULL;
4170 	struct amdgpu_device *tmp_adev = NULL;
4171 	int i, r = 0;
4172 	bool in_ras_intr = amdgpu_ras_intr_triggered();
4173 	bool use_baco =
4174 		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO);
4176 
4177 	/*
4178 	 * Flush RAM to disk so that after reboot
4179 	 * the user can read log and see why the system rebooted.
4180 	 */
4181 	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
4182 
4183 		DRM_WARN("Emergency reboot.");
4184 
4185 		ksys_sync_helper();
4186 		emergency_restart();
4187 	}
4188 
4189 	need_full_reset = job_signaled = false;
4190 	INIT_LIST_HEAD(&device_list);
4191 
4192 	amdgpu_ras_set_error_query_ready(adev, false);
4193 
4194 	dev_info(adev->dev, "GPU %s begin!\n",
4195 		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
4196 
4197 	cancel_delayed_work_sync(&adev->delayed_init_work);
4198 
4199 	hive = amdgpu_get_xgmi_hive(adev, false);
4200 
4201 	/*
4202 	 * Here we trylock to avoid a chain of resets executing, either
4203 	 * triggered by jobs on different adevs in an XGMI hive or by jobs on
4204 	 * different schedulers for the same device, while this TO handler is running.
4205 	 * We always reset all schedulers for a device and all devices in an XGMI
4206 	 * hive, so that should take care of them too.
4207 	 */
4208 
4209 	if (hive && !mutex_trylock(&hive->reset_lock)) {
4210 		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4211 			  job ? job->base.id : -1, hive->hive_id);
4212 		return 0;
4213 	}
4214 
4215 	/* Start with adev pre asic reset first for soft reset check. */
4216 	if (!amdgpu_device_lock_adev(adev, !hive)) {
4217 		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4218 			  job ? job->base.id : -1);
4219 		return 0;
4220 	}
4221 
4222 	/* Block kfd: SRIOV would do it separately */
4223 	if (!amdgpu_sriov_vf(adev))
4224 		amdgpu_amdkfd_pre_reset(adev);
4225 
4226 	/* Build list of devices to reset */
4227 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
4228 		if (!hive) {
4229 			/* unlock kfd: SRIOV would do it separately */
4230 			if (!amdgpu_sriov_vf(adev))
4231 				amdgpu_amdkfd_post_reset(adev);
4232 			amdgpu_device_unlock_adev(adev);
4233 			return -ENODEV;
4234 		}
4235 
4236 		/*
4237 		 * In case we are in XGMI hive mode device reset is done for all the
4238 		 * nodes in the hive to retrain all XGMI links and hence the reset
4239 		 * sequence is executed in loop on all nodes.
4240 		 */
4241 		device_list_handle = &hive->device_list;
4242 	} else {
4243 		list_add_tail(&adev->gmc.xgmi.head, &device_list);
4244 		device_list_handle = &device_list;
4245 	}
4246 
4247 	/* block all schedulers and reset given job's ring */
4248 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4249 		if (tmp_adev != adev) {
4250 			amdgpu_ras_set_error_query_ready(tmp_adev, false);
4251 			amdgpu_device_lock_adev(tmp_adev, false);
4252 			if (!amdgpu_sriov_vf(tmp_adev))
4253 				amdgpu_amdkfd_pre_reset(tmp_adev);
4254 		}
4255 
4256 		/*
4257 		 * Mark these ASICs to be reset as untracked first,
4258 		 * and add them back after the reset has completed.
4259 		 */
4260 		amdgpu_unregister_gpu_instance(tmp_adev);
4261 
4262 		amdgpu_fbdev_set_suspend(tmp_adev, 1);
4263 
4264 		/* disable ras on ALL IPs */
4265 		if (!(in_ras_intr && !use_baco) &&
4266 		      amdgpu_device_ip_need_full_reset(tmp_adev))
4267 			amdgpu_ras_suspend(tmp_adev);
4268 
4269 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4270 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4271 
4272 			if (!ring || !ring->sched.thread)
4273 				continue;
4274 
4275 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
4276 
4277 			if (in_ras_intr && !use_baco)
4278 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4279 		}
4280 	}
4281 
4282 
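	/*
	 * On a RAS fatal error without BACO we only stop the schedulers and
	 * jobs above; skip both the ASIC reset and the scheduler restart.
	 */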
4283 	if (in_ras_intr && !use_baco)
4284 		goto skip_sched_resume;
4285 
4286 	/*
4287 	 * Must check guilty signal here since after this point all old
4288 	 * HW fences are force signaled.
4289 	 *
4290 	 * job->base holds a reference to parent fence
4291 	 */
4292 	if (job && job->base.s_fence->parent &&
4293 	    dma_fence_is_signaled(job->base.s_fence->parent))
4294 		job_signaled = true;
4295 
4296 	if (job_signaled) {
4297 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4298 		goto skip_hw_reset;
4299 	}
4300 
4301 
4302 	/* Guilty job will be freed after this */
4303 	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
4304 	if (r) {
4305 		/* TODO Should we stop? */
4306 		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4307 			  r, adev->ddev->unique);
4308 		adev->asic_reset_res = r;
4309 	}
4310 
4311 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
4312 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4313 
4314 		if (tmp_adev == adev)
4315 			continue;
4316 
4317 		r = amdgpu_device_pre_asic_reset(tmp_adev,
4318 						 NULL,
4319 						 &need_full_reset);
4320 		/* TODO Should we stop? */
4321 		if (r) {
4322 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4323 				  r, tmp_adev->ddev->unique);
4324 			tmp_adev->asic_reset_res = r;
4325 		}
4326 	}
4327 
4328 	/* Actual ASIC resets if needed.*/
4329 	/* TODO Implement XGMI hive reset logic for SRIOV */
4330 	if (amdgpu_sriov_vf(adev)) {
4331 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
4332 		if (r)
4333 			adev->asic_reset_res = r;
4334 	} else {
4335 		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
4336 		if (r && r == -EAGAIN)
4337 			goto retry;
4338 	}
4339 
4340 skip_hw_reset:
4341 
4342 	/* Post ASIC reset for all devs. */
4343 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4344 
4345 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4346 			struct amdgpu_ring *ring = tmp_adev->rings[i];
4347 
4348 			if (!ring || !ring->sched.thread)
4349 				continue;
4350 
4351 			/* No point in resubmitting jobs if we didn't do a HW reset */
4352 			if (!tmp_adev->asic_reset_res && !job_signaled)
4353 				drm_sched_resubmit_jobs(&ring->sched);
4354 
4355 			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4356 		}
4357 
4358 		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled)
4359 			drm_helper_resume_force_mode(tmp_adev->ddev);
4361 
4362 		tmp_adev->asic_reset_res = 0;
4363 
4364 		if (r) {
4365 			/* bad news, how do we tell userspace? */
4366 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4367 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4368 		} else {
4369 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
4370 		}
4371 	}
4372 
4373 skip_sched_resume:
4374 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4375 		/* unlock kfd: SRIOV would do it separately */
4376 		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
4377 			amdgpu_amdkfd_post_reset(tmp_adev);
4378 		amdgpu_device_unlock_adev(tmp_adev);
4379 	}
4380 
4381 	if (hive)
4382 		mutex_unlock(&hive->reset_lock);
4383 
4384 	if (r)
4385 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4386 	return r;
4387 }
4388 
4389 /**
4390  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4391  *
4392  * @adev: amdgpu_device pointer
4393  *
4394  * Fetches and stores in the driver the PCIE capabilities (gen speed
4395  * and lanes) of the slot the device is in. Handles APUs and
4396  * virtualized environments where PCIE config space may not be available.
4397  */
4398 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
4399 {
4400 	struct pci_dev *pdev;
4401 	enum pci_bus_speed speed_cap, platform_speed_cap;
4402 	enum pcie_link_width platform_link_width;
4403 
4404 	if (amdgpu_pcie_gen_cap)
4405 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
4406 
4407 	if (amdgpu_pcie_lane_cap)
4408 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
4409 
4410 	/* covers APUs as well */
4411 	if (pci_is_root_bus(adev->pdev->bus)) {
4412 		if (adev->pm.pcie_gen_mask == 0)
4413 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4414 		if (adev->pm.pcie_mlw_mask == 0)
4415 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
4416 		return;
4417 	}
4418 
4419 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4420 		return;
4421 
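	/* query the slot's available speed/width to derive the platform caps */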
4422 	pcie_bandwidth_available(adev->pdev, NULL,
4423 				 &platform_speed_cap, &platform_link_width);
4424 
4425 	if (adev->pm.pcie_gen_mask == 0) {
4426 		/* asic caps */
4427 		pdev = adev->pdev;
4428 		speed_cap = pcie_get_speed_cap(pdev);
4429 		if (speed_cap == PCI_SPEED_UNKNOWN) {
4430 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4431 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4432 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4433 		} else {
4434 			if (speed_cap == PCIE_SPEED_16_0GT)
4435 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4436 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4437 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4438 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4439 			else if (speed_cap == PCIE_SPEED_8_0GT)
4440 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4441 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4442 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4443 			else if (speed_cap == PCIE_SPEED_5_0GT)
4444 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4445 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4446 			else
4447 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4448 		}
4449 		/* platform caps */
4450 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4451 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4452 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4453 		} else {
4454 			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4455 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4456 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4457 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4458 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4459 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4460 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4461 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4462 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4463 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4464 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4465 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4466 			else
4467 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4468 
4469 		}
4470 	}
4471 	if (adev->pm.pcie_mlw_mask == 0) {
4472 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4473 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4474 		} else {
4475 			switch (platform_link_width) {
4476 			case PCIE_LNK_X32:
4477 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4478 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4479 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4480 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4481 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4482 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4483 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4484 				break;
4485 			case PCIE_LNK_X16:
4486 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4487 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4488 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4489 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4490 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4491 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4492 				break;
4493 			case PCIE_LNK_X12:
4494 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4495 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4496 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4497 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4498 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4499 				break;
4500 			case PCIE_LNK_X8:
4501 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4502 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4503 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4504 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4505 				break;
4506 			case PCIE_LNK_X4:
4507 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4508 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4509 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4510 				break;
4511 			case PCIE_LNK_X2:
4512 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4513 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4514 				break;
4515 			case PCIE_LNK_X1:
4516 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4517 				break;
4518 			default:
4519 				break;
4520 			}
4521 		}
4522 	}
4523 }
4524 
4525 int amdgpu_device_baco_enter(struct drm_device *dev)
4526 {
4527 	struct amdgpu_device *adev = dev->dev_private;
4528 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4529 
4530 	if (!amdgpu_device_supports_baco(adev->ddev))
4531 		return -ENOTSUPP;
4532 
4533 	if (ras && ras->supported)
4534 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4535 
4536 	return amdgpu_dpm_baco_enter(adev);
4537 }
4538 
4539 int amdgpu_device_baco_exit(struct drm_device *dev)
4540 {
4541 	struct amdgpu_device *adev = dev->dev_private;
4542 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4543 	int ret = 0;
4544 
4545 	if (!amdgpu_device_supports_baco(adev->ddev))
4546 		return -ENOTSUPP;
4547 
4548 	ret = amdgpu_dpm_baco_exit(adev);
4549 	if (ret)
4550 		return ret;
4551 
4552 	if (ras && ras->supported)
4553 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4554 
4555 	return 0;
4556 }
4557