xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 3de9c42d02a79a5e09bbee7a4421ddc00cfd5c6d)
1  /*
2   * Copyright 2008 Advanced Micro Devices, Inc.
3   * Copyright 2008 Red Hat Inc.
4   * Copyright 2009 Jerome Glisse.
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a
7   * copy of this software and associated documentation files (the "Software"),
8   * to deal in the Software without restriction, including without limitation
9   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10   * and/or sell copies of the Software, and to permit persons to whom the
11   * Software is furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19   * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22   * OTHER DEALINGS IN THE SOFTWARE.
23   *
24   * Authors: Dave Airlie
25   *          Alex Deucher
26   *          Jerome Glisse
27   */
28  #include <linux/power_supply.h>
29  #include <linux/kthread.h>
30  #include <linux/module.h>
31  #include <linux/console.h>
32  #include <linux/slab.h>
33  #include <linux/iommu.h>
34  #include <linux/pci.h>
35  #include <linux/pci-p2pdma.h>
36  #include <linux/apple-gmux.h>
37  
38  #include <drm/drm_aperture.h>
39  #include <drm/drm_atomic_helper.h>
40  #include <drm/drm_crtc_helper.h>
41  #include <drm/drm_fb_helper.h>
42  #include <drm/drm_probe_helper.h>
43  #include <drm/amdgpu_drm.h>
44  #include <linux/device.h>
45  #include <linux/vgaarb.h>
46  #include <linux/vga_switcheroo.h>
47  #include <linux/efi.h>
48  #include "amdgpu.h"
49  #include "amdgpu_trace.h"
50  #include "amdgpu_i2c.h"
51  #include "atom.h"
52  #include "amdgpu_atombios.h"
53  #include "amdgpu_atomfirmware.h"
54  #include "amd_pcie.h"
55  #ifdef CONFIG_DRM_AMDGPU_SI
56  #include "si.h"
57  #endif
58  #ifdef CONFIG_DRM_AMDGPU_CIK
59  #include "cik.h"
60  #endif
61  #include "vi.h"
62  #include "soc15.h"
63  #include "nv.h"
64  #include "bif/bif_4_1_d.h"
65  #include <linux/firmware.h>
66  #include "amdgpu_vf_error.h"
67  
68  #include "amdgpu_amdkfd.h"
69  #include "amdgpu_pm.h"
70  
71  #include "amdgpu_xgmi.h"
72  #include "amdgpu_ras.h"
73  #include "amdgpu_pmu.h"
74  #include "amdgpu_fru_eeprom.h"
75  #include "amdgpu_reset.h"
76  #include "amdgpu_virt.h"
77  #include "amdgpu_dev_coredump.h"
78  
79  #include <linux/suspend.h>
80  #include <drm/task_barrier.h>
81  #include <linux/pm_runtime.h>
82  
83  #include <drm/drm_drv.h>
84  
85  #if IS_ENABLED(CONFIG_X86)
86  #include <asm/intel-family.h>
87  #endif
88  
89  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92  MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93  MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94  MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95  MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96  
97  #define AMDGPU_RESUME_MS		2000
98  #define AMDGPU_MAX_RETRY_LIMIT		2
99  #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100  #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101  #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102  #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
103  
104  static const struct drm_driver amdgpu_kms_driver;
105  
106  const char *amdgpu_asic_name[] = {
107  	"TAHITI",
108  	"PITCAIRN",
109  	"VERDE",
110  	"OLAND",
111  	"HAINAN",
112  	"BONAIRE",
113  	"KAVERI",
114  	"KABINI",
115  	"HAWAII",
116  	"MULLINS",
117  	"TOPAZ",
118  	"TONGA",
119  	"FIJI",
120  	"CARRIZO",
121  	"STONEY",
122  	"POLARIS10",
123  	"POLARIS11",
124  	"POLARIS12",
125  	"VEGAM",
126  	"VEGA10",
127  	"VEGA12",
128  	"VEGA20",
129  	"RAVEN",
130  	"ARCTURUS",
131  	"RENOIR",
132  	"ALDEBARAN",
133  	"NAVI10",
134  	"CYAN_SKILLFISH",
135  	"NAVI14",
136  	"NAVI12",
137  	"SIENNA_CICHLID",
138  	"NAVY_FLOUNDER",
139  	"VANGOGH",
140  	"DIMGREY_CAVEFISH",
141  	"BEIGE_GOBY",
142  	"YELLOW_CARP",
143  	"IP DISCOVERY",
144  	"LAST",
145  };
146  
147  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
148  
149  /**
150   * DOC: pcie_replay_count
151   *
152   * The amdgpu driver provides a sysfs API for reporting the total number
153   * of PCIe replays (NAKs).
154   * The file pcie_replay_count is used for this and returns the total
155   * number of replays as a sum of the NAKs generated and NAKs received.
156   */
157  
158  static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
159  		struct device_attribute *attr, char *buf)
160  {
161  	struct drm_device *ddev = dev_get_drvdata(dev);
162  	struct amdgpu_device *adev = drm_to_adev(ddev);
163  	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
164  
165  	return sysfs_emit(buf, "%llu\n", cnt);
166  }
167  
168  static DEVICE_ATTR(pcie_replay_count, 0444,
169  		amdgpu_device_get_pcie_replay_count, NULL);
170  
171  static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
172  					  struct bin_attribute *attr, char *buf,
173  					  loff_t ppos, size_t count)
174  {
175  	struct device *dev = kobj_to_dev(kobj);
176  	struct drm_device *ddev = dev_get_drvdata(dev);
177  	struct amdgpu_device *adev = drm_to_adev(ddev);
178  	ssize_t bytes_read;
179  
180  	switch (ppos) {
181  	case AMDGPU_SYS_REG_STATE_XGMI:
182  		bytes_read = amdgpu_asic_get_reg_state(
183  			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
184  		break;
185  	case AMDGPU_SYS_REG_STATE_WAFL:
186  		bytes_read = amdgpu_asic_get_reg_state(
187  			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
188  		break;
189  	case AMDGPU_SYS_REG_STATE_PCIE:
190  		bytes_read = amdgpu_asic_get_reg_state(
191  			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
192  		break;
193  	case AMDGPU_SYS_REG_STATE_USR:
194  		bytes_read = amdgpu_asic_get_reg_state(
195  			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
196  		break;
197  	case AMDGPU_SYS_REG_STATE_USR_1:
198  		bytes_read = amdgpu_asic_get_reg_state(
199  			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
200  		break;
201  	default:
202  		return -EINVAL;
203  	}
204  
205  	return bytes_read;
206  }
207  
208  BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
209  	 AMDGPU_SYS_REG_STATE_END);
210  
211  int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
212  {
213  	int ret;
214  
215  	if (!amdgpu_asic_get_reg_state_supported(adev))
216  		return 0;
217  
218  	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
219  
220  	return ret;
221  }
222  
223  void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
224  {
225  	if (!amdgpu_asic_get_reg_state_supported(adev))
226  		return;
227  	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
228  }
229  
230  /**
231   * DOC: board_info
232   *
233   * The amdgpu driver provides a sysfs API for giving board related information.
234   * It provides the form factor information in the format
235   *
236   *   type : form factor
237   *
238   * Possible form factor values
239   *
240   * - "cem"		- PCIE CEM card
241   * - "oam"		- Open Compute Accelerator Module
242   * - "unknown"	- Not known
243   *
244   */
245  
246  static ssize_t amdgpu_device_get_board_info(struct device *dev,
247  					    struct device_attribute *attr,
248  					    char *buf)
249  {
250  	struct drm_device *ddev = dev_get_drvdata(dev);
251  	struct amdgpu_device *adev = drm_to_adev(ddev);
252  	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
253  	const char *pkg;
254  
255  	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
256  		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
257  
258  	switch (pkg_type) {
259  	case AMDGPU_PKG_TYPE_CEM:
260  		pkg = "cem";
261  		break;
262  	case AMDGPU_PKG_TYPE_OAM:
263  		pkg = "oam";
264  		break;
265  	default:
266  		pkg = "unknown";
267  		break;
268  	}
269  
270  	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
271  }
272  
273  static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
274  
275  static struct attribute *amdgpu_board_attrs[] = {
276  	&dev_attr_board_info.attr,
277  	NULL,
278  };
279  
280  static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
281  					     struct attribute *attr, int n)
282  {
283  	struct device *dev = kobj_to_dev(kobj);
284  	struct drm_device *ddev = dev_get_drvdata(dev);
285  	struct amdgpu_device *adev = drm_to_adev(ddev);
286  
287  	if (adev->flags & AMD_IS_APU)
288  		return 0;
289  
290  	return attr->mode;
291  }
292  
293  static const struct attribute_group amdgpu_board_attrs_group = {
294  	.attrs = amdgpu_board_attrs,
295  	.is_visible = amdgpu_board_attrs_is_visible
296  };
297  
298  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
299  
300  
301  /**
302   * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
303   *
304   * @dev: drm_device pointer
305   *
306   * Returns true if the device is a dGPU with ATPX power control,
307   * otherwise returns false.
308   */
309  bool amdgpu_device_supports_px(struct drm_device *dev)
310  {
311  	struct amdgpu_device *adev = drm_to_adev(dev);
312  
313  	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
314  		return true;
315  	return false;
316  }
317  
318  /**
319   * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
320   *
321   * @dev: drm_device pointer
322   *
323   * Returns true if the device is a dGPU with ACPI power control,
324   * otherwise returns false.
325   */
326  bool amdgpu_device_supports_boco(struct drm_device *dev)
327  {
328  	struct amdgpu_device *adev = drm_to_adev(dev);
329  
330  	if (adev->has_pr3 ||
331  	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
332  		return true;
333  	return false;
334  }
335  
336  /**
337   * amdgpu_device_supports_baco - Does the device support BACO
338   *
339   * @dev: drm_device pointer
340   *
341   * Return:
342   * 1 if the device supports BACO;
343   * 3 if the device supports MACO (which only works if BACO is supported);
344   * otherwise return 0.
345   */
346  int amdgpu_device_supports_baco(struct drm_device *dev)
347  {
348  	struct amdgpu_device *adev = drm_to_adev(dev);
349  
350  	return amdgpu_asic_supports_baco(adev);
351  }
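
/*
 * Example (illustrative sketch, not part of the driver): callers test the
 * BACO_SUPPORT/MACO_SUPPORT bits in the returned value, much like
 * amdgpu_device_detect_runtime_pm_mode() below does. use_bamaco()/use_baco()
 * are hypothetical placeholders.
 *
 *	int bamaco_support = amdgpu_device_supports_baco(adev_to_drm(adev));
 *
 *	if (bamaco_support & MACO_SUPPORT)
 *		use_bamaco();
 *	else if (bamaco_support & BACO_SUPPORT)
 *		use_baco();
 */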
352  
353  void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
354  {
355  	struct drm_device *dev;
356  	int bamaco_support;
357  
358  	dev = adev_to_drm(adev);
359  
360  	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
361  	bamaco_support = amdgpu_device_supports_baco(dev);
362  
363  	switch (amdgpu_runtime_pm) {
364  	case 2:
365  		if (bamaco_support & MACO_SUPPORT) {
366  			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
367  			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
368  		} else if (bamaco_support == BACO_SUPPORT) {
369  			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
370  			dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
371  		}
372  		break;
373  	case 1:
374  		if (bamaco_support & BACO_SUPPORT) {
375  			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
376  			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
377  		}
378  		break;
379  	case -1:
380  	case -2:
381  		if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
382  			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
383  			dev_info(adev->dev, "Using ATPX for runtime pm\n");
384  		} else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
385  			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
386  			dev_info(adev->dev, "Using BOCO for runtime pm\n");
387  		} else {
388  			if (!bamaco_support)
389  				goto no_runtime_pm;
390  
391  			switch (adev->asic_type) {
392  			case CHIP_VEGA20:
393  			case CHIP_ARCTURUS:
394  				/* BACO is not supported on vega20 and arcturus */
395  				break;
396  			case CHIP_VEGA10:
397  				/* enable BACO as runpm mode if noretry=0 */
398  				if (!adev->gmc.noretry)
399  					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
400  				break;
401  			default:
402  				/* enable BACO as runpm mode on CI+ */
403  				adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
404  				break;
405  			}
406  
407  			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
408  				if (bamaco_support & MACO_SUPPORT) {
409  					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
410  					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
411  				} else {
412  					dev_info(adev->dev, "Using BACO for runtime pm\n");
413  				}
414  			}
415  		}
416  		break;
417  	case 0:
418  		dev_info(adev->dev, "runtime pm is manually disabled\n");
419  		break;
420  	default:
421  		break;
422  	}
423  
424  no_runtime_pm:
425  	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
426  		dev_info(adev->dev, "Runtime PM not available\n");
427  }
428  /**
429   * amdgpu_device_supports_smart_shift - Is the device a dGPU with
430   * Smart Shift support
431   *
432   * @dev: drm_device pointer
433   *
434   * Returns true if the device is a dGPU with Smart Shift support,
435   * otherwise returns false.
436   */
437  bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
438  {
439  	return (amdgpu_device_supports_boco(dev) &&
440  		amdgpu_acpi_is_power_shift_control_supported());
441  }
442  
443  /*
444   * VRAM access helper functions
445   */
446  
447  /**
448   * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
449   *
450   * @adev: amdgpu_device pointer
451   * @pos: offset of the buffer in vram
452   * @buf: virtual address of the buffer in system memory
453   * @size: read/write size, the size of @buf must be >= @size
454   * @write: true - write to vram, otherwise - read from vram
455   */
456  void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
457  			     void *buf, size_t size, bool write)
458  {
459  	unsigned long flags;
460  	uint32_t hi = ~0, tmp = 0;
461  	uint32_t *data = buf;
462  	uint64_t last;
463  	int idx;
464  
465  	if (!drm_dev_enter(adev_to_drm(adev), &idx))
466  		return;
467  
468  	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
469  
470  	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
471  	for (last = pos + size; pos < last; pos += 4) {
472  		tmp = pos >> 31;
473  
474  		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
475  		if (tmp != hi) {
476  			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
477  			hi = tmp;
478  		}
479  		if (write)
480  			WREG32_NO_KIQ(mmMM_DATA, *data++);
481  		else
482  			*data++ = RREG32_NO_KIQ(mmMM_DATA);
483  	}
484  
485  	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
486  	drm_dev_exit(idx);
487  }
488  
489  /**
490   * amdgpu_device_aper_access - access vram by the vram aperture
491   *
492   * @adev: amdgpu_device pointer
493   * @pos: offset of the buffer in vram
494   * @buf: virtual address of the buffer in system memory
495   * @size: read/write size, the size of @buf must be >= @size
496   * @write: true - write to vram, otherwise - read from vram
497   *
498   * Returns the number of bytes transferred.
499   */
500  size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
501  				 void *buf, size_t size, bool write)
502  {
503  #ifdef CONFIG_64BIT
504  	void __iomem *addr;
505  	size_t count = 0;
506  	uint64_t last;
507  
508  	if (!adev->mman.aper_base_kaddr)
509  		return 0;
510  
511  	last = min(pos + size, adev->gmc.visible_vram_size);
512  	if (last > pos) {
513  		addr = adev->mman.aper_base_kaddr + pos;
514  		count = last - pos;
515  
516  		if (write) {
517  			memcpy_toio(addr, buf, count);
518  			/* Make sure HDP write cache flush happens without any reordering
519  			 * after the system memory contents are sent over PCIe to the device
520  			 */
521  			mb();
522  			amdgpu_device_flush_hdp(adev, NULL);
523  		} else {
524  			amdgpu_device_invalidate_hdp(adev, NULL);
525  			/* Make sure HDP read cache is invalidated before issuing a read
526  			 * to the PCIe device
527  			 */
528  			mb();
529  			memcpy_fromio(buf, addr, count);
530  		}
531  
532  	}
533  
534  	return count;
535  #else
536  	return 0;
537  #endif
538  }
539  
540  /**
541   * amdgpu_device_vram_access - read/write a buffer in vram
542   *
543   * @adev: amdgpu_device pointer
544   * @pos: offset of the buffer in vram
545   * @buf: virtual address of the buffer in system memory
546   * @size: read/write size, the size of @buf must be >= @size
547   * @write: true - write to vram, otherwise - read from vram
548   */
549  void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
550  			       void *buf, size_t size, bool write)
551  {
552  	size_t count;
553  
554  	/* try using the vram aperture to access vram first */
555  	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
556  	size -= count;
557  	if (size) {
558  		/* use MM to access the rest of vram */
559  		pos += count;
560  		buf += count;
561  		amdgpu_device_mm_access(adev, pos, buf, size, write);
562  	}
563  }
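
/*
 * Example (illustrative sketch, not part of the driver): reading a single
 * dword back from VRAM with the helper above. The 0x1000 offset is arbitrary
 * and only for illustration; the aperture path is tried first and the MM
 * index/data path covers whatever remains.
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0x1000, &val, sizeof(val), false);
 *	dev_info(adev->dev, "VRAM dword at 0x1000: 0x%08x\n", val);
 */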
564  
565  /*
566   * register access helper functions.
567   */
568  
569  /* Check if hw access should be skipped because of hotplug or device error */
570  bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
571  {
572  	if (adev->no_hw_access)
573  		return true;
574  
575  #ifdef CONFIG_LOCKDEP
576  	/*
577  	 * This is a bit complicated to understand, so worth a comment. What we assert
578  	 * here is that the GPU reset is not running on another thread in parallel.
579  	 *
580  	 * For this we trylock the read side of the reset semaphore, if that succeeds
581  	 * we know that the reset is not running in parallel.
582  	 *
583  	 * If the trylock fails we assert that we are either already holding the read
584  	 * side of the lock or are the reset thread itself and hold the write side of
585  	 * the lock.
586  	 */
587  	if (in_task()) {
588  		if (down_read_trylock(&adev->reset_domain->sem))
589  			up_read(&adev->reset_domain->sem);
590  		else
591  			lockdep_assert_held(&adev->reset_domain->sem);
592  	}
593  #endif
594  	return false;
595  }
596  
597  /**
598   * amdgpu_device_rreg - read a memory mapped IO or indirect register
599   *
600   * @adev: amdgpu_device pointer
601   * @reg: dword aligned register offset
602   * @acc_flags: access flags which require special behavior
603   *
604   * Returns the 32 bit value from the offset specified.
605   */
606  uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
607  			    uint32_t reg, uint32_t acc_flags)
608  {
609  	uint32_t ret;
610  
611  	if (amdgpu_device_skip_hw_access(adev))
612  		return 0;
613  
614  	if ((reg * 4) < adev->rmmio_size) {
615  		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
616  		    amdgpu_sriov_runtime(adev) &&
617  		    down_read_trylock(&adev->reset_domain->sem)) {
618  			ret = amdgpu_kiq_rreg(adev, reg, 0);
619  			up_read(&adev->reset_domain->sem);
620  		} else {
621  			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
622  		}
623  	} else {
624  		ret = adev->pcie_rreg(adev, reg * 4);
625  	}
626  
627  	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
628  
629  	return ret;
630  }
631  
632  /*
633   * MMIO register read with bytes helper functions
634   * @offset: byte offset from MMIO start
635   */
636  
637  /**
638   * amdgpu_mm_rreg8 - read a memory mapped IO register
639   *
640   * @adev: amdgpu_device pointer
641   * @offset: byte aligned register offset
642   *
643   * Returns the 8 bit value from the offset specified.
644   */
645  uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
646  {
647  	if (amdgpu_device_skip_hw_access(adev))
648  		return 0;
649  
650  	if (offset < adev->rmmio_size)
651  		return (readb(adev->rmmio + offset));
652  	BUG();
653  }
654  
655  
656  /**
657   * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
658   *
659   * @adev: amdgpu_device pointer
660   * @reg: dword aligned register offset
661   * @acc_flags: access flags which require special behavior
662   * @xcc_id: xcc accelerated compute core id
663   *
664   * Returns the 32 bit value from the offset specified.
665   */
666  uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
667  				uint32_t reg, uint32_t acc_flags,
668  				uint32_t xcc_id)
669  {
670  	uint32_t ret, rlcg_flag;
671  
672  	if (amdgpu_device_skip_hw_access(adev))
673  		return 0;
674  
675  	if ((reg * 4) < adev->rmmio_size) {
676  		if (amdgpu_sriov_vf(adev) &&
677  		    !amdgpu_sriov_runtime(adev) &&
678  		    adev->gfx.rlc.rlcg_reg_access_supported &&
679  		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
680  							 GC_HWIP, false,
681  							 &rlcg_flag)) {
682  			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
683  		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
684  		    amdgpu_sriov_runtime(adev) &&
685  		    down_read_trylock(&adev->reset_domain->sem)) {
686  			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
687  			up_read(&adev->reset_domain->sem);
688  		} else {
689  			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
690  		}
691  	} else {
692  		ret = adev->pcie_rreg(adev, reg * 4);
693  	}
694  
695  	return ret;
696  }
697  
698  /*
699   * MMIO register write with bytes helper functions
700   * @offset: byte offset from MMIO start
701   * @value: the value to be written to the register
702   */
703  
704  /**
705   * amdgpu_mm_wreg8 - write a memory mapped IO register
706   *
707   * @adev: amdgpu_device pointer
708   * @offset: byte aligned register offset
709   * @value: 8 bit value to write
710   *
711   * Writes the value specified to the offset specified.
712   */
713  void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
714  {
715  	if (amdgpu_device_skip_hw_access(adev))
716  		return;
717  
718  	if (offset < adev->rmmio_size)
719  		writeb(value, adev->rmmio + offset);
720  	else
721  		BUG();
722  }
723  
724  /**
725   * amdgpu_device_wreg - write to a memory mapped IO or indirect register
726   *
727   * @adev: amdgpu_device pointer
728   * @reg: dword aligned register offset
729   * @v: 32 bit value to write to the register
730   * @acc_flags: access flags which require special behavior
731   *
732   * Writes the value specified to the offset specified.
733   */
734  void amdgpu_device_wreg(struct amdgpu_device *adev,
735  			uint32_t reg, uint32_t v,
736  			uint32_t acc_flags)
737  {
738  	if (amdgpu_device_skip_hw_access(adev))
739  		return;
740  
741  	if ((reg * 4) < adev->rmmio_size) {
742  		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
743  		    amdgpu_sriov_runtime(adev) &&
744  		    down_read_trylock(&adev->reset_domain->sem)) {
745  			amdgpu_kiq_wreg(adev, reg, v, 0);
746  			up_read(&adev->reset_domain->sem);
747  		} else {
748  			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
749  		}
750  	} else {
751  		adev->pcie_wreg(adev, reg * 4, v);
752  	}
753  
754  	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
755  }
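
/*
 * Example (illustrative sketch, not part of the driver): a read-modify-write
 * of a register using the two helpers above. Driver code normally spells this
 * with the RREG32()/WREG32() macros, which map onto these helpers; mmSOME_REG
 * and SOME_MASK are hypothetical names.
 *
 *	uint32_t tmp;
 *
 *	tmp = amdgpu_device_rreg(adev, mmSOME_REG, 0);
 *	tmp &= ~SOME_MASK;
 *	amdgpu_device_wreg(adev, mmSOME_REG, tmp, 0);
 */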
756  
757  /**
758   * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with the RLC path if in range
759   *
760   * @adev: amdgpu_device pointer
761   * @reg: mmio/rlc register
762   * @v: value to write
763   * @xcc_id: xcc accelerated compute core id
764   *
765   * this function is invoked only for the debugfs register access
766   */
767  void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
768  			     uint32_t reg, uint32_t v,
769  			     uint32_t xcc_id)
770  {
771  	if (amdgpu_device_skip_hw_access(adev))
772  		return;
773  
774  	if (amdgpu_sriov_fullaccess(adev) &&
775  	    adev->gfx.rlc.funcs &&
776  	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
777  		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
778  			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
779  	} else if ((reg * 4) >= adev->rmmio_size) {
780  		adev->pcie_wreg(adev, reg * 4, v);
781  	} else {
782  		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
783  	}
784  }
785  
786  /**
787   * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
788   *
789   * @adev: amdgpu_device pointer
790   * @reg: dword aligned register offset
791   * @v: 32 bit value to write to the register
792   * @acc_flags: access flags which require special behavior
793   * @xcc_id: xcc accelerated compute core id
794   *
795   * Writes the value specified to the offset specified.
796   */
797  void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
798  			uint32_t reg, uint32_t v,
799  			uint32_t acc_flags, uint32_t xcc_id)
800  {
801  	uint32_t rlcg_flag;
802  
803  	if (amdgpu_device_skip_hw_access(adev))
804  		return;
805  
806  	if ((reg * 4) < adev->rmmio_size) {
807  		if (amdgpu_sriov_vf(adev) &&
808  		    !amdgpu_sriov_runtime(adev) &&
809  		    adev->gfx.rlc.rlcg_reg_access_supported &&
810  		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
811  							 GC_HWIP, true,
812  							 &rlcg_flag)) {
813  			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
814  		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
815  		    amdgpu_sriov_runtime(adev) &&
816  		    down_read_trylock(&adev->reset_domain->sem)) {
817  			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
818  			up_read(&adev->reset_domain->sem);
819  		} else {
820  			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
821  		}
822  	} else {
823  		adev->pcie_wreg(adev, reg * 4, v);
824  	}
825  }
826  
827  /**
828   * amdgpu_device_indirect_rreg - read an indirect register
829   *
830   * @adev: amdgpu_device pointer
831   * @reg_addr: indirect register address to read from
832   *
833   * Returns the value of indirect register @reg_addr
834   */
835  u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
836  				u32 reg_addr)
837  {
838  	unsigned long flags, pcie_index, pcie_data;
839  	void __iomem *pcie_index_offset;
840  	void __iomem *pcie_data_offset;
841  	u32 r;
842  
843  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
844  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
845  
846  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
847  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
848  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
849  
850  	writel(reg_addr, pcie_index_offset);
851  	readl(pcie_index_offset);
852  	r = readl(pcie_data_offset);
853  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
854  
855  	return r;
856  }
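
/*
 * Illustrative sketch (an assumption, not the actual wiring of any ASIC): an
 * ASIC backend can forward its adev->pcie_rreg callback to this helper, e.g.
 *
 *	static u32 someasic_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		return amdgpu_device_indirect_rreg(adev, reg);
 *	}
 *
 * someasic_pcie_rreg is a hypothetical name.
 */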
857  
858  u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
859  				    u64 reg_addr)
860  {
861  	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
862  	u32 r;
863  	void __iomem *pcie_index_offset;
864  	void __iomem *pcie_index_hi_offset;
865  	void __iomem *pcie_data_offset;
866  
867  	if (unlikely(!adev->nbio.funcs)) {
868  		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
869  		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
870  	} else {
871  		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
872  		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
873  	}
874  
875  	if (reg_addr >> 32) {
876  		if (unlikely(!adev->nbio.funcs))
877  			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
878  		else
879  			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
880  	} else {
881  		pcie_index_hi = 0;
882  	}
883  
884  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
885  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
886  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
887  	if (pcie_index_hi != 0)
888  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
889  				pcie_index_hi * 4;
890  
891  	writel(reg_addr, pcie_index_offset);
892  	readl(pcie_index_offset);
893  	if (pcie_index_hi != 0) {
894  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
895  		readl(pcie_index_hi_offset);
896  	}
897  	r = readl(pcie_data_offset);
898  
899  	/* clear the high bits */
900  	if (pcie_index_hi != 0) {
901  		writel(0, pcie_index_hi_offset);
902  		readl(pcie_index_hi_offset);
903  	}
904  
905  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
906  
907  	return r;
908  }
909  
910  /**
911   * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
912   *
913   * @adev: amdgpu_device pointer
914   * @reg_addr: indirect register address to read from
915   *
916   * Returns the value of indirect register @reg_addr
917   */
918  u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
919  				  u32 reg_addr)
920  {
921  	unsigned long flags, pcie_index, pcie_data;
922  	void __iomem *pcie_index_offset;
923  	void __iomem *pcie_data_offset;
924  	u64 r;
925  
926  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
927  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
928  
929  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
930  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
931  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
932  
933  	/* read low 32 bits */
934  	writel(reg_addr, pcie_index_offset);
935  	readl(pcie_index_offset);
936  	r = readl(pcie_data_offset);
937  	/* read high 32 bits */
938  	writel(reg_addr + 4, pcie_index_offset);
939  	readl(pcie_index_offset);
940  	r |= ((u64)readl(pcie_data_offset) << 32);
941  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
942  
943  	return r;
944  }
945  
946  u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
947  				  u64 reg_addr)
948  {
949  	unsigned long flags, pcie_index, pcie_data;
950  	unsigned long pcie_index_hi = 0;
951  	void __iomem *pcie_index_offset;
952  	void __iomem *pcie_index_hi_offset;
953  	void __iomem *pcie_data_offset;
954  	u64 r;
955  
956  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
957  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
958  	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
959  		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
960  
961  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
962  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
963  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
964  	if (pcie_index_hi != 0)
965  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
966  			pcie_index_hi * 4;
967  
968  	/* read low 32 bits */
969  	writel(reg_addr, pcie_index_offset);
970  	readl(pcie_index_offset);
971  	if (pcie_index_hi != 0) {
972  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
973  		readl(pcie_index_hi_offset);
974  	}
975  	r = readl(pcie_data_offset);
976  	/* read high 32 bits */
977  	writel(reg_addr + 4, pcie_index_offset);
978  	readl(pcie_index_offset);
979  	if (pcie_index_hi != 0) {
980  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
981  		readl(pcie_index_hi_offset);
982  	}
983  	r |= ((u64)readl(pcie_data_offset) << 32);
984  
985  	/* clear the high bits */
986  	if (pcie_index_hi != 0) {
987  		writel(0, pcie_index_hi_offset);
988  		readl(pcie_index_hi_offset);
989  	}
990  
991  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
992  
993  	return r;
994  }
995  
996  /**
997   * amdgpu_device_indirect_wreg - write to an indirect register
998   *
999   * @adev: amdgpu_device pointer
1000   * @reg_addr: indirect register offset
1001   * @reg_data: indirect register data
1002   *
1003   */
1004  void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1005  				 u32 reg_addr, u32 reg_data)
1006  {
1007  	unsigned long flags, pcie_index, pcie_data;
1008  	void __iomem *pcie_index_offset;
1009  	void __iomem *pcie_data_offset;
1010  
1011  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1012  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1013  
1014  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1015  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1016  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1017  
1018  	writel(reg_addr, pcie_index_offset);
1019  	readl(pcie_index_offset);
1020  	writel(reg_data, pcie_data_offset);
1021  	readl(pcie_data_offset);
1022  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1023  }
1024  
1025  void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1026  				     u64 reg_addr, u32 reg_data)
1027  {
1028  	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1029  	void __iomem *pcie_index_offset;
1030  	void __iomem *pcie_index_hi_offset;
1031  	void __iomem *pcie_data_offset;
1032  
1033  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1034  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1035  	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1036  		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1037  	else
1038  		pcie_index_hi = 0;
1039  
1040  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1041  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1042  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1043  	if (pcie_index_hi != 0)
1044  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1045  				pcie_index_hi * 4;
1046  
1047  	writel(reg_addr, pcie_index_offset);
1048  	readl(pcie_index_offset);
1049  	if (pcie_index_hi != 0) {
1050  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1051  		readl(pcie_index_hi_offset);
1052  	}
1053  	writel(reg_data, pcie_data_offset);
1054  	readl(pcie_data_offset);
1055  
1056  	/* clear the high bits */
1057  	if (pcie_index_hi != 0) {
1058  		writel(0, pcie_index_hi_offset);
1059  		readl(pcie_index_hi_offset);
1060  	}
1061  
1062  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1063  }
1064  
1065  /**
1066   * amdgpu_device_indirect_wreg64 - write to a 64 bit indirect register
1067   *
1068   * @adev: amdgpu_device pointer
1069   * @reg_addr: indirect register offset
1070   * @reg_data: indirect register data
1071   *
1072   */
1073  void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1074  				   u32 reg_addr, u64 reg_data)
1075  {
1076  	unsigned long flags, pcie_index, pcie_data;
1077  	void __iomem *pcie_index_offset;
1078  	void __iomem *pcie_data_offset;
1079  
1080  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1081  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1082  
1083  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1084  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1085  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1086  
1087  	/* write low 32 bits */
1088  	writel(reg_addr, pcie_index_offset);
1089  	readl(pcie_index_offset);
1090  	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1091  	readl(pcie_data_offset);
1092  	/* write high 32 bits */
1093  	writel(reg_addr + 4, pcie_index_offset);
1094  	readl(pcie_index_offset);
1095  	writel((u32)(reg_data >> 32), pcie_data_offset);
1096  	readl(pcie_data_offset);
1097  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1098  }
1099  
1100  void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1101  				   u64 reg_addr, u64 reg_data)
1102  {
1103  	unsigned long flags, pcie_index, pcie_data;
1104  	unsigned long pcie_index_hi = 0;
1105  	void __iomem *pcie_index_offset;
1106  	void __iomem *pcie_index_hi_offset;
1107  	void __iomem *pcie_data_offset;
1108  
1109  	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1110  	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1111  	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1112  		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1113  
1114  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1115  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1116  	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1117  	if (pcie_index_hi != 0)
1118  		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1119  				pcie_index_hi * 4;
1120  
1121  	/* write low 32 bits */
1122  	writel(reg_addr, pcie_index_offset);
1123  	readl(pcie_index_offset);
1124  	if (pcie_index_hi != 0) {
1125  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1126  		readl(pcie_index_hi_offset);
1127  	}
1128  	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1129  	readl(pcie_data_offset);
1130  	/* write high 32 bits */
1131  	writel(reg_addr + 4, pcie_index_offset);
1132  	readl(pcie_index_offset);
1133  	if (pcie_index_hi != 0) {
1134  		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1135  		readl(pcie_index_hi_offset);
1136  	}
1137  	writel((u32)(reg_data >> 32), pcie_data_offset);
1138  	readl(pcie_data_offset);
1139  
1140  	/* clear the high bits */
1141  	if (pcie_index_hi != 0) {
1142  		writel(0, pcie_index_hi_offset);
1143  		readl(pcie_index_hi_offset);
1144  	}
1145  
1146  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1147  }
1148  
1149  /**
1150   * amdgpu_device_get_rev_id - query device rev_id
1151   *
1152   * @adev: amdgpu_device pointer
1153   *
1154   * Returns the device rev_id.
1155   */
1156  u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1157  {
1158  	return adev->nbio.funcs->get_rev_id(adev);
1159  }
1160  
1161  /**
1162   * amdgpu_invalid_rreg - dummy reg read function
1163   *
1164   * @adev: amdgpu_device pointer
1165   * @reg: offset of register
1166   *
1167   * Dummy register read function.  Used for register blocks
1168   * that certain asics don't have (all asics).
1169   * Returns the value in the register.
1170   */
1171  static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1172  {
1173  	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1174  	BUG();
1175  	return 0;
1176  }
1177  
1178  static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1179  {
1180  	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1181  	BUG();
1182  	return 0;
1183  }
1184  
1185  /**
1186   * amdgpu_invalid_wreg - dummy reg write function
1187   *
1188   * @adev: amdgpu_device pointer
1189   * @reg: offset of register
1190   * @v: value to write to the register
1191   *
1192   * Dummy register write function.  Used for register blocks
1193   * that certain asics don't have (all asics).
1194   */
1195  static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1196  {
1197  	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1198  		  reg, v);
1199  	BUG();
1200  }
1201  
1202  static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1203  {
1204  	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1205  		  reg, v);
1206  	BUG();
1207  }
1208  
1209  /**
1210   * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1211   *
1212   * @adev: amdgpu_device pointer
1213   * @reg: offset of register
1214   *
1215   * Dummy register read function.  Used for register blocks
1216   * that certain asics don't have (all asics).
1217   * Returns the value in the register.
1218   */
1219  static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1220  {
1221  	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1222  	BUG();
1223  	return 0;
1224  }
1225  
1226  static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1227  {
1228  	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1229  	BUG();
1230  	return 0;
1231  }
1232  
1233  /**
1234   * amdgpu_invalid_wreg64 - dummy reg write function
1235   *
1236   * @adev: amdgpu_device pointer
1237   * @reg: offset of register
1238   * @v: value to write to the register
1239   *
1240   * Dummy register write function.  Used for register blocks
1241   * that certain asics don't have (all asics).
1242   */
1243  static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1244  {
1245  	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1246  		  reg, v);
1247  	BUG();
1248  }
1249  
1250  static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1251  {
1252  	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1253  		  reg, v);
1254  	BUG();
1255  }
1256  
1257  /**
1258   * amdgpu_block_invalid_rreg - dummy reg read function
1259   *
1260   * @adev: amdgpu_device pointer
1261   * @block: offset of instance
1262   * @reg: offset of register
1263   *
1264   * Dummy register read function.  Used for register blocks
1265   * that certain asics don't have (all asics).
1266   * Returns the value in the register.
1267   */
1268  static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1269  					  uint32_t block, uint32_t reg)
1270  {
1271  	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1272  		  reg, block);
1273  	BUG();
1274  	return 0;
1275  }
1276  
1277  /**
1278   * amdgpu_block_invalid_wreg - dummy reg write function
1279   *
1280   * @adev: amdgpu_device pointer
1281   * @block: offset of instance
1282   * @reg: offset of register
1283   * @v: value to write to the register
1284   *
1285   * Dummy register write function.  Used for register blocks
1286   * that certain asics don't have (all asics).
1287   */
1288  static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1289  				      uint32_t block,
1290  				      uint32_t reg, uint32_t v)
1291  {
1292  	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1293  		  reg, block, v);
1294  	BUG();
1295  }
1296  
1297  /**
1298   * amdgpu_device_asic_init - Wrapper for atom asic_init
1299   *
1300   * @adev: amdgpu_device pointer
1301   *
1302   * Does any asic specific work and then calls atom asic init.
1303   */
1304  static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1305  {
1306  	int ret;
1307  
1308  	amdgpu_asic_pre_asic_init(adev);
1309  
1310  	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1311  	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1312  		amdgpu_psp_wait_for_bootloader(adev);
1313  		ret = amdgpu_atomfirmware_asic_init(adev, true);
1314  		return ret;
1315  	} else {
1316  		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1317  	}
1318  
1319  	return 0;
1320  }
1321  
1322  /**
1323   * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1324   *
1325   * @adev: amdgpu_device pointer
1326   *
1327   * Allocates a scratch page of VRAM for use by various things in the
1328   * driver.
1329   */
1330  static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1331  {
1332  	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1333  				       AMDGPU_GEM_DOMAIN_VRAM |
1334  				       AMDGPU_GEM_DOMAIN_GTT,
1335  				       &adev->mem_scratch.robj,
1336  				       &adev->mem_scratch.gpu_addr,
1337  				       (void **)&adev->mem_scratch.ptr);
1338  }
1339  
1340  /**
1341   * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1342   *
1343   * @adev: amdgpu_device pointer
1344   *
1345   * Frees the VRAM scratch page.
1346   */
1347  static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1348  {
1349  	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1350  }
1351  
1352  /**
1353   * amdgpu_device_program_register_sequence - program an array of registers.
1354   *
1355   * @adev: amdgpu_device pointer
1356   * @registers: pointer to the register array
1357   * @array_size: size of the register array
1358   *
1359   * Programs an array of registers with AND and OR masks.
1360   * This is a helper for setting golden registers.
1361   */
1362  void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1363  					     const u32 *registers,
1364  					     const u32 array_size)
1365  {
1366  	u32 tmp, reg, and_mask, or_mask;
1367  	int i;
1368  
1369  	if (array_size % 3)
1370  		return;
1371  
1372  	for (i = 0; i < array_size; i += 3) {
1373  		reg = registers[i + 0];
1374  		and_mask = registers[i + 1];
1375  		or_mask = registers[i + 2];
1376  
1377  		if (and_mask == 0xffffffff) {
1378  			tmp = or_mask;
1379  		} else {
1380  			tmp = RREG32(reg);
1381  			tmp &= ~and_mask;
1382  			if (adev->family >= AMDGPU_FAMILY_AI)
1383  				tmp |= (or_mask & and_mask);
1384  			else
1385  				tmp |= or_mask;
1386  		}
1387  		WREG32(reg, tmp);
1388  	}
1389  }
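
/*
 * Example (illustrative sketch, not taken from any real golden-settings
 * table): the register array is consumed as {offset, and_mask, or_mask}
 * triplets, so a caller looks roughly like this (mmREG_A/mmREG_B and the
 * masks are hypothetical):
 *
 *	static const u32 fake_golden_settings[] = {
 *		mmREG_A, 0xffffffff, 0x00000100,
 *		mmREG_B, 0x0000ff00, 0x00003400,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, fake_golden_settings,
 *						ARRAY_SIZE(fake_golden_settings));
 *
 * The first triplet overwrites mmREG_A completely (and_mask 0xffffffff); the
 * second read-modify-writes only bits 8..15 of mmREG_B.
 */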
1390  
1391  /**
1392   * amdgpu_device_pci_config_reset - reset the GPU
1393   *
1394   * @adev: amdgpu_device pointer
1395   *
1396   * Resets the GPU using the pci config reset sequence.
1397   * Only applicable to asics prior to vega10.
1398   */
1399  void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1400  {
1401  	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1402  }
1403  
1404  /**
1405   * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1406   *
1407   * @adev: amdgpu_device pointer
1408   *
1409   * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1410   */
1411  int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1412  {
1413  	return pci_reset_function(adev->pdev);
1414  }
1415  
1416  /*
1417   * amdgpu_device_wb_*()
1418   * Writeback is the method by which the GPU updates special pages in memory
1419   * with the status of certain GPU events (fences, ring pointers, etc.).
1420   */
1421  
1422  /**
1423   * amdgpu_device_wb_fini - Disable Writeback and free memory
1424   *
1425   * @adev: amdgpu_device pointer
1426   *
1427   * Disables Writeback and frees the Writeback memory (all asics).
1428   * Used at driver shutdown.
1429   */
1430  static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1431  {
1432  	if (adev->wb.wb_obj) {
1433  		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1434  				      &adev->wb.gpu_addr,
1435  				      (void **)&adev->wb.wb);
1436  		adev->wb.wb_obj = NULL;
1437  	}
1438  }
1439  
1440  /**
1441   * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1442   *
1443   * @adev: amdgpu_device pointer
1444   *
1445   * Initializes writeback and allocates writeback memory (all asics).
1446   * Used at driver startup.
1447   * Returns 0 on success or a negative error code on failure.
1448   */
1449  static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1450  {
1451  	int r;
1452  
1453  	if (adev->wb.wb_obj == NULL) {
1454  		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1455  		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1456  					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1457  					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1458  					    (void **)&adev->wb.wb);
1459  		if (r) {
1460  			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1461  			return r;
1462  		}
1463  
1464  		adev->wb.num_wb = AMDGPU_MAX_WB;
1465  		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1466  
1467  		/* clear wb memory */
1468  		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1469  	}
1470  
1471  	return 0;
1472  }
1473  
1474  /**
1475   * amdgpu_device_wb_get - Allocate a wb entry
1476   *
1477   * @adev: amdgpu_device pointer
1478   * @wb: wb index
1479   *
1480   * Allocate a wb slot for use by the driver (all asics).
1481   * Returns 0 on success or -EINVAL on failure.
1482   */
1483  int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1484  {
1485  	unsigned long flags, offset;
1486  
1487  	spin_lock_irqsave(&adev->wb.lock, flags);
1488  	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1489  	if (offset < adev->wb.num_wb) {
1490  		__set_bit(offset, adev->wb.used);
1491  		spin_unlock_irqrestore(&adev->wb.lock, flags);
1492  		*wb = offset << 3; /* convert to dw offset */
1493  		return 0;
1494  	} else {
1495  		spin_unlock_irqrestore(&adev->wb.lock, flags);
1496  		return -EINVAL;
1497  	}
1498  }
1499  
1500  /**
1501   * amdgpu_device_wb_free - Free a wb entry
1502   *
1503   * @adev: amdgpu_device pointer
1504   * @wb: wb index
1505   *
1506   * Free a wb slot allocated for use by the driver (all asics)
1507   */
1508  void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1509  {
1510  	unsigned long flags;
1511  
1512  	wb >>= 3;
1513  	spin_lock_irqsave(&adev->wb.lock, flags);
1514  	if (wb < adev->wb.num_wb)
1515  		__clear_bit(wb, adev->wb.used);
1516  	spin_unlock_irqrestore(&adev->wb.lock, flags);
1517  }
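
/*
 * Example (illustrative sketch, not part of the driver): a typical consumer
 * allocates a writeback slot, derives both the CPU view and the GPU address
 * of that dword, and frees the slot when done, similar to how the ring code
 * handles its wptr/fence writeback offsets.
 *
 *	u32 wb;
 *	u64 wb_gpu_addr;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &wb);
 *	if (r)
 *		return r;
 *
 *	wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *	dev_info(adev->dev, "wb value: 0x%08x\n", adev->wb.wb[wb]);
 *	amdgpu_device_wb_free(adev, wb);
 */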
1518  
1519  /**
1520   * amdgpu_device_resize_fb_bar - try to resize FB BAR
1521   *
1522   * @adev: amdgpu_device pointer
1523   *
1524   * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1525   * to fail, but if any of the BARs is not accessible after the resize we abort
1526   * driver loading by returning -ENODEV.
1527   */
1528  int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1529  {
1530  	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1531  	struct pci_bus *root;
1532  	struct resource *res;
1533  	unsigned int i;
1534  	u16 cmd;
1535  	int r;
1536  
1537  	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1538  		return 0;
1539  
1540  	/* Bypass for VF */
1541  	if (amdgpu_sriov_vf(adev))
1542  		return 0;
1543  
1544  	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1545  	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1546  		DRM_WARN("System can't access extended configuration space, please check!!\n");
1547  
1548  	/* skip if the bios has already enabled large BAR */
1549  	if (adev->gmc.real_vram_size &&
1550  	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1551  		return 0;
1552  
1553  	/* Check if the root BUS has 64bit memory resources */
1554  	root = adev->pdev->bus;
1555  	while (root->parent)
1556  		root = root->parent;
1557  
1558  	pci_bus_for_each_resource(root, res, i) {
1559  		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1560  		    res->start > 0x100000000ull)
1561  			break;
1562  	}
1563  
1564  	/* Trying to resize is pointless without a root hub window above 4GB */
1565  	if (!res)
1566  		return 0;
1567  
1568  	/* Limit the BAR size to what is available */
1569  	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1570  			rbar_size);
1571  
1572  	/* Disable memory decoding while we change the BAR addresses and size */
1573  	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1574  	pci_write_config_word(adev->pdev, PCI_COMMAND,
1575  			      cmd & ~PCI_COMMAND_MEMORY);
1576  
1577  	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1578  	amdgpu_doorbell_fini(adev);
1579  	if (adev->asic_type >= CHIP_BONAIRE)
1580  		pci_release_resource(adev->pdev, 2);
1581  
1582  	pci_release_resource(adev->pdev, 0);
1583  
1584  	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1585  	if (r == -ENOSPC)
1586  		DRM_INFO("Not enough PCI address space for a large BAR.");
1587  	else if (r && r != -ENOTSUPP)
1588  		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1589  
1590  	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1591  
1592  	/* When the doorbell or fb BAR isn't available we have no chance of
1593  	 * using the device.
1594  	 */
1595  	r = amdgpu_doorbell_init(adev);
1596  	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1597  		return -ENODEV;
1598  
1599  	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1600  
1601  	return 0;
1602  }
1603  
1604  static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1605  {
1606  	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1607  		return false;
1608  
1609  	return true;
1610  }
1611  
1612  /*
1613   * GPU helpers function.
1614   */
1615  /**
1616   * amdgpu_device_need_post - check if the hw need post or not
1617   *
1618   * @adev: amdgpu_device pointer
1619   *
1620   * Check if the asic has been initialized (all asics) at driver startup
1621   * or if post is needed because a hw reset was performed.
1622   * Returns true if post is needed or false if not.
1623   */
1624  bool amdgpu_device_need_post(struct amdgpu_device *adev)
1625  {
1626  	uint32_t reg;
1627  
1628  	if (amdgpu_sriov_vf(adev))
1629  		return false;
1630  
1631  	if (!amdgpu_device_read_bios(adev))
1632  		return false;
1633  
1634  	if (amdgpu_passthrough(adev)) {
1635  		/* for FIJI: in the whole GPU pass-through virtualization case, after a VM
1636  		 * reboot some old SMC firmware still needs the driver to do vPost, otherwise
1637  		 * the GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
1638  		 * force vPost for SMC versions below 22.15.
1639  		 */
1640  		if (adev->asic_type == CHIP_FIJI) {
1641  			int err;
1642  			uint32_t fw_ver;
1643  
1644  			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1645  			/* force vPost if an error occurred */
1646  			if (err)
1647  				return true;
1648  
1649  			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1650  			release_firmware(adev->pm.fw);
1651  			if (fw_ver < 0x00160e00)
1652  				return true;
1653  		}
1654  	}
1655  
1656  	/* Don't post if we need to reset whole hive on init */
1657  	if (adev->gmc.xgmi.pending_reset)
1658  		return false;
1659  
1660  	if (adev->has_hw_reset) {
1661  		adev->has_hw_reset = false;
1662  		return true;
1663  	}
1664  
1665  	/* bios scratch used on CIK+ */
1666  	if (adev->asic_type >= CHIP_BONAIRE)
1667  		return amdgpu_atombios_scratch_need_asic_init(adev);
1668  
1669  	/* check MEM_SIZE for older asics */
1670  	reg = amdgpu_asic_get_config_memsize(adev);
1671  
1672  	if ((reg != 0) && (reg != 0xffffffff))
1673  		return false;
1674  
1675  	return true;
1676  }
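
/*
 * Illustrative sketch (an assumption, simplified from the init path): the
 * result is typically used to decide whether to run the ATOM asic_init
 * tables during device init:
 *
 *	if (amdgpu_device_need_post(adev)) {
 *		r = amdgpu_device_asic_init(adev);
 *		if (r)
 *			dev_err(adev->dev, "gpu post error!\n");
 *	}
 */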
1677  
1678  /*
1679   * Check whether seamless boot is supported.
1680   *
1681   * So far we only support seamless boot on DCE 3.0 or later.
1682   * If users report that it works on older ASICS as well, we may
1683   * loosen this.
1684   */
1685  bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1686  {
1687  	switch (amdgpu_seamless) {
1688  	case -1:
1689  		break;
1690  	case 1:
1691  		return true;
1692  	case 0:
1693  		return false;
1694  	default:
1695  		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1696  			  amdgpu_seamless);
1697  		return false;
1698  	}
1699  
1700  	if (!(adev->flags & AMD_IS_APU))
1701  		return false;
1702  
1703  	if (adev->mman.keep_stolen_vga_memory)
1704  		return false;
1705  
1706  	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1707  }
1708  
1709  /*
1710   * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1711   * don't support dynamic speed switching. Until we have confirmation from Intel
1712   * that a specific host supports it, it's safer that we keep it disabled for all.
1713   *
1714   * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1715   * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1716   */
1717  static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1718  {
1719  #if IS_ENABLED(CONFIG_X86)
1720  	struct cpuinfo_x86 *c = &cpu_data(0);
1721  
1722  	/* eGPU change speeds based on USB4 fabric conditions */
1723  	if (dev_is_removable(adev->dev))
1724  		return true;
1725  
1726  	if (c->x86_vendor == X86_VENDOR_INTEL)
1727  		return false;
1728  #endif
1729  	return true;
1730  }
1731  
1732  /**
1733   * amdgpu_device_should_use_aspm - check if the device should program ASPM
1734   *
1735   * @adev: amdgpu_device pointer
1736   *
1737   * Confirm whether the module parameter and pcie bridge agree that ASPM should
1738   * be set for this device.
1739   *
1740   * Returns true if it should be used or false if not.
1741   */
1742  bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1743  {
1744  	switch (amdgpu_aspm) {
1745  	case -1:
1746  		break;
1747  	case 0:
1748  		return false;
1749  	case 1:
1750  		return true;
1751  	default:
1752  		return false;
1753  	}
1754  	if (adev->flags & AMD_IS_APU)
1755  		return false;
1756  	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1757  		return false;
1758  	return pcie_aspm_enabled(adev->pdev);
1759  }
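
/*
 * Illustrative sketch (hypothetical callback, not a real ASIC
 * implementation): IP code gates its ASPM programming on this helper:
 *
 *	static void someasic_program_aspm(struct amdgpu_device *adev)
 *	{
 *		if (!amdgpu_device_should_use_aspm(adev))
 *			return;
 *
 *		... program the ASPM related registers here ...
 *	}
 */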
1760  
1761  /* if we get transitioned to only one device, take VGA back */
1762  /**
1763   * amdgpu_device_vga_set_decode - enable/disable vga decode
1764   *
1765   * @pdev: PCI device pointer
1766   * @state: enable/disable vga decode
1767   *
1768   * Enable/disable vga decode (all asics).
1769   * Returns VGA resource flags.
1770   */
1771  static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1772  		bool state)
1773  {
1774  	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1775  
1776  	amdgpu_asic_set_vga_state(adev, state);
1777  	if (state)
1778  		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1779  		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1780  	else
1781  		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1782  }
1783  
1784  /**
1785   * amdgpu_device_check_block_size - validate the vm block size
1786   *
1787   * @adev: amdgpu_device pointer
1788   *
1789   * Validates the vm block size specified via module parameter.
1790   * The vm block size defines number of bits in page table versus page directory,
1791   * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1792   * page table and the remaining bits are in the page directory.
1793   */
1794  static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1795  {
1796  	/* defines number of bits in page table versus page directory,
1797  	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1798  	 * page table and the remaining bits are in the page directory
1799  	 */
1800  	if (amdgpu_vm_block_size == -1)
1801  		return;
1802  
1803  	if (amdgpu_vm_block_size < 9) {
1804  		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1805  			 amdgpu_vm_block_size);
1806  		amdgpu_vm_block_size = -1;
1807  	}
1808  }
1809  
1810  /**
1811   * amdgpu_device_check_vm_size - validate the vm size
1812   *
1813   * @adev: amdgpu_device pointer
1814   *
1815   * Validates the vm size in GB specified via module parameter.
1816   * The VM size is the size of the GPU virtual memory space in GB.
1817   */
1818  static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1819  {
1820  	/* no need to check the default value */
1821  	if (amdgpu_vm_size == -1)
1822  		return;
1823  
1824  	if (amdgpu_vm_size < 1) {
1825  		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1826  			 amdgpu_vm_size);
1827  		amdgpu_vm_size = -1;
1828  	}
1829  }
1830  
1831  static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1832  {
1833  	struct sysinfo si;
1834  	bool is_os_64 = (sizeof(void *) == 8);
1835  	uint64_t total_memory;
1836  	uint64_t dram_size_seven_GB = 0x1B8000000;
1837  	uint64_t dram_size_three_GB = 0xB8000000;
1838  
1839  	if (amdgpu_smu_memory_pool_size == 0)
1840  		return;
1841  
1842  	if (!is_os_64) {
1843  		DRM_WARN("Not 64-bit OS, feature not supported\n");
1844  		goto def_value;
1845  	}
1846  	si_meminfo(&si);
1847  	total_memory = (uint64_t)si.totalram * si.mem_unit;
1848  
1849  	if ((amdgpu_smu_memory_pool_size == 1) ||
1850  		(amdgpu_smu_memory_pool_size == 2)) {
1851  		if (total_memory < dram_size_three_GB)
1852  			goto def_value1;
1853  	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1854  		(amdgpu_smu_memory_pool_size == 8)) {
1855  		if (total_memory < dram_size_seven_GB)
1856  			goto def_value1;
1857  	} else {
1858  		DRM_WARN("Smu memory pool size not supported\n");
1859  		goto def_value;
1860  	}
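      	/* amdgpu_smu_memory_pool_size is in units of 256 MB (value << 28 bytes) */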
1861  	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1862  
1863  	return;
1864  
1865  def_value1:
1866  	DRM_WARN("Not enough system memory\n");
1867  def_value:
1868  	adev->pm.smu_prv_buffer_size = 0;
1869  }
1870  
1871  static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1872  {
1873  	if (!(adev->flags & AMD_IS_APU) ||
1874  	    adev->asic_type < CHIP_RAVEN)
1875  		return 0;
1876  
1877  	switch (adev->asic_type) {
1878  	case CHIP_RAVEN:
1879  		if (adev->pdev->device == 0x15dd)
1880  			adev->apu_flags |= AMD_APU_IS_RAVEN;
1881  		if (adev->pdev->device == 0x15d8)
1882  			adev->apu_flags |= AMD_APU_IS_PICASSO;
1883  		break;
1884  	case CHIP_RENOIR:
1885  		if ((adev->pdev->device == 0x1636) ||
1886  		    (adev->pdev->device == 0x164c))
1887  			adev->apu_flags |= AMD_APU_IS_RENOIR;
1888  		else
1889  			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1890  		break;
1891  	case CHIP_VANGOGH:
1892  		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1893  		break;
1894  	case CHIP_YELLOW_CARP:
1895  		break;
1896  	case CHIP_CYAN_SKILLFISH:
1897  		if ((adev->pdev->device == 0x13FE) ||
1898  		    (adev->pdev->device == 0x143F))
1899  			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1900  		break;
1901  	default:
1902  		break;
1903  	}
1904  
1905  	return 0;
1906  }
1907  
1908  /**
1909   * amdgpu_device_check_arguments - validate module params
1910   *
1911   * @adev: amdgpu_device pointer
1912   *
1913   * Validates certain module parameters and updates
1914   * the associated values used by the driver (all asics).
1915   */
1916  static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1917  {
1918  	if (amdgpu_sched_jobs < 4) {
1919  		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1920  			 amdgpu_sched_jobs);
1921  		amdgpu_sched_jobs = 4;
1922  	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1923  		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1924  			 amdgpu_sched_jobs);
1925  		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1926  	}
1927  
1928  	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1929  		/* gart size must be greater or equal to 32M */
1930  		dev_warn(adev->dev, "gart size (%d) too small\n",
1931  			 amdgpu_gart_size);
1932  		amdgpu_gart_size = -1;
1933  	}
1934  
1935  	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1936  		/* gtt size must be greater or equal to 32M */
1937  		dev_warn(adev->dev, "gtt size (%d) too small\n",
1938  				 amdgpu_gtt_size);
1939  		amdgpu_gtt_size = -1;
1940  	}
1941  
1942  	/* valid range is between 4 and 9 inclusive */
1943  	if (amdgpu_vm_fragment_size != -1 &&
1944  	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1945  		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1946  		amdgpu_vm_fragment_size = -1;
1947  	}
1948  
1949  	if (amdgpu_sched_hw_submission < 2) {
1950  		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1951  			 amdgpu_sched_hw_submission);
1952  		amdgpu_sched_hw_submission = 2;
1953  	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1954  		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1955  			 amdgpu_sched_hw_submission);
1956  		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1957  	}
1958  
1959  	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1960  		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1961  		amdgpu_reset_method = -1;
1962  	}
1963  
1964  	amdgpu_device_check_smu_prv_buffer_size(adev);
1965  
1966  	amdgpu_device_check_vm_size(adev);
1967  
1968  	amdgpu_device_check_block_size(adev);
1969  
1970  	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1971  
1972  	return 0;
1973  }
1974  
1975  /**
1976   * amdgpu_switcheroo_set_state - set switcheroo state
1977   *
1978   * @pdev: pci dev pointer
1979   * @state: vga_switcheroo state
1980   *
1981   * Callback for the switcheroo driver.  Suspends or resumes
1982   * the asics before or after it is powered up using ACPI methods.
1983   */
1984  static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1985  					enum vga_switcheroo_state state)
1986  {
1987  	struct drm_device *dev = pci_get_drvdata(pdev);
1988  	int r;
1989  
1990  	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1991  		return;
1992  
1993  	if (state == VGA_SWITCHEROO_ON) {
1994  		pr_info("switched on\n");
1995  		/* don't suspend or resume card normally */
1996  		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1997  
1998  		pci_set_power_state(pdev, PCI_D0);
1999  		amdgpu_device_load_pci_state(pdev);
2000  		r = pci_enable_device(pdev);
2001  		if (r)
2002  			DRM_WARN("pci_enable_device failed (%d)\n", r);
2003  		amdgpu_device_resume(dev, true);
2004  
2005  		dev->switch_power_state = DRM_SWITCH_POWER_ON;
2006  	} else {
2007  		pr_info("switched off\n");
2008  		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2009  		amdgpu_device_prepare(dev);
2010  		amdgpu_device_suspend(dev, true);
2011  		amdgpu_device_cache_pci_state(pdev);
2012  		/* Shut down the device */
2013  		pci_disable_device(pdev);
2014  		pci_set_power_state(pdev, PCI_D3cold);
2015  		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2016  	}
2017  }
2018  
2019  /**
2020   * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2021   *
2022   * @pdev: pci dev pointer
2023   *
2024   * Callback for the switcheroo driver.  Check if the switcheroo
2025   * state can be changed.
2026   * Returns true if the state can be changed, false if not.
2027   */
2028  static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2029  {
2030  	struct drm_device *dev = pci_get_drvdata(pdev);
2031  
2032  	/*
2033  	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
2034  	 * locking inversion with the driver load path. And the access here is
2035  	 * completely racy anyway. So don't bother with locking for now.
2036  	 */
2037  	return atomic_read(&dev->open_count) == 0;
2038  }
2039  
2040  static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2041  	.set_gpu_state = amdgpu_switcheroo_set_state,
2042  	.reprobe = NULL,
2043  	.can_switch = amdgpu_switcheroo_can_switch,
2044  };
2045  
2046  /**
2047   * amdgpu_device_ip_set_clockgating_state - set the CG state
2048   *
2049   * @dev: amdgpu_device pointer
2050   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2051   * @state: clockgating state (gate or ungate)
2052   *
2053   * Sets the requested clockgating state for all instances of
2054   * the hardware IP specified.
2055   * Returns the error code from the last instance.
2056   */
2057  int amdgpu_device_ip_set_clockgating_state(void *dev,
2058  					   enum amd_ip_block_type block_type,
2059  					   enum amd_clockgating_state state)
2060  {
2061  	struct amdgpu_device *adev = dev;
2062  	int i, r = 0;
2063  
2064  	for (i = 0; i < adev->num_ip_blocks; i++) {
2065  		if (!adev->ip_blocks[i].status.valid)
2066  			continue;
2067  		if (adev->ip_blocks[i].version->type != block_type)
2068  			continue;
2069  		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2070  			continue;
2071  		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2072  			(void *)adev, state);
2073  		if (r)
2074  			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2075  				  adev->ip_blocks[i].version->funcs->name, r);
2076  	}
2077  	return r;
2078  }
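
      /*
       * Illustrative usage (hypothetical caller): gating clockgating for all
       * GFX instances on a device would look like:
       *
       *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
       *                                          AMD_CG_STATE_GATE);
       */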
2079  
2080  /**
2081   * amdgpu_device_ip_set_powergating_state - set the PG state
2082   *
2083   * @dev: amdgpu_device pointer
2084   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2085   * @state: powergating state (gate or ungate)
2086   *
2087   * Sets the requested powergating state for all instances of
2088   * the hardware IP specified.
2089   * Returns the error code from the last instance.
2090   */
2091  int amdgpu_device_ip_set_powergating_state(void *dev,
2092  					   enum amd_ip_block_type block_type,
2093  					   enum amd_powergating_state state)
2094  {
2095  	struct amdgpu_device *adev = dev;
2096  	int i, r = 0;
2097  
2098  	for (i = 0; i < adev->num_ip_blocks; i++) {
2099  		if (!adev->ip_blocks[i].status.valid)
2100  			continue;
2101  		if (adev->ip_blocks[i].version->type != block_type)
2102  			continue;
2103  		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2104  			continue;
2105  		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2106  			(void *)adev, state);
2107  		if (r)
2108  			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2109  				  adev->ip_blocks[i].version->funcs->name, r);
2110  	}
2111  	return r;
2112  }
2113  
2114  /**
2115   * amdgpu_device_ip_get_clockgating_state - get the CG state
2116   *
2117   * @adev: amdgpu_device pointer
2118   * @flags: clockgating feature flags
2119   *
2120   * Walks the list of IPs on the device and updates the clockgating
2121   * flags for each IP.
2122   * Updates @flags with the feature flags for each hardware IP where
2123   * clockgating is enabled.
2124   */
2125  void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2126  					    u64 *flags)
2127  {
2128  	int i;
2129  
2130  	for (i = 0; i < adev->num_ip_blocks; i++) {
2131  		if (!adev->ip_blocks[i].status.valid)
2132  			continue;
2133  		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2134  			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2135  	}
2136  }
2137  
2138  /**
2139   * amdgpu_device_ip_wait_for_idle - wait for idle
2140   *
2141   * @adev: amdgpu_device pointer
2142   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2143   *
2144   * Waits for the requested hardware IP to be idle.
2145   * Returns 0 for success or a negative error code on failure.
2146   */
2147  int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2148  				   enum amd_ip_block_type block_type)
2149  {
2150  	int i, r;
2151  
2152  	for (i = 0; i < adev->num_ip_blocks; i++) {
2153  		if (!adev->ip_blocks[i].status.valid)
2154  			continue;
2155  		if (adev->ip_blocks[i].version->type == block_type) {
2156  			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2157  			if (r)
2158  				return r;
2159  			break;
2160  		}
2161  	}
2162  	return 0;
2163  
2164  }
2165  
2166  /**
2167   * amdgpu_device_ip_is_idle - is the hardware IP idle
2168   *
2169   * @adev: amdgpu_device pointer
2170   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2171   *
2172   * Check if the hardware IP is idle or not.
2173   * Returns true if the IP is idle, false if not.
2174   */
2175  bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2176  			      enum amd_ip_block_type block_type)
2177  {
2178  	int i;
2179  
2180  	for (i = 0; i < adev->num_ip_blocks; i++) {
2181  		if (!adev->ip_blocks[i].status.valid)
2182  			continue;
2183  		if (adev->ip_blocks[i].version->type == block_type)
2184  			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2185  	}
2186  	return true;
2187  
2188  }
2189  
2190  /**
2191   * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2192   *
2193   * @adev: amdgpu_device pointer
2194   * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2195   *
2196   * Returns a pointer to the hardware IP block structure
2197   * if it exists for the asic, otherwise NULL.
2198   */
2199  struct amdgpu_ip_block *
2200  amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2201  			      enum amd_ip_block_type type)
2202  {
2203  	int i;
2204  
2205  	for (i = 0; i < adev->num_ip_blocks; i++)
2206  		if (adev->ip_blocks[i].version->type == type)
2207  			return &adev->ip_blocks[i];
2208  
2209  	return NULL;
2210  }
2211  
2212  /**
2213   * amdgpu_device_ip_block_version_cmp
2214   *
2215   * @adev: amdgpu_device pointer
2216   * @type: enum amd_ip_block_type
2217   * @major: major version
2218   * @minor: minor version
2219   *
2220   * Return 0 if the installed IP block version is equal to or greater than
2221   * the requested one, 1 if it is smaller or the ip_block doesn't exist
2222   */
2223  int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2224  				       enum amd_ip_block_type type,
2225  				       u32 major, u32 minor)
2226  {
2227  	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2228  
2229  	if (ip_block && ((ip_block->version->major > major) ||
2230  			((ip_block->version->major == major) &&
2231  			(ip_block->version->minor >= minor))))
2232  		return 0;
2233  
2234  	return 1;
2235  }
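
      /*
       * Illustrative usage (hypothetical caller): checking that the SMC IP
       * block is at least version 7.0 would look like:
       *
       *   if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
       *                                          7, 0) == 0)
       *           ... SMC 7.0 or newer is present ...
       */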
2236  
2237  /**
2238   * amdgpu_device_ip_block_add
2239   *
2240   * @adev: amdgpu_device pointer
2241   * @ip_block_version: pointer to the IP to add
2242   *
2243   * Adds the IP block driver information to the collection of IPs
2244   * on the asic.
2245   */
2246  int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2247  			       const struct amdgpu_ip_block_version *ip_block_version)
2248  {
2249  	if (!ip_block_version)
2250  		return -EINVAL;
2251  
2252  	switch (ip_block_version->type) {
2253  	case AMD_IP_BLOCK_TYPE_VCN:
2254  		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2255  			return 0;
2256  		break;
2257  	case AMD_IP_BLOCK_TYPE_JPEG:
2258  		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2259  			return 0;
2260  		break;
2261  	default:
2262  		break;
2263  	}
2264  
2265  	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2266  		  ip_block_version->funcs->name);
2267  
2268  	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2269  
2270  	return 0;
2271  }
2272  
2273  /**
2274   * amdgpu_device_enable_virtual_display - enable virtual display feature
2275   *
2276   * @adev: amdgpu_device pointer
2277   *
2278   * Enables the virtual display feature if the user has enabled it via
2279   * the module parameter virtual_display.  This feature provides a virtual
2280   * display hardware on headless boards or in virtualized environments.
2281   * This function parses and validates the configuration string specified by
2282   * the user and configures the virtual display configuration (number of
2283   * virtual connectors, crtcs, etc.) specified.
2284   */
2285  static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2286  {
2287  	adev->enable_virtual_display = false;
2288  
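      	/*
      	 * Expected format, as parsed below:
      	 *   amdgpu.virtual_display=<pci bus id>,<crtc count>;<pci bus id>,<crtc count>;...
      	 * where the crtc count is optional (defaults to 1), or "all" to match
      	 * every device, e.g. (illustrative) amdgpu.virtual_display=0000:03:00.0,2
      	 */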
2289  	if (amdgpu_virtual_display) {
2290  		const char *pci_address_name = pci_name(adev->pdev);
2291  		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2292  
2293  		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2294  		pciaddstr_tmp = pciaddstr;
2295  		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2296  			pciaddname = strsep(&pciaddname_tmp, ",");
2297  			if (!strcmp("all", pciaddname)
2298  			    || !strcmp(pci_address_name, pciaddname)) {
2299  				long num_crtc;
2300  				int res = -1;
2301  
2302  				adev->enable_virtual_display = true;
2303  
2304  				if (pciaddname_tmp)
2305  					res = kstrtol(pciaddname_tmp, 10,
2306  						      &num_crtc);
2307  
2308  				if (!res) {
2309  					if (num_crtc < 1)
2310  						num_crtc = 1;
2311  					if (num_crtc > 6)
2312  						num_crtc = 6;
2313  					adev->mode_info.num_crtc = num_crtc;
2314  				} else {
2315  					adev->mode_info.num_crtc = 1;
2316  				}
2317  				break;
2318  			}
2319  		}
2320  
2321  		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2322  			 amdgpu_virtual_display, pci_address_name,
2323  			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2324  
2325  		kfree(pciaddstr);
2326  	}
2327  }
2328  
2329  void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2330  {
2331  	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2332  		adev->mode_info.num_crtc = 1;
2333  		adev->enable_virtual_display = true;
2334  		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2335  			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2336  	}
2337  }
2338  
2339  /**
2340   * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2341   *
2342   * @adev: amdgpu_device pointer
2343   *
2344   * Parses the asic configuration parameters specified in the gpu info
2345   * firmware and makes them available to the driver for use in configuring
2346   * the asic.
2347   * Returns 0 on success, -EINVAL on failure.
2348   */
2349  static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2350  {
2351  	const char *chip_name;
2352  	char fw_name[40];
2353  	int err;
2354  	const struct gpu_info_firmware_header_v1_0 *hdr;
2355  
2356  	adev->firmware.gpu_info_fw = NULL;
2357  
2358  	if (adev->mman.discovery_bin)
2359  		return 0;
2360  
2361  	switch (adev->asic_type) {
2362  	default:
2363  		return 0;
2364  	case CHIP_VEGA10:
2365  		chip_name = "vega10";
2366  		break;
2367  	case CHIP_VEGA12:
2368  		chip_name = "vega12";
2369  		break;
2370  	case CHIP_RAVEN:
2371  		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2372  			chip_name = "raven2";
2373  		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2374  			chip_name = "picasso";
2375  		else
2376  			chip_name = "raven";
2377  		break;
2378  	case CHIP_ARCTURUS:
2379  		chip_name = "arcturus";
2380  		break;
2381  	case CHIP_NAVI12:
2382  		chip_name = "navi12";
2383  		break;
2384  	}
2385  
2386  	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2387  	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2388  	if (err) {
2389  		dev_err(adev->dev,
2390  			"Failed to get gpu_info firmware \"%s\"\n",
2391  			fw_name);
2392  		goto out;
2393  	}
2394  
2395  	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2396  	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2397  
2398  	switch (hdr->version_major) {
2399  	case 1:
2400  	{
2401  		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2402  			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2403  								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2404  
2405  		/*
2406  		 * Should be dropped when DAL no longer needs it.
2407  		 */
2408  		if (adev->asic_type == CHIP_NAVI12)
2409  			goto parse_soc_bounding_box;
2410  
2411  		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2412  		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2413  		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2414  		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2415  		adev->gfx.config.max_texture_channel_caches =
2416  			le32_to_cpu(gpu_info_fw->gc_num_tccs);
2417  		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2418  		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2419  		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2420  		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2421  		adev->gfx.config.double_offchip_lds_buf =
2422  			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2423  		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2424  		adev->gfx.cu_info.max_waves_per_simd =
2425  			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2426  		adev->gfx.cu_info.max_scratch_slots_per_cu =
2427  			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2428  		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2429  		if (hdr->version_minor >= 1) {
2430  			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2431  				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2432  									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2433  			adev->gfx.config.num_sc_per_sh =
2434  				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2435  			adev->gfx.config.num_packer_per_sc =
2436  				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2437  		}
2438  
2439  parse_soc_bounding_box:
2440  		/*
2441  		 * soc bounding box info is not integrated in the discovery table,
2442  		 * so we always need to parse it from the gpu info firmware when needed.
2443  		 */
2444  		if (hdr->version_minor == 2) {
2445  			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2446  				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2447  									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2448  			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2449  		}
2450  		break;
2451  	}
2452  	default:
2453  		dev_err(adev->dev,
2454  			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2455  		err = -EINVAL;
2456  		goto out;
2457  	}
2458  out:
2459  	return err;
2460  }
2461  
2462  /**
2463   * amdgpu_device_ip_early_init - run early init for hardware IPs
2464   *
2465   * @adev: amdgpu_device pointer
2466   *
2467   * Early initialization pass for hardware IPs.  The hardware IPs that make
2468   * up each asic are discovered and each IP's early_init callback is run.  This
2469   * is the first stage in initializing the asic.
2470   * Returns 0 on success, negative error code on failure.
2471   */
2472  static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2473  {
2474  	struct pci_dev *parent;
2475  	int i, r;
2476  	bool total;
2477  
2478  	amdgpu_device_enable_virtual_display(adev);
2479  
2480  	if (amdgpu_sriov_vf(adev)) {
2481  		r = amdgpu_virt_request_full_gpu(adev, true);
2482  		if (r)
2483  			return r;
2484  	}
2485  
2486  	switch (adev->asic_type) {
2487  #ifdef CONFIG_DRM_AMDGPU_SI
2488  	case CHIP_VERDE:
2489  	case CHIP_TAHITI:
2490  	case CHIP_PITCAIRN:
2491  	case CHIP_OLAND:
2492  	case CHIP_HAINAN:
2493  		adev->family = AMDGPU_FAMILY_SI;
2494  		r = si_set_ip_blocks(adev);
2495  		if (r)
2496  			return r;
2497  		break;
2498  #endif
2499  #ifdef CONFIG_DRM_AMDGPU_CIK
2500  	case CHIP_BONAIRE:
2501  	case CHIP_HAWAII:
2502  	case CHIP_KAVERI:
2503  	case CHIP_KABINI:
2504  	case CHIP_MULLINS:
2505  		if (adev->flags & AMD_IS_APU)
2506  			adev->family = AMDGPU_FAMILY_KV;
2507  		else
2508  			adev->family = AMDGPU_FAMILY_CI;
2509  
2510  		r = cik_set_ip_blocks(adev);
2511  		if (r)
2512  			return r;
2513  		break;
2514  #endif
2515  	case CHIP_TOPAZ:
2516  	case CHIP_TONGA:
2517  	case CHIP_FIJI:
2518  	case CHIP_POLARIS10:
2519  	case CHIP_POLARIS11:
2520  	case CHIP_POLARIS12:
2521  	case CHIP_VEGAM:
2522  	case CHIP_CARRIZO:
2523  	case CHIP_STONEY:
2524  		if (adev->flags & AMD_IS_APU)
2525  			adev->family = AMDGPU_FAMILY_CZ;
2526  		else
2527  			adev->family = AMDGPU_FAMILY_VI;
2528  
2529  		r = vi_set_ip_blocks(adev);
2530  		if (r)
2531  			return r;
2532  		break;
2533  	default:
2534  		r = amdgpu_discovery_set_ip_blocks(adev);
2535  		if (r)
2536  			return r;
2537  		break;
2538  	}
2539  
2540  	if (amdgpu_has_atpx() &&
2541  	    (amdgpu_is_atpx_hybrid() ||
2542  	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2543  	    ((adev->flags & AMD_IS_APU) == 0) &&
2544  	    !dev_is_removable(&adev->pdev->dev))
2545  		adev->flags |= AMD_IS_PX;
2546  
2547  	if (!(adev->flags & AMD_IS_APU)) {
2548  		parent = pcie_find_root_port(adev->pdev);
2549  		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2550  	}
2551  
2552  
2553  	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2554  	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2555  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2556  	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2557  		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2558  	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2559  		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2560  
2561  	total = true;
2562  	for (i = 0; i < adev->num_ip_blocks; i++) {
2563  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2564  			DRM_WARN("disabled ip block: %d <%s>\n",
2565  				  i, adev->ip_blocks[i].version->funcs->name);
2566  			adev->ip_blocks[i].status.valid = false;
2567  		} else {
2568  			if (adev->ip_blocks[i].version->funcs->early_init) {
2569  				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2570  				if (r == -ENOENT) {
2571  					adev->ip_blocks[i].status.valid = false;
2572  				} else if (r) {
2573  					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2574  						  adev->ip_blocks[i].version->funcs->name, r);
2575  					total = false;
2576  				} else {
2577  					adev->ip_blocks[i].status.valid = true;
2578  				}
2579  			} else {
2580  				adev->ip_blocks[i].status.valid = true;
2581  			}
2582  		}
2583  		/* get the vbios after the asic_funcs are set up */
2584  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2585  			r = amdgpu_device_parse_gpu_info_fw(adev);
2586  			if (r)
2587  				return r;
2588  
2589  			/* Read BIOS */
2590  			if (amdgpu_device_read_bios(adev)) {
2591  				if (!amdgpu_get_bios(adev))
2592  					return -EINVAL;
2593  
2594  				r = amdgpu_atombios_init(adev);
2595  				if (r) {
2596  					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2597  					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2598  					return r;
2599  				}
2600  			}
2601  
2602  			/* get pf2vf msg info at its earliest time */
2603  			if (amdgpu_sriov_vf(adev))
2604  				amdgpu_virt_init_data_exchange(adev);
2605  
2606  		}
2607  	}
2608  	if (!total)
2609  		return -ENODEV;
2610  
2611  	amdgpu_amdkfd_device_probe(adev);
2612  	adev->cg_flags &= amdgpu_cg_mask;
2613  	adev->pg_flags &= amdgpu_pg_mask;
2614  
2615  	return 0;
2616  }
2617  
2618  static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2619  {
2620  	int i, r;
2621  
2622  	for (i = 0; i < adev->num_ip_blocks; i++) {
2623  		if (!adev->ip_blocks[i].status.sw)
2624  			continue;
2625  		if (adev->ip_blocks[i].status.hw)
2626  			continue;
2627  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2628  		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2629  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2630  			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2631  			if (r) {
2632  				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2633  					  adev->ip_blocks[i].version->funcs->name, r);
2634  				return r;
2635  			}
2636  			adev->ip_blocks[i].status.hw = true;
2637  		}
2638  	}
2639  
2640  	return 0;
2641  }
2642  
2643  static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2644  {
2645  	int i, r;
2646  
2647  	for (i = 0; i < adev->num_ip_blocks; i++) {
2648  		if (!adev->ip_blocks[i].status.sw)
2649  			continue;
2650  		if (adev->ip_blocks[i].status.hw)
2651  			continue;
2652  		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2653  		if (r) {
2654  			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2655  				  adev->ip_blocks[i].version->funcs->name, r);
2656  			return r;
2657  		}
2658  		adev->ip_blocks[i].status.hw = true;
2659  	}
2660  
2661  	return 0;
2662  }
2663  
2664  static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2665  {
2666  	int r = 0;
2667  	int i;
2668  	uint32_t smu_version;
2669  
2670  	if (adev->asic_type >= CHIP_VEGA10) {
2671  		for (i = 0; i < adev->num_ip_blocks; i++) {
2672  			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2673  				continue;
2674  
2675  			if (!adev->ip_blocks[i].status.sw)
2676  				continue;
2677  
2678  			/* no need to do the fw loading again if already done */
2679  			if (adev->ip_blocks[i].status.hw == true)
2680  				break;
2681  
2682  			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2683  				r = adev->ip_blocks[i].version->funcs->resume(adev);
2684  				if (r) {
2685  					DRM_ERROR("resume of IP block <%s> failed %d\n",
2686  							  adev->ip_blocks[i].version->funcs->name, r);
2687  					return r;
2688  				}
2689  			} else {
2690  				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2691  				if (r) {
2692  					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2693  							  adev->ip_blocks[i].version->funcs->name, r);
2694  					return r;
2695  				}
2696  			}
2697  
2698  			adev->ip_blocks[i].status.hw = true;
2699  			break;
2700  		}
2701  	}
2702  
2703  	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2704  		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2705  
2706  	return r;
2707  }
2708  
2709  static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2710  {
2711  	long timeout;
2712  	int r, i;
2713  
2714  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2715  		struct amdgpu_ring *ring = adev->rings[i];
2716  
2717  		/* No need to set up the GPU scheduler for rings that don't need it */
2718  		if (!ring || ring->no_scheduler)
2719  			continue;
2720  
2721  		switch (ring->funcs->type) {
2722  		case AMDGPU_RING_TYPE_GFX:
2723  			timeout = adev->gfx_timeout;
2724  			break;
2725  		case AMDGPU_RING_TYPE_COMPUTE:
2726  			timeout = adev->compute_timeout;
2727  			break;
2728  		case AMDGPU_RING_TYPE_SDMA:
2729  			timeout = adev->sdma_timeout;
2730  			break;
2731  		default:
2732  			timeout = adev->video_timeout;
2733  			break;
2734  		}
2735  
2736  		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2737  				   DRM_SCHED_PRIORITY_COUNT,
2738  				   ring->num_hw_submission, 0,
2739  				   timeout, adev->reset_domain->wq,
2740  				   ring->sched_score, ring->name,
2741  				   adev->dev);
2742  		if (r) {
2743  			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2744  				  ring->name);
2745  			return r;
2746  		}
2747  		r = amdgpu_uvd_entity_init(adev, ring);
2748  		if (r) {
2749  			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2750  				  ring->name);
2751  			return r;
2752  		}
2753  		r = amdgpu_vce_entity_init(adev, ring);
2754  		if (r) {
2755  			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2756  				  ring->name);
2757  			return r;
2758  		}
2759  	}
2760  
2761  	amdgpu_xcp_update_partition_sched_list(adev);
2762  
2763  	return 0;
2764  }
2765  
2766  
2767  /**
2768   * amdgpu_device_ip_init - run init for hardware IPs
2769   *
2770   * @adev: amdgpu_device pointer
2771   *
2772   * Main initialization pass for hardware IPs.  The list of all the hardware
2773   * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2774   * are run.  sw_init initializes the software state associated with each IP
2775   * and hw_init initializes the hardware associated with each IP.
2776   * Returns 0 on success, negative error code on failure.
2777   */
2778  static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2779  {
2780  	int i, r;
2781  
2782  	r = amdgpu_ras_init(adev);
2783  	if (r)
2784  		return r;
2785  
2786  	for (i = 0; i < adev->num_ip_blocks; i++) {
2787  		if (!adev->ip_blocks[i].status.valid)
2788  			continue;
2789  		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2790  		if (r) {
2791  			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2792  				  adev->ip_blocks[i].version->funcs->name, r);
2793  			goto init_failed;
2794  		}
2795  		adev->ip_blocks[i].status.sw = true;
2796  
2797  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2798  			/* need to do common hw init early so everything is set up for gmc */
2799  			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2800  			if (r) {
2801  				DRM_ERROR("hw_init %d failed %d\n", i, r);
2802  				goto init_failed;
2803  			}
2804  			adev->ip_blocks[i].status.hw = true;
2805  		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2806  			/* need to do gmc hw init early so we can allocate gpu mem */
2807  			/* Try to reserve bad pages early */
2808  			if (amdgpu_sriov_vf(adev))
2809  				amdgpu_virt_exchange_data(adev);
2810  
2811  			r = amdgpu_device_mem_scratch_init(adev);
2812  			if (r) {
2813  				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2814  				goto init_failed;
2815  			}
2816  			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2817  			if (r) {
2818  				DRM_ERROR("hw_init %d failed %d\n", i, r);
2819  				goto init_failed;
2820  			}
2821  			r = amdgpu_device_wb_init(adev);
2822  			if (r) {
2823  				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2824  				goto init_failed;
2825  			}
2826  			adev->ip_blocks[i].status.hw = true;
2827  
2828  			/* right after GMC hw init, we create CSA */
2829  			if (adev->gfx.mcbp) {
2830  				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2831  							       AMDGPU_GEM_DOMAIN_VRAM |
2832  							       AMDGPU_GEM_DOMAIN_GTT,
2833  							       AMDGPU_CSA_SIZE);
2834  				if (r) {
2835  					DRM_ERROR("allocate CSA failed %d\n", r);
2836  					goto init_failed;
2837  				}
2838  			}
2839  
2840  			r = amdgpu_seq64_init(adev);
2841  			if (r) {
2842  				DRM_ERROR("allocate seq64 failed %d\n", r);
2843  				goto init_failed;
2844  			}
2845  		}
2846  	}
2847  
2848  	if (amdgpu_sriov_vf(adev))
2849  		amdgpu_virt_init_data_exchange(adev);
2850  
2851  	r = amdgpu_ib_pool_init(adev);
2852  	if (r) {
2853  		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2854  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2855  		goto init_failed;
2856  	}
2857  
2858  	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2859  	if (r)
2860  		goto init_failed;
2861  
2862  	r = amdgpu_device_ip_hw_init_phase1(adev);
2863  	if (r)
2864  		goto init_failed;
2865  
2866  	r = amdgpu_device_fw_loading(adev);
2867  	if (r)
2868  		goto init_failed;
2869  
2870  	r = amdgpu_device_ip_hw_init_phase2(adev);
2871  	if (r)
2872  		goto init_failed;
2873  
2874  	/*
2875  	 * Retired pages will be loaded from eeprom and reserved here;
2876  	 * it should be called after amdgpu_device_ip_hw_init_phase2 since,
2877  	 * for some ASICs, the RAS EEPROM code relies on the SMU being fully
2878  	 * functional for I2C communication, which is only true at this point.
2879  	 *
2880  	 * amdgpu_ras_recovery_init may fail, but the upper level only cares
2881  	 * about failures caused by a bad gpu situation and stops the amdgpu
2882  	 * init process accordingly. For other failure cases, it still releases
2883  	 * all the resources and prints an error message, rather than returning
2884  	 * a negative value to the upper level.
2885  	 *
2886  	 * Note: theoretically, this should be called before all vram allocations
2887  	 * to protect retired pages from being abused.
2888  	 */
2889  	r = amdgpu_ras_recovery_init(adev);
2890  	if (r)
2891  		goto init_failed;
2892  
2893  	/*
2894  	 * In case of XGMI, grab an extra reference on the reset domain for this device
2895  	 */
2896  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2897  		if (amdgpu_xgmi_add_device(adev) == 0) {
2898  			if (!amdgpu_sriov_vf(adev)) {
2899  				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2900  
2901  				if (WARN_ON(!hive)) {
2902  					r = -ENOENT;
2903  					goto init_failed;
2904  				}
2905  
2906  				if (!hive->reset_domain ||
2907  				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2908  					r = -ENOENT;
2909  					amdgpu_put_xgmi_hive(hive);
2910  					goto init_failed;
2911  				}
2912  
2913  				/* Drop the early temporary reset domain we created for device */
2914  				amdgpu_reset_put_reset_domain(adev->reset_domain);
2915  				adev->reset_domain = hive->reset_domain;
2916  				amdgpu_put_xgmi_hive(hive);
2917  			}
2918  		}
2919  	}
2920  
2921  	r = amdgpu_device_init_schedulers(adev);
2922  	if (r)
2923  		goto init_failed;
2924  
2925  	if (adev->mman.buffer_funcs_ring->sched.ready)
2926  		amdgpu_ttm_set_buffer_funcs_status(adev, true);
2927  
2928  	/* Don't init kfd if the whole hive needs to be reset during init */
2929  	if (!adev->gmc.xgmi.pending_reset) {
2930  		kgd2kfd_init_zone_device(adev);
2931  		amdgpu_amdkfd_device_init(adev);
2932  	}
2933  
2934  	amdgpu_fru_get_product_info(adev);
2935  
2936  init_failed:
2937  
2938  	return r;
2939  }
2940  
2941  /**
2942   * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2943   *
2944   * @adev: amdgpu_device pointer
2945   *
2946   * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2947   * this function before a GPU reset.  If the value is retained after a
2948   * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2949   */
2950  static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2951  {
2952  	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2953  }
2954  
2955  /**
2956   * amdgpu_device_check_vram_lost - check if vram is valid
2957   *
2958   * @adev: amdgpu_device pointer
2959   *
2960   * Checks the reset magic value written to the gart pointer in VRAM.
2961   * The driver calls this after a GPU reset to see if the contents of
2962   * VRAM have been lost or not.
2963   * returns true if vram is lost, false if not.
2964   */
2965  static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2966  {
2967  	if (memcmp(adev->gart.ptr, adev->reset_magic,
2968  			AMDGPU_RESET_MAGIC_NUM))
2969  		return true;
2970  
2971  	if (!amdgpu_in_reset(adev))
2972  		return false;
2973  
2974  	/*
2975  	 * For all ASICs with baco/mode1 reset, the VRAM is
2976  	 * always assumed to be lost.
2977  	 */
2978  	switch (amdgpu_asic_reset_method(adev)) {
2979  	case AMD_RESET_METHOD_BACO:
2980  	case AMD_RESET_METHOD_MODE1:
2981  		return true;
2982  	default:
2983  		return false;
2984  	}
2985  }
2986  
2987  /**
2988   * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2989   *
2990   * @adev: amdgpu_device pointer
2991   * @state: clockgating state (gate or ungate)
2992   *
2993   * The list of all the hardware IPs that make up the asic is walked and the
2994   * set_clockgating_state callbacks are run.
2995   * Late initialization pass enabling clockgating for hardware IPs.
2996   * Fini or suspend, pass disabling clockgating for hardware IPs.
2997   * Returns 0 on success, negative error code on failure.
2998   */
2999  
3000  int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3001  			       enum amd_clockgating_state state)
3002  {
3003  	int i, j, r;
3004  
3005  	if (amdgpu_emu_mode == 1)
3006  		return 0;
3007  
3008  	for (j = 0; j < adev->num_ip_blocks; j++) {
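      		/* gate in forward IP order, ungate in reverse order */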
3009  		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3010  		if (!adev->ip_blocks[i].status.late_initialized)
3011  			continue;
3012  		/* skip CG for GFX, SDMA on S0ix */
3013  		if (adev->in_s0ix &&
3014  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3015  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3016  			continue;
3017  		/* skip CG for VCE/UVD, it's handled specially */
3018  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3019  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3020  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3021  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3022  		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3023  			/* enable clockgating to save power */
3024  			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3025  										     state);
3026  			if (r) {
3027  				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3028  					  adev->ip_blocks[i].version->funcs->name, r);
3029  				return r;
3030  			}
3031  		}
3032  	}
3033  
3034  	return 0;
3035  }
3036  
3037  int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3038  			       enum amd_powergating_state state)
3039  {
3040  	int i, j, r;
3041  
3042  	if (amdgpu_emu_mode == 1)
3043  		return 0;
3044  
3045  	for (j = 0; j < adev->num_ip_blocks; j++) {
3046  		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3047  		if (!adev->ip_blocks[i].status.late_initialized)
3048  			continue;
3049  		/* skip PG for GFX, SDMA on S0ix */
3050  		if (adev->in_s0ix &&
3051  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3052  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3053  			continue;
3054  		/* skip PG for VCE/UVD, it's handled specially */
3055  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3056  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3057  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3058  		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3059  		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
3060  			/* enable powergating to save power */
3061  			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3062  											state);
3063  			if (r) {
3064  				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3065  					  adev->ip_blocks[i].version->funcs->name, r);
3066  				return r;
3067  			}
3068  		}
3069  	}
3070  	return 0;
3071  }
3072  
3073  static int amdgpu_device_enable_mgpu_fan_boost(void)
3074  {
3075  	struct amdgpu_gpu_instance *gpu_ins;
3076  	struct amdgpu_device *adev;
3077  	int i, ret = 0;
3078  
3079  	mutex_lock(&mgpu_info.mutex);
3080  
3081  	/*
3082  	 * MGPU fan boost feature should be enabled
3083  	 * only when there are two or more dGPUs in
3084  	 * the system
3085  	 */
3086  	if (mgpu_info.num_dgpu < 2)
3087  		goto out;
3088  
3089  	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3090  		gpu_ins = &(mgpu_info.gpu_ins[i]);
3091  		adev = gpu_ins->adev;
3092  		if (!(adev->flags & AMD_IS_APU) &&
3093  		    !gpu_ins->mgpu_fan_enabled) {
3094  			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3095  			if (ret)
3096  				break;
3097  
3098  			gpu_ins->mgpu_fan_enabled = 1;
3099  		}
3100  	}
3101  
3102  out:
3103  	mutex_unlock(&mgpu_info.mutex);
3104  
3105  	return ret;
3106  }
3107  
3108  /**
3109   * amdgpu_device_ip_late_init - run late init for hardware IPs
3110   *
3111   * @adev: amdgpu_device pointer
3112   *
3113   * Late initialization pass for hardware IPs.  The list of all the hardware
3114   * IPs that make up the asic is walked and the late_init callbacks are run.
3115   * late_init covers any special initialization that an IP requires
3116   * after all of the IPs have been initialized or something that needs to happen
3117   * late in the init process.
3118   * Returns 0 on success, negative error code on failure.
3119   */
3120  static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3121  {
3122  	struct amdgpu_gpu_instance *gpu_instance;
3123  	int i = 0, r;
3124  
3125  	for (i = 0; i < adev->num_ip_blocks; i++) {
3126  		if (!adev->ip_blocks[i].status.hw)
3127  			continue;
3128  		if (adev->ip_blocks[i].version->funcs->late_init) {
3129  			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3130  			if (r) {
3131  				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3132  					  adev->ip_blocks[i].version->funcs->name, r);
3133  				return r;
3134  			}
3135  		}
3136  		adev->ip_blocks[i].status.late_initialized = true;
3137  	}
3138  
3139  	r = amdgpu_ras_late_init(adev);
3140  	if (r) {
3141  		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3142  		return r;
3143  	}
3144  
3145  	amdgpu_ras_set_error_query_ready(adev, true);
3146  
3147  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3148  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3149  
3150  	amdgpu_device_fill_reset_magic(adev);
3151  
3152  	r = amdgpu_device_enable_mgpu_fan_boost();
3153  	if (r)
3154  		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3155  
3156  	/* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
3157  	if (amdgpu_passthrough(adev) &&
3158  	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3159  	     adev->asic_type == CHIP_ALDEBARAN))
3160  		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3161  
3162  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3163  		mutex_lock(&mgpu_info.mutex);
3164  
3165  		/*
3166  		 * Reset the device p-state to low, as it was booted with it high.
3167  		 *
3168  		 * This should be performed only after all devices from the same
3169  		 * hive have been initialized.
3170  		 *
3171  		 * However, the number of devices in the hive is not known in
3172  		 * advance, as they are counted one by one during device initialization.
3173  		 *
3174  		 * So we wait until all XGMI interlinked devices are initialized.
3175  		 * This may add some delay, as those devices may come from
3176  		 * different hives. But that should be OK.
3177  		 */
3178  		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3179  			for (i = 0; i < mgpu_info.num_gpu; i++) {
3180  				gpu_instance = &(mgpu_info.gpu_ins[i]);
3181  				if (gpu_instance->adev->flags & AMD_IS_APU)
3182  					continue;
3183  
3184  				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3185  						AMDGPU_XGMI_PSTATE_MIN);
3186  				if (r) {
3187  					DRM_ERROR("pstate setting failed (%d).\n", r);
3188  					break;
3189  				}
3190  			}
3191  		}
3192  
3193  		mutex_unlock(&mgpu_info.mutex);
3194  	}
3195  
3196  	return 0;
3197  }
3198  
3199  /**
3200   * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3201   *
3202   * @adev: amdgpu_device pointer
3203   *
3204   * For ASICs that need to disable the SMC first
3205   */
3206  static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3207  {
3208  	int i, r;
3209  
3210  	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3211  		return;
3212  
3213  	for (i = 0; i < adev->num_ip_blocks; i++) {
3214  		if (!adev->ip_blocks[i].status.hw)
3215  			continue;
3216  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3217  			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3218  			/* XXX handle errors */
3219  			if (r) {
3220  				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3221  					  adev->ip_blocks[i].version->funcs->name, r);
3222  			}
3223  			adev->ip_blocks[i].status.hw = false;
3224  			break;
3225  		}
3226  	}
3227  }
3228  
3229  static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3230  {
3231  	int i, r;
3232  
3233  	for (i = 0; i < adev->num_ip_blocks; i++) {
3234  		if (!adev->ip_blocks[i].version->funcs->early_fini)
3235  			continue;
3236  
3237  		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3238  		if (r) {
3239  			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3240  				  adev->ip_blocks[i].version->funcs->name, r);
3241  		}
3242  	}
3243  
3244  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3245  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3246  
3247  	amdgpu_amdkfd_suspend(adev, false);
3248  
3249  	/* Workaround for ASICs that need to disable the SMC first */
3250  	amdgpu_device_smu_fini_early(adev);
3251  
3252  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3253  		if (!adev->ip_blocks[i].status.hw)
3254  			continue;
3255  
3256  		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3257  		/* XXX handle errors */
3258  		if (r) {
3259  			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3260  				  adev->ip_blocks[i].version->funcs->name, r);
3261  		}
3262  
3263  		adev->ip_blocks[i].status.hw = false;
3264  	}
3265  
3266  	if (amdgpu_sriov_vf(adev)) {
3267  		if (amdgpu_virt_release_full_gpu(adev, false))
3268  			DRM_ERROR("failed to release exclusive mode on fini\n");
3269  	}
3270  
3271  	return 0;
3272  }
3273  
3274  /**
3275   * amdgpu_device_ip_fini - run fini for hardware IPs
3276   *
3277   * @adev: amdgpu_device pointer
3278   *
3279   * Main teardown pass for hardware IPs.  The list of all the hardware
3280   * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3281   * are run.  hw_fini tears down the hardware associated with each IP
3282   * and sw_fini tears down any software state associated with each IP.
3283   * Returns 0 on success, negative error code on failure.
3284   */
3285  static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3286  {
3287  	int i, r;
3288  
3289  	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3290  		amdgpu_virt_release_ras_err_handler_data(adev);
3291  
3292  	if (adev->gmc.xgmi.num_physical_nodes > 1)
3293  		amdgpu_xgmi_remove_device(adev);
3294  
3295  	amdgpu_amdkfd_device_fini_sw(adev);
3296  
3297  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3298  		if (!adev->ip_blocks[i].status.sw)
3299  			continue;
3300  
3301  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3302  			amdgpu_ucode_free_bo(adev);
3303  			amdgpu_free_static_csa(&adev->virt.csa_obj);
3304  			amdgpu_device_wb_fini(adev);
3305  			amdgpu_device_mem_scratch_fini(adev);
3306  			amdgpu_ib_pool_fini(adev);
3307  			amdgpu_seq64_fini(adev);
3308  		}
3309  
3310  		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3311  		/* XXX handle errors */
3312  		if (r) {
3313  			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3314  				  adev->ip_blocks[i].version->funcs->name, r);
3315  		}
3316  		adev->ip_blocks[i].status.sw = false;
3317  		adev->ip_blocks[i].status.valid = false;
3318  	}
3319  
3320  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3321  		if (!adev->ip_blocks[i].status.late_initialized)
3322  			continue;
3323  		if (adev->ip_blocks[i].version->funcs->late_fini)
3324  			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3325  		adev->ip_blocks[i].status.late_initialized = false;
3326  	}
3327  
3328  	amdgpu_ras_fini(adev);
3329  
3330  	return 0;
3331  }
3332  
3333  /**
3334   * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3335   *
3336   * @work: work_struct.
3337   */
3338  static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3339  {
3340  	struct amdgpu_device *adev =
3341  		container_of(work, struct amdgpu_device, delayed_init_work.work);
3342  	int r;
3343  
3344  	r = amdgpu_ib_ring_tests(adev);
3345  	if (r)
3346  		DRM_ERROR("ib ring test failed (%d).\n", r);
3347  }
3348  
3349  static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3350  {
3351  	struct amdgpu_device *adev =
3352  		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3353  
3354  	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3355  	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3356  
3357  	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3358  		adev->gfx.gfx_off_state = true;
3359  }
3360  
3361  /**
3362   * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3363   *
3364   * @adev: amdgpu_device pointer
3365   *
3366   * Main suspend function for hardware IPs.  The list of all the hardware
3367   * IPs that make up the asic is walked, clockgating is disabled and the
3368   * suspend callbacks are run.  suspend puts the hardware and software state
3369   * in each IP into a state suitable for suspend.
3370   * Returns 0 on success, negative error code on failure.
3371   */
3372  static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3373  {
3374  	int i, r;
3375  
3376  	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3377  	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3378  
3379  	/*
3380  	 * Per the PMFW team's suggestion, the driver needs to handle disabling
3381  	 * the gfxoff and df cstate features for the gpu reset (e.g. Mode1Reset)
3382  	 * scenario. Add the missing df cstate disablement here.
3383  	 */
3384  	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3385  		dev_warn(adev->dev, "Failed to disallow df cstate");
3386  
3387  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3388  		if (!adev->ip_blocks[i].status.valid)
3389  			continue;
3390  
3391  		/* displays are handled separately */
3392  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3393  			continue;
3394  
3395  		/* XXX handle errors */
3396  		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3397  		/* XXX handle errors */
3398  		if (r) {
3399  			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3400  				  adev->ip_blocks[i].version->funcs->name, r);
3401  			return r;
3402  		}
3403  
3404  		adev->ip_blocks[i].status.hw = false;
3405  	}
3406  
3407  	return 0;
3408  }
3409  
3410  /**
3411   * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3412   *
3413   * @adev: amdgpu_device pointer
3414   *
3415   * Main suspend function for hardware IPs.  The list of all the hardware
3416   * IPs that make up the asic is walked, clockgating is disabled and the
3417   * suspend callbacks are run.  suspend puts the hardware and software state
3418   * in each IP into a state suitable for suspend.
3419   * Returns 0 on success, negative error code on failure.
3420   */
3421  static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3422  {
3423  	int i, r;
3424  
3425  	if (adev->in_s0ix)
3426  		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3427  
3428  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3429  		if (!adev->ip_blocks[i].status.valid)
3430  			continue;
3431  		/* displays are handled in phase1 */
3432  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3433  			continue;
3434  		/* PSP lost connection when err_event_athub occurs */
3435  		if (amdgpu_ras_intr_triggered() &&
3436  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3437  			adev->ip_blocks[i].status.hw = false;
3438  			continue;
3439  		}
3440  
3441  		/* skip unnecessary suspend if we have not initialized them yet */
3442  		if (adev->gmc.xgmi.pending_reset &&
3443  		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3444  		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3445  		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3446  		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3447  			adev->ip_blocks[i].status.hw = false;
3448  			continue;
3449  		}
3450  
3451  		/* Skip suspend of gfx/mes and psp for S0ix.
3452  		 * gfx is in the gfxoff state, so on resume it will exit gfxoff just
3453  		 * like at runtime. PSP is also part of the always-on hardware,
3454  		 * so there is no need to suspend it.
3455  		 */
3456  		if (adev->in_s0ix &&
3457  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3458  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3459  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3460  			continue;
3461  
3462  		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3463  		if (adev->in_s0ix &&
3464  		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3465  		     IP_VERSION(5, 0, 0)) &&
3466  		    (adev->ip_blocks[i].version->type ==
3467  		     AMD_IP_BLOCK_TYPE_SDMA))
3468  			continue;
3469  
3470  		/* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3471  		 * These live in the TMR and are expected to be reused by PSP-TOS, which
3472  		 * reloads them from this location; RLC autoload is also triggered from
3473  		 * here by the PMFW -> PSP message during the re-init sequence.
3474  		 * Therefore, PSP suspend & resume should be skipped to avoid destroying
3475  		 * the TMR and reloading the FWs again on IMU enabled APU ASICs.
3476  		 */
3477  		if (amdgpu_in_reset(adev) &&
3478  		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3479  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3480  			continue;
3481  
3482  		/* XXX handle errors */
3483  		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3484  		/* XXX handle errors */
3485  		if (r) {
3486  			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3487  				  adev->ip_blocks[i].version->funcs->name, r);
3488  		}
3489  		adev->ip_blocks[i].status.hw = false;
3490  		/* handle putting the SMC in the appropriate state */
3491  		if (!amdgpu_sriov_vf(adev)) {
3492  			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3493  				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3494  				if (r) {
3495  					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3496  							adev->mp1_state, r);
3497  					return r;
3498  				}
3499  			}
3500  		}
3501  	}
3502  
3503  	return 0;
3504  }
3505  
3506  /**
3507   * amdgpu_device_ip_suspend - run suspend for hardware IPs
3508   *
3509   * @adev: amdgpu_device pointer
3510   *
3511   * Main suspend function for hardware IPs.  The list of all the hardware
3512   * IPs that make up the asic is walked, clockgating is disabled and the
3513   * suspend callbacks are run.  suspend puts the hardware and software state
3514   * in each IP into a state suitable for suspend.
3515   * Returns 0 on success, negative error code on failure.
3516   */
3517  int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3518  {
3519  	int r;
3520  
3521  	if (amdgpu_sriov_vf(adev)) {
3522  		amdgpu_virt_fini_data_exchange(adev);
3523  		amdgpu_virt_request_full_gpu(adev, false);
3524  	}
3525  
3526  	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3527  
3528  	r = amdgpu_device_ip_suspend_phase1(adev);
3529  	if (r)
3530  		return r;
3531  	r = amdgpu_device_ip_suspend_phase2(adev);
3532  
3533  	if (amdgpu_sriov_vf(adev))
3534  		amdgpu_virt_release_full_gpu(adev, false);
3535  
3536  	return r;
3537  }
3538  
3539  static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3540  {
3541  	int i, r;
3542  
3543  	static enum amd_ip_block_type ip_order[] = {
3544  		AMD_IP_BLOCK_TYPE_COMMON,
3545  		AMD_IP_BLOCK_TYPE_GMC,
3546  		AMD_IP_BLOCK_TYPE_PSP,
3547  		AMD_IP_BLOCK_TYPE_IH,
3548  	};
3549  
3550  	for (i = 0; i < adev->num_ip_blocks; i++) {
3551  		int j;
3552  		struct amdgpu_ip_block *block;
3553  
3554  		block = &adev->ip_blocks[i];
3555  		block->status.hw = false;
3556  
3557  		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3558  
3559  			if (block->version->type != ip_order[j] ||
3560  				!block->status.valid)
3561  				continue;
3562  
3563  			r = block->version->funcs->hw_init(adev);
3564  			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3565  			if (r)
3566  				return r;
3567  			block->status.hw = true;
3568  		}
3569  	}
3570  
3571  	return 0;
3572  }
3573  
3574  static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3575  {
3576  	int i, r;
3577  
3578  	static enum amd_ip_block_type ip_order[] = {
3579  		AMD_IP_BLOCK_TYPE_SMC,
3580  		AMD_IP_BLOCK_TYPE_DCE,
3581  		AMD_IP_BLOCK_TYPE_GFX,
3582  		AMD_IP_BLOCK_TYPE_SDMA,
3583  		AMD_IP_BLOCK_TYPE_MES,
3584  		AMD_IP_BLOCK_TYPE_UVD,
3585  		AMD_IP_BLOCK_TYPE_VCE,
3586  		AMD_IP_BLOCK_TYPE_VCN,
3587  		AMD_IP_BLOCK_TYPE_JPEG
3588  	};
3589  
3590  	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3591  		int j;
3592  		struct amdgpu_ip_block *block;
3593  
3594  		for (j = 0; j < adev->num_ip_blocks; j++) {
3595  			block = &adev->ip_blocks[j];
3596  
3597  			if (block->version->type != ip_order[i] ||
3598  				!block->status.valid ||
3599  				block->status.hw)
3600  				continue;
3601  
3602  			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3603  				r = block->version->funcs->resume(adev);
3604  			else
3605  				r = block->version->funcs->hw_init(adev);
3606  
3607  			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3608  			if (r)
3609  				return r;
3610  			block->status.hw = true;
3611  		}
3612  	}
3613  
3614  	return 0;
3615  }
3616  
3617  /**
3618   * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3619   *
3620   * @adev: amdgpu_device pointer
3621   *
3622   * First resume function for hardware IPs.  The list of all the hardware
3623   * IPs that make up the asic is walked and the resume callbacks are run for
3624   * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3625   * after a suspend and updates the software state as necessary.  This
3626   * function is also used for restoring the GPU after a GPU reset.
3627   * Returns 0 on success, negative error code on failure.
3628   */
3629  static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3630  {
3631  	int i, r;
3632  
3633  	for (i = 0; i < adev->num_ip_blocks; i++) {
3634  		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3635  			continue;
3636  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3637  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3638  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3639  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3640  
3641  			r = adev->ip_blocks[i].version->funcs->resume(adev);
3642  			if (r) {
3643  				DRM_ERROR("resume of IP block <%s> failed %d\n",
3644  					  adev->ip_blocks[i].version->funcs->name, r);
3645  				return r;
3646  			}
3647  			adev->ip_blocks[i].status.hw = true;
3648  		}
3649  	}
3650  
3651  	return 0;
3652  }
3653  
3654  /**
3655   * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3656   *
3657   * @adev: amdgpu_device pointer
3658   *
3659   * Second resume function for hardware IPs.  The list of all the hardware
3660   * IPs that make up the asic is walked and the resume callbacks are run for
3661   * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3662   * functional state after a suspend and updates the software state as
3663   * necessary.  This function is also used for restoring the GPU after a GPU
3664   * reset.
3665   * Returns 0 on success, negative error code on failure.
3666   */
3667  static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3668  {
3669  	int i, r;
3670  
3671  	for (i = 0; i < adev->num_ip_blocks; i++) {
3672  		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3673  			continue;
3674  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3675  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3676  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3677  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3678  			continue;
3679  		r = adev->ip_blocks[i].version->funcs->resume(adev);
3680  		if (r) {
3681  			DRM_ERROR("resume of IP block <%s> failed %d\n",
3682  				  adev->ip_blocks[i].version->funcs->name, r);
3683  			return r;
3684  		}
3685  		adev->ip_blocks[i].status.hw = true;
3686  	}
3687  
3688  	return 0;
3689  }
3690  
3691  /**
3692   * amdgpu_device_ip_resume - run resume for hardware IPs
3693   *
3694   * @adev: amdgpu_device pointer
3695   *
3696   * Main resume function for hardware IPs.  The hardware IPs
3697   * are split into two resume functions because they are
3698   * also used in recovering from a GPU reset and some additional
3699   * steps need to be taken between them.  In this case (S3/S4) they are
3700   * run sequentially.
3701   * Returns 0 on success, negative error code on failure.
3702   */
3703  static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3704  {
3705  	int r;
3706  
3707  	r = amdgpu_device_ip_resume_phase1(adev);
3708  	if (r)
3709  		return r;
3710  
3711  	r = amdgpu_device_fw_loading(adev);
3712  	if (r)
3713  		return r;
3714  
3715  	r = amdgpu_device_ip_resume_phase2(adev);
3716  
3717  	if (adev->mman.buffer_funcs_ring->sched.ready)
3718  		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3719  
3720  	return r;
3721  }
3722  
3723  /**
3724   * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3725   *
3726   * @adev: amdgpu_device pointer
3727   *
3728   * Query the VBIOS data tables to determine if the board supports SR-IOV.
3729   */
3730  static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3731  {
3732  	if (amdgpu_sriov_vf(adev)) {
3733  		if (adev->is_atom_fw) {
3734  			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3735  				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3736  		} else {
3737  			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3738  				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3739  		}
3740  
3741  		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3742  			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3743  	}
3744  }
3745  
3746  /**
3747   * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3748   *
3749   * @asic_type: AMD asic type
3750   *
3751   * Check if there is DC (new modesetting infrastructure) support for an asic.
3752   * Returns true if DC has support, false if not.
3753   */
3754  bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3755  {
3756  	switch (asic_type) {
3757  #ifdef CONFIG_DRM_AMDGPU_SI
3758  	case CHIP_HAINAN:
3759  #endif
3760  	case CHIP_TOPAZ:
3761  		/* chips with no display hardware */
3762  		return false;
3763  #if defined(CONFIG_DRM_AMD_DC)
3764  	case CHIP_TAHITI:
3765  	case CHIP_PITCAIRN:
3766  	case CHIP_VERDE:
3767  	case CHIP_OLAND:
3768  		/*
3769  		 * We have systems in the wild with these ASICs that require
3770  		 * LVDS and VGA support which is not supported with DC.
3771  		 *
3772  		 * Fall back to the non-DC driver here by default so as not to
3773  		 * cause regressions.
3774  		 */
3775  #if defined(CONFIG_DRM_AMD_DC_SI)
3776  		return amdgpu_dc > 0;
3777  #else
3778  		return false;
3779  #endif
3780  	case CHIP_BONAIRE:
3781  	case CHIP_KAVERI:
3782  	case CHIP_KABINI:
3783  	case CHIP_MULLINS:
3784  		/*
3785  		 * We have systems in the wild with these ASICs that require
3786  		 * VGA support which is not supported with DC.
3787  		 *
3788  		 * Fall back to the non-DC driver here by default so as not to
3789  		 * cause regressions.
3790  		 */
3791  		return amdgpu_dc > 0;
3792  	default:
3793  		return amdgpu_dc != 0;
3794  #else
3795  	default:
3796  		if (amdgpu_dc > 0)
3797  			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3798  		return false;
3799  #endif
3800  	}
3801  }
3802  
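/*
 * Illustrative sketch (not compiled into the driver): the amdgpu_dc module
 * option is understood to be -1 (auto, the default), 0 (force the legacy
 * display path) or 1 (force DC).  For the legacy LVDS/VGA ASICs handled
 * above, "auto" therefore resolves to the non-DC path and only an explicit
 * amdgpu.dc=1 opts into DC.  The helper below is hypothetical and merely
 * restates that decision for one such ASIC.
 */
#if 0
static bool example_bonaire_uses_dc(void)
{
	/* amdgpu_dc: -1 = auto, 0 = force legacy, 1 = force DC */
	return amdgpu_dc > 0;	/* auto (-1) and 0 both select the legacy path here */
}
#endif
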
3803  /**
3804   * amdgpu_device_has_dc_support - check if dc is supported
3805   *
3806   * @adev: amdgpu_device pointer
3807   *
3808   * Returns true for supported, false for not supported
3809   */
3810  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3811  {
3812  	if (adev->enable_virtual_display ||
3813  	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3814  		return false;
3815  
3816  	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3817  }
3818  
3819  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3820  {
3821  	struct amdgpu_device *adev =
3822  		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3823  	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3824  
3825  	/* It's a bug to not have a hive within this function */
3826  	if (WARN_ON(!hive))
3827  		return;
3828  
3829  	/*
3830  	 * Use task barrier to synchronize all xgmi reset works across the
3831  	 * hive. task_barrier_enter and task_barrier_exit will block
3832  	 * until all the threads running the xgmi reset works reach
3833  	 * those points. task_barrier_full will do both blocks.
3834  	 */
3835  	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3836  
3837  		task_barrier_enter(&hive->tb);
3838  		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3839  
3840  		if (adev->asic_reset_res)
3841  			goto fail;
3842  
3843  		task_barrier_exit(&hive->tb);
3844  		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3845  
3846  		if (adev->asic_reset_res)
3847  			goto fail;
3848  
3849  		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3850  	} else {
3851  
3852  		task_barrier_full(&hive->tb);
3853  		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3854  	}
3855  
3856  fail:
3857  	if (adev->asic_reset_res)
3858  		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3859  			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3860  	amdgpu_put_xgmi_hive(hive);
3861  }
3862  
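/*
 * Illustrative sketch (not compiled into the driver): the task barrier used
 * above makes every node of the hive rendezvous twice, so that all devices
 * enter BACO before any of them exits it.  The worker below is hypothetical
 * and only demonstrates the enter/exit pairing from <drm/task_barrier.h>;
 * registration of each task with the barrier is assumed to happen elsewhere.
 */
#if 0
static void example_hive_baco_worker(struct task_barrier *tb,
				     struct amdgpu_device *adev)
{
	task_barrier_enter(tb);		/* wait until every node reaches this point */
	amdgpu_device_baco_enter(adev_to_drm(adev));

	task_barrier_exit(tb);		/* wait until every node has entered BACO */
	amdgpu_device_baco_exit(adev_to_drm(adev));
}
#endif
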
3863  static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3864  {
3865  	char *input = amdgpu_lockup_timeout;
3866  	char *timeout_setting = NULL;
3867  	int index = 0;
3868  	long timeout;
3869  	int ret = 0;
3870  
3871  	/*
3872  	 * By default the timeout for non-compute jobs is 10000 ms
3873  	 * and 60000 ms for compute jobs.
3874  	 * Under SR-IOV, the compute timeout defaults to 60000 ms with
3875  	 * one VF per PP and 10000 ms otherwise.
3876  	 */
3877  	adev->gfx_timeout = msecs_to_jiffies(10000);
3878  	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3879  	if (amdgpu_sriov_vf(adev))
3880  		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3881  					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3882  	else
3883  		adev->compute_timeout =  msecs_to_jiffies(60000);
3884  
3885  	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3886  		while ((timeout_setting = strsep(&input, ",")) &&
3887  				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3888  			ret = kstrtol(timeout_setting, 0, &timeout);
3889  			if (ret)
3890  				return ret;
3891  
3892  			if (timeout == 0) {
3893  				index++;
3894  				continue;
3895  			} else if (timeout < 0) {
3896  				timeout = MAX_SCHEDULE_TIMEOUT;
3897  				dev_warn(adev->dev, "lockup timeout disabled");
3898  				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3899  			} else {
3900  				timeout = msecs_to_jiffies(timeout);
3901  			}
3902  
3903  			switch (index++) {
3904  			case 0:
3905  				adev->gfx_timeout = timeout;
3906  				break;
3907  			case 1:
3908  				adev->compute_timeout = timeout;
3909  				break;
3910  			case 2:
3911  				adev->sdma_timeout = timeout;
3912  				break;
3913  			case 3:
3914  				adev->video_timeout = timeout;
3915  				break;
3916  			default:
3917  				break;
3918  			}
3919  		}
3920  		/*
3921  		 * There is only one value specified and
3922  		 * it should apply to all non-compute jobs.
3923  		 */
3924  		if (index == 1) {
3925  			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3926  			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3927  				adev->compute_timeout = adev->gfx_timeout;
3928  		}
3929  	}
3930  
3931  	return ret;
3932  }
3933  
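/*
 * Illustrative sketch (not compiled into the driver): the lockup_timeout
 * option parsed above is a comma separated list of up to four millisecond
 * values applied in the order gfx, compute, sdma, video.  The helper name
 * and the example string below are hypothetical; they only mirror the
 * strsep()/kstrtol() loop above.
 */
#if 0
static void amdgpu_lockup_timeout_example(void)
{
	char buf[] = "10000,60000,10000,10000";	/* gfx,compute,sdma,video */
	char *input = buf, *tok;
	long ms[4] = { 0 };
	int idx = 0;

	while (idx < 4 && (tok = strsep(&input, ","))) {
		if (!kstrtol(tok, 0, &ms[idx]))
			idx++;	/* keep the successfully parsed value */
	}
	/* ms[] now holds 10000, 60000, 10000 and 10000 milliseconds */
}
#endif
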
3934  /**
3935   * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3936   *
3937   * @adev: amdgpu_device pointer
3938   *
3939   * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3940   */
3941  static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3942  {
3943  	struct iommu_domain *domain;
3944  
3945  	domain = iommu_get_domain_for_dev(adev->dev);
3946  	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3947  		adev->ram_is_direct_mapped = true;
3948  }
3949  
3950  static const struct attribute *amdgpu_dev_attributes[] = {
3951  	&dev_attr_pcie_replay_count.attr,
3952  	NULL
3953  };
3954  
3955  static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3956  {
3957  	if (amdgpu_mcbp == 1)
3958  		adev->gfx.mcbp = true;
3959  	else if (amdgpu_mcbp == 0)
3960  		adev->gfx.mcbp = false;
3961  
3962  	if (amdgpu_sriov_vf(adev))
3963  		adev->gfx.mcbp = true;
3964  
3965  	if (adev->gfx.mcbp)
3966  		DRM_INFO("MCBP is enabled\n");
3967  }
3968  
3969  /**
3970   * amdgpu_device_init - initialize the driver
3971   *
3972   * @adev: amdgpu_device pointer
3973   * @flags: driver flags
3974   *
3975   * Initializes the driver info and hw (all asics).
3976   * Returns 0 for success or an error on failure.
3977   * Called at driver startup.
3978   */
3979  int amdgpu_device_init(struct amdgpu_device *adev,
3980  		       uint32_t flags)
3981  {
3982  	struct drm_device *ddev = adev_to_drm(adev);
3983  	struct pci_dev *pdev = adev->pdev;
3984  	int r, i;
3985  	bool px = false;
3986  	u32 max_MBps;
3987  	int tmp;
3988  
3989  	adev->shutdown = false;
3990  	adev->flags = flags;
3991  
3992  	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3993  		adev->asic_type = amdgpu_force_asic_type;
3994  	else
3995  		adev->asic_type = flags & AMD_ASIC_MASK;
3996  
3997  	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3998  	if (amdgpu_emu_mode == 1)
3999  		adev->usec_timeout *= 10;
4000  	adev->gmc.gart_size = 512 * 1024 * 1024;
4001  	adev->accel_working = false;
4002  	adev->num_rings = 0;
4003  	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4004  	adev->mman.buffer_funcs = NULL;
4005  	adev->mman.buffer_funcs_ring = NULL;
4006  	adev->vm_manager.vm_pte_funcs = NULL;
4007  	adev->vm_manager.vm_pte_num_scheds = 0;
4008  	adev->gmc.gmc_funcs = NULL;
4009  	adev->harvest_ip_mask = 0x0;
4010  	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4011  	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4012  
4013  	adev->smc_rreg = &amdgpu_invalid_rreg;
4014  	adev->smc_wreg = &amdgpu_invalid_wreg;
4015  	adev->pcie_rreg = &amdgpu_invalid_rreg;
4016  	adev->pcie_wreg = &amdgpu_invalid_wreg;
4017  	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4018  	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4019  	adev->pciep_rreg = &amdgpu_invalid_rreg;
4020  	adev->pciep_wreg = &amdgpu_invalid_wreg;
4021  	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4022  	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4023  	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4024  	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4025  	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4026  	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4027  	adev->didt_rreg = &amdgpu_invalid_rreg;
4028  	adev->didt_wreg = &amdgpu_invalid_wreg;
4029  	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4030  	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4031  	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4032  	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4033  
4034  	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4035  		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4036  		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4037  
4038  	/* mutex initialization is all done here so we
4039  	 * can call these functions again without locking issues
4040  	 */
4041  	mutex_init(&adev->firmware.mutex);
4042  	mutex_init(&adev->pm.mutex);
4043  	mutex_init(&adev->gfx.gpu_clock_mutex);
4044  	mutex_init(&adev->srbm_mutex);
4045  	mutex_init(&adev->gfx.pipe_reserve_mutex);
4046  	mutex_init(&adev->gfx.gfx_off_mutex);
4047  	mutex_init(&adev->gfx.partition_mutex);
4048  	mutex_init(&adev->grbm_idx_mutex);
4049  	mutex_init(&adev->mn_lock);
4050  	mutex_init(&adev->virt.vf_errors.lock);
4051  	hash_init(adev->mn_hash);
4052  	mutex_init(&adev->psp.mutex);
4053  	mutex_init(&adev->notifier_lock);
4054  	mutex_init(&adev->pm.stable_pstate_ctx_lock);
4055  	mutex_init(&adev->benchmark_mutex);
4056  
4057  	amdgpu_device_init_apu_flags(adev);
4058  
4059  	r = amdgpu_device_check_arguments(adev);
4060  	if (r)
4061  		return r;
4062  
4063  	spin_lock_init(&adev->mmio_idx_lock);
4064  	spin_lock_init(&adev->smc_idx_lock);
4065  	spin_lock_init(&adev->pcie_idx_lock);
4066  	spin_lock_init(&adev->uvd_ctx_idx_lock);
4067  	spin_lock_init(&adev->didt_idx_lock);
4068  	spin_lock_init(&adev->gc_cac_idx_lock);
4069  	spin_lock_init(&adev->se_cac_idx_lock);
4070  	spin_lock_init(&adev->audio_endpt_idx_lock);
4071  	spin_lock_init(&adev->mm_stats.lock);
4072  	spin_lock_init(&adev->wb.lock);
4073  
4074  	INIT_LIST_HEAD(&adev->shadow_list);
4075  	mutex_init(&adev->shadow_list_lock);
4076  
4077  	INIT_LIST_HEAD(&adev->reset_list);
4078  
4079  	INIT_LIST_HEAD(&adev->ras_list);
4080  
4081  	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4082  
4083  	INIT_DELAYED_WORK(&adev->delayed_init_work,
4084  			  amdgpu_device_delayed_init_work_handler);
4085  	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4086  			  amdgpu_device_delay_enable_gfx_off);
4087  
4088  	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4089  
4090  	adev->gfx.gfx_off_req_count = 1;
4091  	adev->gfx.gfx_off_residency = 0;
4092  	adev->gfx.gfx_off_entrycount = 0;
4093  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4094  
4095  	atomic_set(&adev->throttling_logging_enabled, 1);
4096  	/*
4097  	 * If throttling continues, logging will be performed every minute
4098  	 * to avoid log flooding. "-1" is subtracted since the thermal
4099  	 * throttling interrupt comes every second. Thus, the total logging
4100  	 * interval is 59 seconds (ratelimited printk interval) + 1 second (waiting
4101  	 * for the throttling interrupt) = 60 seconds.
4102  	 */
4103  	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4104  	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4105  
4106  	/* Registers mapping */
4107  	/* TODO: block userspace mapping of io register */
4108  	if (adev->asic_type >= CHIP_BONAIRE) {
4109  		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4110  		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4111  	} else {
4112  		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4113  		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4114  	}
4115  
4116  	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4117  		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4118  
4119  	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4120  	if (!adev->rmmio)
4121  		return -ENOMEM;
4122  
4123  	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4124  	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4125  
4126  	/*
4127  	 * The reset domain needs to be present early, before any XGMI hive is
4128  	 * discovered and initialized, so the reset semaphore and in_gpu_reset
4129  	 * flag can be used early during init and before calling RREG32.
4130  	 */
4131  	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4132  	if (!adev->reset_domain)
4133  		return -ENOMEM;
4134  
4135  	/* detect hw virtualization here */
4136  	amdgpu_detect_virtualization(adev);
4137  
4138  	amdgpu_device_get_pcie_info(adev);
4139  
4140  	r = amdgpu_device_get_job_timeout_settings(adev);
4141  	if (r) {
4142  		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4143  		return r;
4144  	}
4145  
4146  	amdgpu_device_set_mcbp(adev);
4147  
4148  	/* early init functions */
4149  	r = amdgpu_device_ip_early_init(adev);
4150  	if (r)
4151  		return r;
4152  
4153  	/* Get rid of things like offb */
4154  	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4155  	if (r)
4156  		return r;
4157  
4158  	/* Enable TMZ based on IP_VERSION */
4159  	amdgpu_gmc_tmz_set(adev);
4160  
4161  	if (amdgpu_sriov_vf(adev) &&
4162  	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4163  		/* VF MMIO access (except the mailbox range) from the CPU
4164  		 * will be blocked during SR-IOV runtime
4165  		 */
4166  		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4167  
4168  	amdgpu_gmc_noretry_set(adev);
4169  	/* Need to get xgmi info early to decide the reset behavior */
4170  	if (adev->gmc.xgmi.supported) {
4171  		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4172  		if (r)
4173  			return r;
4174  	}
4175  
4176  	/* enable PCIE atomic ops */
4177  	if (amdgpu_sriov_vf(adev)) {
4178  		if (adev->virt.fw_reserve.p_pf2vf)
4179  			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4180  						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4181  				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4182  	/* APUs with gfx9 and newer don't rely on PCIe atomics; their internal
4183  	 * path natively supports atomics, so set have_atomics_support to true.
4184  	 */
4185  	} else if ((adev->flags & AMD_IS_APU) &&
4186  		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4187  		    IP_VERSION(9, 0, 0))) {
4188  		adev->have_atomics_support = true;
4189  	} else {
4190  		adev->have_atomics_support =
4191  			!pci_enable_atomic_ops_to_root(adev->pdev,
4192  					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4193  					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4194  	}
4195  
4196  	if (!adev->have_atomics_support)
4197  		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4198  
4199  	/* doorbell bar mapping and doorbell index init*/
4200  	amdgpu_doorbell_init(adev);
4201  
4202  	if (amdgpu_emu_mode == 1) {
4203  		/* post the asic on emulation mode */
4204  		emu_soc_asic_init(adev);
4205  		goto fence_driver_init;
4206  	}
4207  
4208  	amdgpu_reset_init(adev);
4209  
4210  	/* detect if we are with an SRIOV vbios */
4211  	if (adev->bios)
4212  		amdgpu_device_detect_sriov_bios(adev);
4213  
4214  	/* check if we need to reset the asic
4215  	 *  E.g., driver was not cleanly unloaded previously, etc.
4216  	 */
4217  	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4218  		if (adev->gmc.xgmi.num_physical_nodes) {
4219  			dev_info(adev->dev, "Pending hive reset.\n");
4220  			adev->gmc.xgmi.pending_reset = true;
4221  			/* Only init the blocks necessary for the SMU to handle the reset */
4222  			for (i = 0; i < adev->num_ip_blocks; i++) {
4223  				if (!adev->ip_blocks[i].status.valid)
4224  					continue;
4225  				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4226  				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4227  				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4228  				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4229  					DRM_DEBUG("IP %s disabled for hw_init.\n",
4230  						adev->ip_blocks[i].version->funcs->name);
4231  					adev->ip_blocks[i].status.hw = true;
4232  				}
4233  			}
4234  		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4235  			   !amdgpu_device_has_display_hardware(adev)) {
4236  			r = psp_gpu_reset(adev);
4237  		} else {
4238  			tmp = amdgpu_reset_method;
4239  			/* It should do a default reset when loading or reloading the driver,
4240  			 * regardless of the module parameter reset_method.
4241  			 */
4242  			amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4243  			r = amdgpu_asic_reset(adev);
4244  			amdgpu_reset_method = tmp;
4245  		}
4246  
4247  		if (r) {
4248  			dev_err(adev->dev, "asic reset on init failed\n");
4249  			goto failed;
4250  		}
4251  	}
4252  
4253  	/* Post card if necessary */
4254  	if (amdgpu_device_need_post(adev)) {
4255  		if (!adev->bios) {
4256  			dev_err(adev->dev, "no vBIOS found\n");
4257  			r = -EINVAL;
4258  			goto failed;
4259  		}
4260  		DRM_INFO("GPU posting now...\n");
4261  		r = amdgpu_device_asic_init(adev);
4262  		if (r) {
4263  			dev_err(adev->dev, "gpu post error!\n");
4264  			goto failed;
4265  		}
4266  	}
4267  
4268  	if (adev->bios) {
4269  		if (adev->is_atom_fw) {
4270  			/* Initialize clocks */
4271  			r = amdgpu_atomfirmware_get_clock_info(adev);
4272  			if (r) {
4273  				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4274  				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4275  				goto failed;
4276  			}
4277  		} else {
4278  			/* Initialize clocks */
4279  			r = amdgpu_atombios_get_clock_info(adev);
4280  			if (r) {
4281  				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4282  				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4283  				goto failed;
4284  			}
4285  			/* init i2c buses */
4286  			if (!amdgpu_device_has_dc_support(adev))
4287  				amdgpu_atombios_i2c_init(adev);
4288  		}
4289  	}
4290  
4291  fence_driver_init:
4292  	/* Fence driver */
4293  	r = amdgpu_fence_driver_sw_init(adev);
4294  	if (r) {
4295  		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4296  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4297  		goto failed;
4298  	}
4299  
4300  	/* init the mode config */
4301  	drm_mode_config_init(adev_to_drm(adev));
4302  
4303  	r = amdgpu_device_ip_init(adev);
4304  	if (r) {
4305  		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4306  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4307  		goto release_ras_con;
4308  	}
4309  
4310  	amdgpu_fence_driver_hw_init(adev);
4311  
4312  	dev_info(adev->dev,
4313  		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4314  			adev->gfx.config.max_shader_engines,
4315  			adev->gfx.config.max_sh_per_se,
4316  			adev->gfx.config.max_cu_per_sh,
4317  			adev->gfx.cu_info.number);
4318  
4319  	adev->accel_working = true;
4320  
4321  	amdgpu_vm_check_compute_bug(adev);
4322  
4323  	/* Initialize the buffer migration limit. */
4324  	if (amdgpu_moverate >= 0)
4325  		max_MBps = amdgpu_moverate;
4326  	else
4327  		max_MBps = 8; /* Allow 8 MB/s. */
4328  	/* Get a log2 for easy divisions. */
4329  	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4330  
4331  	/*
4332  	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4333  	 * Otherwise the mgpu fan boost feature will be skipped because the
4334  	 * gpu instance count will be too low.
4335  	 */
4336  	amdgpu_register_gpu_instance(adev);
4337  
4338  	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4339  	 * explicit gating rather than handling it automatically.
4340  	 */
4341  	if (!adev->gmc.xgmi.pending_reset) {
4342  		r = amdgpu_device_ip_late_init(adev);
4343  		if (r) {
4344  			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4345  			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4346  			goto release_ras_con;
4347  		}
4348  		/* must succeed. */
4349  		amdgpu_ras_resume(adev);
4350  		queue_delayed_work(system_wq, &adev->delayed_init_work,
4351  				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4352  	}
4353  
4354  	if (amdgpu_sriov_vf(adev)) {
4355  		amdgpu_virt_release_full_gpu(adev, true);
4356  		flush_delayed_work(&adev->delayed_init_work);
4357  	}
4358  
4359  	/*
4360  	 * Register these sysfs interfaces after `late_init`, since some of the
4361  	 * operations performed in `late_init` might affect the creation of
4362  	 * those sysfs interfaces.
4363  	 */
4364  	r = amdgpu_atombios_sysfs_init(adev);
4365  	if (r)
4366  		drm_err(&adev->ddev,
4367  			"registering atombios sysfs failed (%d).\n", r);
4368  
4369  	r = amdgpu_pm_sysfs_init(adev);
4370  	if (r)
4371  		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4372  
4373  	r = amdgpu_ucode_sysfs_init(adev);
4374  	if (r) {
4375  		adev->ucode_sysfs_en = false;
4376  		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4377  	} else
4378  		adev->ucode_sysfs_en = true;
4379  
4380  	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4381  	if (r)
4382  		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4383  
4384  	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4385  	if (r)
4386  		dev_err(adev->dev,
4387  			"Could not create amdgpu board attributes\n");
4388  
4389  	amdgpu_fru_sysfs_init(adev);
4390  	amdgpu_reg_state_sysfs_init(adev);
4391  
4392  	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4393  		r = amdgpu_pmu_init(adev);
4394  	if (r)
4395  		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4396  
4397  	/* Keep the stored PCI config space at hand for restore after a sudden PCI error */
4398  	if (amdgpu_device_cache_pci_state(adev->pdev))
4399  		pci_restore_state(pdev);
4400  
4401  	/* if we have > 1 VGA card, then disable the amdgpu VGA resources */
4402  	/* this will fail for cards that aren't VGA class devices, just
4403  	 * ignore it
4404  	 */
4405  	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4406  		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4407  
4408  	px = amdgpu_device_supports_px(ddev);
4409  
4410  	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4411  				apple_gmux_detect(NULL, NULL)))
4412  		vga_switcheroo_register_client(adev->pdev,
4413  					       &amdgpu_switcheroo_ops, px);
4414  
4415  	if (px)
4416  		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4417  
4418  	if (adev->gmc.xgmi.pending_reset)
4419  		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4420  				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4421  
4422  	amdgpu_device_check_iommu_direct_map(adev);
4423  
4424  	return 0;
4425  
4426  release_ras_con:
4427  	if (amdgpu_sriov_vf(adev))
4428  		amdgpu_virt_release_full_gpu(adev, true);
4429  
4430  	/* failed in exclusive mode due to timeout */
4431  	if (amdgpu_sriov_vf(adev) &&
4432  		!amdgpu_sriov_runtime(adev) &&
4433  		amdgpu_virt_mmio_blocked(adev) &&
4434  		!amdgpu_virt_wait_reset(adev)) {
4435  		dev_err(adev->dev, "VF exclusive mode timeout\n");
4436  		/* Don't send request since VF is inactive. */
4437  		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4438  		adev->virt.ops = NULL;
4439  		r = -EAGAIN;
4440  	}
4441  	amdgpu_release_ras_context(adev);
4442  
4443  failed:
4444  	amdgpu_vf_error_trans_all(adev);
4445  
4446  	return r;
4447  }
4448  
4449  static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4450  {
4451  
4452  	/* Clear all CPU mappings pointing to this device */
4453  	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4454  
4455  	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4456  	amdgpu_doorbell_fini(adev);
4457  
4458  	iounmap(adev->rmmio);
4459  	adev->rmmio = NULL;
4460  	if (adev->mman.aper_base_kaddr)
4461  		iounmap(adev->mman.aper_base_kaddr);
4462  	adev->mman.aper_base_kaddr = NULL;
4463  
4464  	/* Memory manager related */
4465  	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4466  		arch_phys_wc_del(adev->gmc.vram_mtrr);
4467  		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4468  	}
4469  }
4470  
4471  /**
4472   * amdgpu_device_fini_hw - tear down the driver
4473   *
4474   * @adev: amdgpu_device pointer
4475   *
4476   * Tear down the driver info (all asics).
4477   * Called at driver shutdown.
4478   */
4479  void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4480  {
4481  	dev_info(adev->dev, "amdgpu: finishing device.\n");
4482  	flush_delayed_work(&adev->delayed_init_work);
4483  	adev->shutdown = true;
4484  
4485  	/* make sure the IB tests have finished before entering exclusive mode
4486  	 * to avoid preemption during the IB tests
4487  	 */
4488  	if (amdgpu_sriov_vf(adev)) {
4489  		amdgpu_virt_request_full_gpu(adev, false);
4490  		amdgpu_virt_fini_data_exchange(adev);
4491  	}
4492  
4493  	/* disable all interrupts */
4494  	amdgpu_irq_disable_all(adev);
4495  	if (adev->mode_info.mode_config_initialized) {
4496  		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4497  			drm_helper_force_disable_all(adev_to_drm(adev));
4498  		else
4499  			drm_atomic_helper_shutdown(adev_to_drm(adev));
4500  	}
4501  	amdgpu_fence_driver_hw_fini(adev);
4502  
4503  	if (adev->mman.initialized)
4504  		drain_workqueue(adev->mman.bdev.wq);
4505  
4506  	if (adev->pm.sysfs_initialized)
4507  		amdgpu_pm_sysfs_fini(adev);
4508  	if (adev->ucode_sysfs_en)
4509  		amdgpu_ucode_sysfs_fini(adev);
4510  	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4511  	amdgpu_fru_sysfs_fini(adev);
4512  
4513  	amdgpu_reg_state_sysfs_fini(adev);
4514  
4515  	/* disable ras feature must before hw fini */
4516  	amdgpu_ras_pre_fini(adev);
4517  
4518  	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4519  
4520  	amdgpu_device_ip_fini_early(adev);
4521  
4522  	amdgpu_irq_fini_hw(adev);
4523  
4524  	if (adev->mman.initialized)
4525  		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4526  
4527  	amdgpu_gart_dummy_page_fini(adev);
4528  
4529  	if (drm_dev_is_unplugged(adev_to_drm(adev)))
4530  		amdgpu_device_unmap_mmio(adev);
4531  
4532  }
4533  
4534  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4535  {
4536  	int idx;
4537  	bool px;
4538  
4539  	amdgpu_fence_driver_sw_fini(adev);
4540  	amdgpu_device_ip_fini(adev);
4541  	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4542  	adev->accel_working = false;
4543  	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4544  
4545  	amdgpu_reset_fini(adev);
4546  
4547  	/* free i2c buses */
4548  	if (!amdgpu_device_has_dc_support(adev))
4549  		amdgpu_i2c_fini(adev);
4550  
4551  	if (amdgpu_emu_mode != 1)
4552  		amdgpu_atombios_fini(adev);
4553  
4554  	kfree(adev->bios);
4555  	adev->bios = NULL;
4556  
4557  	kfree(adev->fru_info);
4558  	adev->fru_info = NULL;
4559  
4560  	px = amdgpu_device_supports_px(adev_to_drm(adev));
4561  
4562  	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4563  				apple_gmux_detect(NULL, NULL)))
4564  		vga_switcheroo_unregister_client(adev->pdev);
4565  
4566  	if (px)
4567  		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4568  
4569  	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4570  		vga_client_unregister(adev->pdev);
4571  
4572  	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4573  
4574  		iounmap(adev->rmmio);
4575  		adev->rmmio = NULL;
4576  		amdgpu_doorbell_fini(adev);
4577  		drm_dev_exit(idx);
4578  	}
4579  
4580  	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4581  		amdgpu_pmu_fini(adev);
4582  	if (adev->mman.discovery_bin)
4583  		amdgpu_discovery_fini(adev);
4584  
4585  	amdgpu_reset_put_reset_domain(adev->reset_domain);
4586  	adev->reset_domain = NULL;
4587  
4588  	kfree(adev->pci_state);
4589  
4590  }
4591  
4592  /**
4593   * amdgpu_device_evict_resources - evict device resources
4594   * @adev: amdgpu device object
4595   *
4596   * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4597   * of the vram memory type. Mainly used for evicting device resources
4598   * at suspend time.
4599   *
4600   */
4601  static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4602  {
4603  	int ret;
4604  
4605  	/* No need to evict vram on APUs for suspend to ram or s2idle */
4606  	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4607  		return 0;
4608  
4609  	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4610  	if (ret)
4611  		DRM_WARN("evicting device resources failed\n");
4612  	return ret;
4613  }
4614  
4615  /*
4616   * Suspend & resume.
4617   */
4618  /**
4619   * amdgpu_device_prepare - prepare for device suspend
4620   *
4621   * @dev: drm dev pointer
4622   *
4623   * Prepare to put the hw in the suspend state (all asics).
4624   * Returns 0 for success or an error on failure.
4625   * Called at driver suspend.
4626   */
4627  int amdgpu_device_prepare(struct drm_device *dev)
4628  {
4629  	struct amdgpu_device *adev = drm_to_adev(dev);
4630  	int i, r;
4631  
4632  	amdgpu_choose_low_power_state(adev);
4633  
4634  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4635  		return 0;
4636  
4637  	/* Evict the majority of BOs before starting suspend sequence */
4638  	r = amdgpu_device_evict_resources(adev);
4639  	if (r)
4640  		goto unprepare;
4641  
4642  	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4643  
4644  	for (i = 0; i < adev->num_ip_blocks; i++) {
4645  		if (!adev->ip_blocks[i].status.valid)
4646  			continue;
4647  		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4648  			continue;
4649  		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4650  		if (r)
4651  			goto unprepare;
4652  	}
4653  
4654  	return 0;
4655  
4656  unprepare:
4657  	adev->in_s0ix = adev->in_s3 = false;
4658  
4659  	return r;
4660  }
4661  
4662  /**
4663   * amdgpu_device_suspend - initiate device suspend
4664   *
4665   * @dev: drm dev pointer
4666   * @fbcon: notify the fbdev of suspend
4667   *
4668   * Puts the hw in the suspend state (all asics).
4669   * Returns 0 for success or an error on failure.
4670   * Called at driver suspend.
4671   */
4672  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4673  {
4674  	struct amdgpu_device *adev = drm_to_adev(dev);
4675  	int r = 0;
4676  
4677  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4678  		return 0;
4679  
4680  	adev->in_suspend = true;
4681  
4682  	if (amdgpu_sriov_vf(adev)) {
4683  		amdgpu_virt_fini_data_exchange(adev);
4684  		r = amdgpu_virt_request_full_gpu(adev, false);
4685  		if (r)
4686  			return r;
4687  	}
4688  
4689  	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4690  		DRM_WARN("smart shift update failed\n");
4691  
4692  	if (fbcon)
4693  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4694  
4695  	cancel_delayed_work_sync(&adev->delayed_init_work);
4696  
4697  	amdgpu_ras_suspend(adev);
4698  
4699  	amdgpu_device_ip_suspend_phase1(adev);
4700  
4701  	if (!adev->in_s0ix)
4702  		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4703  
4704  	r = amdgpu_device_evict_resources(adev);
4705  	if (r)
4706  		return r;
4707  
4708  	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4709  
4710  	amdgpu_fence_driver_hw_fini(adev);
4711  
4712  	amdgpu_device_ip_suspend_phase2(adev);
4713  
4714  	if (amdgpu_sriov_vf(adev))
4715  		amdgpu_virt_release_full_gpu(adev, false);
4716  
4717  	r = amdgpu_dpm_notify_rlc_state(adev, false);
4718  	if (r)
4719  		return r;
4720  
4721  	return 0;
4722  }
4723  
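/*
 * Illustrative sketch (not compiled into the driver): system sleep is
 * expected to invoke the helpers in this file in prepare -> suspend ->
 * resume order.  The actual dev_pm_ops wiring lives outside this file (in
 * the driver glue), so the caller below is hypothetical and simplified.
 */
#if 0
static int example_system_sleep_cycle(struct drm_device *ddev)
{
	int r;

	r = amdgpu_device_prepare(ddev);	 /* evict BOs, run prepare_suspend hooks */
	if (r)
		return r;

	r = amdgpu_device_suspend(ddev, true);	 /* phase 1 + phase 2 IP suspend */
	if (r)
		return r;

	return amdgpu_device_resume(ddev, true); /* repost, IP resume, late init */
}
#endif
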
4724  /**
4725   * amdgpu_device_resume - initiate device resume
4726   *
4727   * @dev: drm dev pointer
4728   * @fbcon: notify the fbdev of resume
4729   *
4730   * Bring the hw back to operating state (all asics).
4731   * Returns 0 for success or an error on failure.
4732   * Called at driver resume.
4733   */
4734  int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4735  {
4736  	struct amdgpu_device *adev = drm_to_adev(dev);
4737  	int r = 0;
4738  
4739  	if (amdgpu_sriov_vf(adev)) {
4740  		r = amdgpu_virt_request_full_gpu(adev, true);
4741  		if (r)
4742  			return r;
4743  	}
4744  
4745  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4746  		return 0;
4747  
4748  	if (adev->in_s0ix)
4749  		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4750  
4751  	/* post card */
4752  	if (amdgpu_device_need_post(adev)) {
4753  		r = amdgpu_device_asic_init(adev);
4754  		if (r)
4755  			dev_err(adev->dev, "amdgpu asic init failed\n");
4756  	}
4757  
4758  	r = amdgpu_device_ip_resume(adev);
4759  
4760  	if (r) {
4761  		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4762  		goto exit;
4763  	}
4764  	amdgpu_fence_driver_hw_init(adev);
4765  
4766  	if (!adev->in_s0ix) {
4767  		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4768  		if (r)
4769  			goto exit;
4770  	}
4771  
4772  	r = amdgpu_device_ip_late_init(adev);
4773  	if (r)
4774  		goto exit;
4775  
4776  	queue_delayed_work(system_wq, &adev->delayed_init_work,
4777  			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4778  exit:
4779  	if (amdgpu_sriov_vf(adev)) {
4780  		amdgpu_virt_init_data_exchange(adev);
4781  		amdgpu_virt_release_full_gpu(adev, true);
4782  	}
4783  
4784  	if (r)
4785  		return r;
4786  
4787  	/* Make sure IB tests flushed */
4788  	flush_delayed_work(&adev->delayed_init_work);
4789  
4790  	if (fbcon)
4791  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4792  
4793  	amdgpu_ras_resume(adev);
4794  
4795  	if (adev->mode_info.num_crtc) {
4796  		/*
4797  		 * Most of the connector probing functions try to acquire runtime pm
4798  		 * refs to ensure that the GPU is powered on when connector polling is
4799  		 * performed. Since we're calling this from a runtime PM callback,
4800  		 * trying to acquire rpm refs will cause us to deadlock.
4801  		 *
4802  		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4803  		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4804  		 */
4805  #ifdef CONFIG_PM
4806  		dev->dev->power.disable_depth++;
4807  #endif
4808  		if (!adev->dc_enabled)
4809  			drm_helper_hpd_irq_event(dev);
4810  		else
4811  			drm_kms_helper_hotplug_event(dev);
4812  #ifdef CONFIG_PM
4813  		dev->dev->power.disable_depth--;
4814  #endif
4815  	}
4816  	adev->in_suspend = false;
4817  
4818  	if (adev->enable_mes)
4819  		amdgpu_mes_self_test(adev);
4820  
4821  	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4822  		DRM_WARN("smart shift update failed\n");
4823  
4824  	return 0;
4825  }
4826  
4827  /**
4828   * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4829   *
4830   * @adev: amdgpu_device pointer
4831   *
4832   * The list of all the hardware IPs that make up the asic is walked and
4833   * the check_soft_reset callbacks are run.  check_soft_reset determines
4834   * if the asic is still hung or not.
4835   * Returns true if any of the IPs are still in a hung state, false if not.
4836   */
4837  static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4838  {
4839  	int i;
4840  	bool asic_hang = false;
4841  
4842  	if (amdgpu_sriov_vf(adev))
4843  		return true;
4844  
4845  	if (amdgpu_asic_need_full_reset(adev))
4846  		return true;
4847  
4848  	for (i = 0; i < adev->num_ip_blocks; i++) {
4849  		if (!adev->ip_blocks[i].status.valid)
4850  			continue;
4851  		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4852  			adev->ip_blocks[i].status.hang =
4853  				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4854  		if (adev->ip_blocks[i].status.hang) {
4855  			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4856  			asic_hang = true;
4857  		}
4858  	}
4859  	return asic_hang;
4860  }
4861  
4862  /**
4863   * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4864   *
4865   * @adev: amdgpu_device pointer
4866   *
4867   * The list of all the hardware IPs that make up the asic is walked and the
4868   * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4869   * handles any IP specific hardware or software state changes that are
4870   * necessary for a soft reset to succeed.
4871   * Returns 0 on success, negative error code on failure.
4872   */
4873  static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4874  {
4875  	int i, r = 0;
4876  
4877  	for (i = 0; i < adev->num_ip_blocks; i++) {
4878  		if (!adev->ip_blocks[i].status.valid)
4879  			continue;
4880  		if (adev->ip_blocks[i].status.hang &&
4881  		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4882  			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4883  			if (r)
4884  				return r;
4885  		}
4886  	}
4887  
4888  	return 0;
4889  }
4890  
4891  /**
4892   * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4893   *
4894   * @adev: amdgpu_device pointer
4895   *
4896   * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4897   * reset is necessary to recover.
4898   * Returns true if a full asic reset is required, false if not.
4899   */
4900  static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4901  {
4902  	int i;
4903  
4904  	if (amdgpu_asic_need_full_reset(adev))
4905  		return true;
4906  
4907  	for (i = 0; i < adev->num_ip_blocks; i++) {
4908  		if (!adev->ip_blocks[i].status.valid)
4909  			continue;
4910  		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4911  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4912  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4913  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4914  		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4915  			if (adev->ip_blocks[i].status.hang) {
4916  				dev_info(adev->dev, "Some block need full reset!\n");
4917  				return true;
4918  			}
4919  		}
4920  	}
4921  	return false;
4922  }
4923  
4924  /**
4925   * amdgpu_device_ip_soft_reset - do a soft reset
4926   *
4927   * @adev: amdgpu_device pointer
4928   *
4929   * The list of all the hardware IPs that make up the asic is walked and the
4930   * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4931   * IP specific hardware or software state changes that are necessary to soft
4932   * reset the IP.
4933   * Returns 0 on success, negative error code on failure.
4934   */
4935  static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4936  {
4937  	int i, r = 0;
4938  
4939  	for (i = 0; i < adev->num_ip_blocks; i++) {
4940  		if (!adev->ip_blocks[i].status.valid)
4941  			continue;
4942  		if (adev->ip_blocks[i].status.hang &&
4943  		    adev->ip_blocks[i].version->funcs->soft_reset) {
4944  			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4945  			if (r)
4946  				return r;
4947  		}
4948  	}
4949  
4950  	return 0;
4951  }
4952  
4953  /**
4954   * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4955   *
4956   * @adev: amdgpu_device pointer
4957   *
4958   * The list of all the hardware IPs that make up the asic is walked and the
4959   * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4960   * handles any IP specific hardware or software state changes that are
4961   * necessary after the IP has been soft reset.
4962   * Returns 0 on success, negative error code on failure.
4963   */
4964  static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4965  {
4966  	int i, r = 0;
4967  
4968  	for (i = 0; i < adev->num_ip_blocks; i++) {
4969  		if (!adev->ip_blocks[i].status.valid)
4970  			continue;
4971  		if (adev->ip_blocks[i].status.hang &&
4972  		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4973  			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4974  		if (r)
4975  			return r;
4976  	}
4977  
4978  	return 0;
4979  }
4980  
4981  /**
4982   * amdgpu_device_recover_vram - Recover some VRAM contents
4983   *
4984   * @adev: amdgpu_device pointer
4985   *
4986   * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4987   * restore things like GPUVM page tables after a GPU reset where
4988   * the contents of VRAM might be lost.
4989   *
4990   * Returns:
4991   * 0 on success, negative error code on failure.
4992   */
4993  static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4994  {
4995  	struct dma_fence *fence = NULL, *next = NULL;
4996  	struct amdgpu_bo *shadow;
4997  	struct amdgpu_bo_vm *vmbo;
4998  	long r = 1, tmo;
4999  
5000  	if (amdgpu_sriov_runtime(adev))
5001  		tmo = msecs_to_jiffies(8000);
5002  	else
5003  		tmo = msecs_to_jiffies(100);
5004  
5005  	dev_info(adev->dev, "recover vram bo from shadow start\n");
5006  	mutex_lock(&adev->shadow_list_lock);
5007  	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
5008  		/* If vm is compute context or adev is APU, shadow will be NULL */
5009  		if (!vmbo->shadow)
5010  			continue;
5011  		shadow = vmbo->shadow;
5012  
5013  		/* No need to recover an evicted BO */
5014  		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
5015  		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
5016  		    shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
5017  			continue;
5018  
5019  		r = amdgpu_bo_restore_shadow(shadow, &next);
5020  		if (r)
5021  			break;
5022  
5023  		if (fence) {
5024  			tmo = dma_fence_wait_timeout(fence, false, tmo);
5025  			dma_fence_put(fence);
5026  			fence = next;
5027  			if (tmo == 0) {
5028  				r = -ETIMEDOUT;
5029  				break;
5030  			} else if (tmo < 0) {
5031  				r = tmo;
5032  				break;
5033  			}
5034  		} else {
5035  			fence = next;
5036  		}
5037  	}
5038  	mutex_unlock(&adev->shadow_list_lock);
5039  
5040  	if (fence)
5041  		tmo = dma_fence_wait_timeout(fence, false, tmo);
5042  	dma_fence_put(fence);
5043  
5044  	if (r < 0 || tmo <= 0) {
5045  		dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
5046  		return -EIO;
5047  	}
5048  
5049  	dev_info(adev->dev, "recover vram bo from shadow done\n");
5050  	return 0;
5051  }
5052  
5053  
5054  /**
5055   * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5056   *
5057   * @adev: amdgpu_device pointer
5058   * @from_hypervisor: request from hypervisor
5059   *
5060   * Do a VF FLR and reinitialize the ASIC.
5061   * Returns 0 on success, negative error code on failure.
5062   */
5063  static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5064  				     bool from_hypervisor)
5065  {
5066  	int r;
5067  	struct amdgpu_hive_info *hive = NULL;
5068  	int retry_limit = 0;
5069  
5070  retry:
5071  	amdgpu_amdkfd_pre_reset(adev);
5072  
5073  	amdgpu_device_stop_pending_resets(adev);
5074  
5075  	if (from_hypervisor)
5076  		r = amdgpu_virt_request_full_gpu(adev, true);
5077  	else
5078  		r = amdgpu_virt_reset_gpu(adev);
5079  	if (r)
5080  		return r;
5081  	amdgpu_ras_set_fed(adev, false);
5082  	amdgpu_irq_gpu_reset_resume_helper(adev);
5083  
5084  	/* some SW cleanup the VF needs to do before recovery */
5085  	amdgpu_virt_post_reset(adev);
5086  
5087  	/* Resume IP prior to SMC */
5088  	r = amdgpu_device_ip_reinit_early_sriov(adev);
5089  	if (r)
5090  		goto error;
5091  
5092  	amdgpu_virt_init_data_exchange(adev);
5093  
5094  	r = amdgpu_device_fw_loading(adev);
5095  	if (r)
5096  		return r;
5097  
5098  	/* now we are okay to resume SMC/CP/SDMA */
5099  	r = amdgpu_device_ip_reinit_late_sriov(adev);
5100  	if (r)
5101  		goto error;
5102  
5103  	hive = amdgpu_get_xgmi_hive(adev);
5104  	/* Update PSP FW topology after reset */
5105  	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5106  		r = amdgpu_xgmi_update_topology(hive, adev);
5107  
5108  	if (hive)
5109  		amdgpu_put_xgmi_hive(hive);
5110  
5111  	if (!r) {
5112  		r = amdgpu_ib_ring_tests(adev);
5113  
5114  		amdgpu_amdkfd_post_reset(adev);
5115  	}
5116  
5117  error:
5118  	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5119  		amdgpu_inc_vram_lost(adev);
5120  		r = amdgpu_device_recover_vram(adev);
5121  	}
5122  	amdgpu_virt_release_full_gpu(adev, true);
5123  
5124  	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5125  		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5126  			retry_limit++;
5127  			goto retry;
5128  		} else
5129  			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5130  	}
5131  
5132  	return r;
5133  }
5134  
5135  /**
5136   * amdgpu_device_has_job_running - check if there is any job in the pending list
5137   *
5138   * @adev: amdgpu_device pointer
5139   *
5140   * Check if there is any job in any ring's pending list.
5141   */
5142  bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5143  {
5144  	int i;
5145  	struct drm_sched_job *job;
5146  
5147  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5148  		struct amdgpu_ring *ring = adev->rings[i];
5149  
5150  		if (!amdgpu_ring_sched_ready(ring))
5151  			continue;
5152  
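		/* Peek at the first job on the pending list without removing it. */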
5153  		spin_lock(&ring->sched.job_list_lock);
5154  		job = list_first_entry_or_null(&ring->sched.pending_list,
5155  					       struct drm_sched_job, list);
5156  		spin_unlock(&ring->sched.job_list_lock);
5157  		if (job)
5158  			return true;
5159  	}
5160  	return false;
5161  }
5162  
5163  /**
5164   * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5165   *
5166   * @adev: amdgpu_device pointer
5167   *
5168   * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5169   * a hung GPU.
5170   */
5171  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5172  {
5173  
5174  	if (amdgpu_gpu_recovery == 0)
5175  		goto disabled;
5176  
5177  	/* Skip soft reset check in fatal error mode */
5178  	if (!amdgpu_ras_is_poison_mode_supported(adev))
5179  		return true;
5180  
5181  	if (amdgpu_sriov_vf(adev))
5182  		return true;
5183  
5184  	if (amdgpu_gpu_recovery == -1) {
5185  		switch (adev->asic_type) {
5186  #ifdef CONFIG_DRM_AMDGPU_SI
5187  		case CHIP_VERDE:
5188  		case CHIP_TAHITI:
5189  		case CHIP_PITCAIRN:
5190  		case CHIP_OLAND:
5191  		case CHIP_HAINAN:
5192  #endif
5193  #ifdef CONFIG_DRM_AMDGPU_CIK
5194  		case CHIP_KAVERI:
5195  		case CHIP_KABINI:
5196  		case CHIP_MULLINS:
5197  #endif
5198  		case CHIP_CARRIZO:
5199  		case CHIP_STONEY:
5200  		case CHIP_CYAN_SKILLFISH:
5201  			goto disabled;
5202  		default:
5203  			break;
5204  		}
5205  	}
5206  
5207  	return true;
5208  
5209  disabled:
5210  	dev_info(adev->dev, "GPU recovery disabled.\n");
5211  	return false;
5212  }
5213  
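/**
 * amdgpu_device_mode1_reset - perform a full ASIC (mode1) reset
 *
 * @adev: amdgpu_device pointer
 *
 * Disable bus mastering, cache the PCI config space and trigger a mode1
 * reset either through the SMU (if supported) or through the PSP, then
 * wait for the ASIC to come back out of reset.
 * Returns 0 on success or a negative error code on failure.
 */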
5214  int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5215  {
5216  	u32 i;
5217  	int ret = 0;
5218  
5219  	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5220  
5221  	dev_info(adev->dev, "GPU mode1 reset\n");
5222  
5223  	/* disable BM */
5224  	pci_clear_master(adev->pdev);
5225  
5226  	amdgpu_device_cache_pci_state(adev->pdev);
5227  
5228  	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5229  		dev_info(adev->dev, "GPU smu mode1 reset\n");
5230  		ret = amdgpu_dpm_mode1_reset(adev);
5231  	} else {
5232  		dev_info(adev->dev, "GPU psp mode1 reset\n");
5233  		ret = psp_gpu_reset(adev);
5234  	}
5235  
5236  	if (ret)
5237  		goto mode1_reset_failed;
5238  
5239  	amdgpu_device_load_pci_state(adev->pdev);
5240  	ret = amdgpu_psp_wait_for_bootloader(adev);
5241  	if (ret)
5242  		goto mode1_reset_failed;
5243  
5244  	/* wait for asic to come out of reset */
5245  	for (i = 0; i < adev->usec_timeout; i++) {
5246  		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5247  
5248  		if (memsize != 0xffffffff)
5249  			break;
5250  		udelay(1);
5251  	}
5252  
5253  	if (i >= adev->usec_timeout) {
5254  		ret = -ETIMEDOUT;
5255  		goto mode1_reset_failed;
5256  	}
5257  
5258  	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5259  
5260  	return 0;
5261  
5262  mode1_reset_failed:
5263  	dev_err(adev->dev, "GPU mode1 reset failed\n");
5264  	return ret;
5265  }
5266  
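/**
 * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * Stop the SR-IOV data exchange, clear the job fences and force-complete
 * the HW fences of all rings, then either run the per-ASIC reset handler
 * or, on bare metal, try a soft reset and fall back to suspending the IP
 * blocks ahead of a full reset.
 * Returns 0 on success or a negative error code on failure.
 */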
5267  int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5268  				 struct amdgpu_reset_context *reset_context)
5269  {
5270  	int i, r = 0;
5271  	struct amdgpu_job *job = NULL;
5272  	bool need_full_reset =
5273  		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5274  
5275  	if (reset_context->reset_req_dev == adev)
5276  		job = reset_context->job;
5277  
5278  	if (amdgpu_sriov_vf(adev)) {
5279  		/* stop the data exchange thread */
5280  		amdgpu_virt_fini_data_exchange(adev);
5281  	}
5282  
5283  	amdgpu_fence_driver_isr_toggle(adev, true);
5284  
5285  	/* block all schedulers and reset given job's ring */
5286  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5287  		struct amdgpu_ring *ring = adev->rings[i];
5288  
5289  		if (!amdgpu_ring_sched_ready(ring))
5290  			continue;
5291  
5292  		/* Clear the job fences from the fence driver to avoid force_completion
5293  		 * on them; leave only NULL and VM flush fences in the fence driver.
5294  		 */
5295  		amdgpu_fence_driver_clear_job_fences(ring);
5296  
5297  		/* after all hw jobs are reset, hw fences are meaningless, so force_completion */
5298  		amdgpu_fence_driver_force_completion(ring);
5299  	}
5300  
5301  	amdgpu_fence_driver_isr_toggle(adev, false);
5302  
5303  	if (job && job->vm)
5304  		drm_sched_increase_karma(&job->base);
5305  
5306  	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5307  	/* If reset handler not implemented, continue; otherwise return */
5308  	if (r == -EOPNOTSUPP)
5309  		r = 0;
5310  	else
5311  		return r;
5312  
5313  	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5314  	if (!amdgpu_sriov_vf(adev)) {
5315  
5316  		if (!need_full_reset)
5317  			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5318  
5319  		if (!need_full_reset && amdgpu_gpu_recovery &&
5320  		    amdgpu_device_ip_check_soft_reset(adev)) {
5321  			amdgpu_device_ip_pre_soft_reset(adev);
5322  			r = amdgpu_device_ip_soft_reset(adev);
5323  			amdgpu_device_ip_post_soft_reset(adev);
5324  			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5325  				dev_info(adev->dev, "soft reset failed, will fall back to full reset!\n");
5326  				need_full_reset = true;
5327  			}
5328  		}
5329  
5330  		if (need_full_reset)
5331  			r = amdgpu_device_ip_suspend(adev);
5332  		if (need_full_reset)
5333  			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5334  		else
5335  			clear_bit(AMDGPU_NEED_FULL_RESET,
5336  				  &reset_context->flags);
5337  	}
5338  
5339  	return r;
5340  }
5341  
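/*
 * Snapshot the registers listed in adev->reset_info.reset_dump_reg_list
 * right before the ASIC is reset so they can be inspected afterwards.
 */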
5342  static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5343  {
5344  	int i;
5345  
5346  	lockdep_assert_held(&adev->reset_domain->sem);
5347  
5348  	for (i = 0; i < adev->reset_info.num_regs; i++) {
5349  		adev->reset_info.reset_dump_reg_value[i] =
5350  			RREG32(adev->reset_info.reset_dump_reg_list[i]);
5351  
5352  		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5353  					     adev->reset_info.reset_dump_reg_value[i]);
5354  	}
5355  
5356  	return 0;
5357  }
5358  
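/**
 * amdgpu_do_asic_reset - perform the actual ASIC reset and re-init
 *
 * @device_list_handle: list of devices to reset (the whole hive for XGMI)
 * @reset_context: amdgpu reset context pointer
 *
 * Dump the registers/IP state, run the reset handler (or the default full
 * reset, done in parallel for XGMI hives), then re-post and resume the IP
 * blocks, re-run the IB tests and recover VRAM contents if needed.
 * Returns 0 on success or a negative error code on failure.
 */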
5359  int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5360  			 struct amdgpu_reset_context *reset_context)
5361  {
5362  	struct amdgpu_device *tmp_adev = NULL;
5363  	bool need_full_reset, skip_hw_reset, vram_lost = false;
5364  	int r = 0;
5365  	uint32_t i;
5366  
5367  	/* Try reset handler method first */
5368  	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5369  				    reset_list);
5370  
5371  	if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5372  		amdgpu_reset_reg_dumps(tmp_adev);
5373  
5374  		/* Trigger ip dump before we reset the asic */
5375  		for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5376  			if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5377  				tmp_adev->ip_blocks[i].version->funcs
5378  				->dump_ip_state((void *)tmp_adev);
5379  	}
5380  
5381  	reset_context->reset_device_list = device_list_handle;
5382  	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5383  	/* If reset handler not implemented, continue; otherwise return */
5384  	if (r == -EOPNOTSUPP)
5385  		r = 0;
5386  	else
5387  		return r;
5388  
5389  	/* Reset handler not implemented, use the default method */
5390  	need_full_reset =
5391  		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5392  	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5393  
5394  	/*
5395  	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5396  	 * to allow proper link negotiation in the FW (within 1 sec)
5397  	 */
5398  	if (!skip_hw_reset && need_full_reset) {
5399  		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5400  			/* For XGMI run all resets in parallel to speed up the process */
5401  			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5402  				tmp_adev->gmc.xgmi.pending_reset = false;
5403  				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5404  					r = -EALREADY;
5405  			} else
5406  				r = amdgpu_asic_reset(tmp_adev);
5407  
5408  			if (r) {
5409  				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s\n",
5410  					 r, adev_to_drm(tmp_adev)->unique);
5411  				goto out;
5412  			}
5413  		}
5414  
5415  		/* For XGMI wait for all resets to complete before proceeding */
5416  		if (!r) {
5417  			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5418  				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5419  					flush_work(&tmp_adev->xgmi_reset_work);
5420  					r = tmp_adev->asic_reset_res;
5421  					if (r)
5422  						break;
5423  				}
5424  			}
5425  		}
5426  	}
5427  
5428  	if (!r && amdgpu_ras_intr_triggered()) {
5429  		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5430  			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5431  		}
5432  
5433  		amdgpu_ras_intr_cleared();
5434  	}
5435  
5436  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5437  		if (need_full_reset) {
5438  			/* post card */
5439  			amdgpu_ras_set_fed(tmp_adev, false);
5440  			r = amdgpu_device_asic_init(tmp_adev);
5441  			if (r) {
5442  				dev_warn(tmp_adev->dev, "asic atom init failed!");
5443  			} else {
5444  				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5445  
5446  				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5447  				if (r)
5448  					goto out;
5449  
5450  				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5451  
5452  				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5453  					amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5454  
5455  				if (vram_lost) {
5456  					DRM_INFO("VRAM is lost due to GPU reset!\n");
5457  					amdgpu_inc_vram_lost(tmp_adev);
5458  				}
5459  
5460  				r = amdgpu_device_fw_loading(tmp_adev);
5461  				if (r)
5462  					return r;
5463  
5464  				r = amdgpu_xcp_restore_partition_mode(
5465  					tmp_adev->xcp_mgr);
5466  				if (r)
5467  					goto out;
5468  
5469  				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5470  				if (r)
5471  					goto out;
5472  
5473  				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5474  					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5475  
5476  				if (vram_lost)
5477  					amdgpu_device_fill_reset_magic(tmp_adev);
5478  
5479  				/*
5480  				 * Add this ASIC back as tracked, since the reset
5481  				 * already completed successfully.
5482  				 */
5483  				amdgpu_register_gpu_instance(tmp_adev);
5484  
5485  				if (!reset_context->hive &&
5486  				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5487  					amdgpu_xgmi_add_device(tmp_adev);
5488  
5489  				r = amdgpu_device_ip_late_init(tmp_adev);
5490  				if (r)
5491  					goto out;
5492  
5493  				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5494  
5495  				/*
5496  				 * The GPU enters a bad state once the number of
5497  				 * faulty pages reported by ECC reaches the
5498  				 * threshold, and RAS recovery is scheduled next.
5499  				 * So check here whether the bad page threshold
5500  				 * has indeed been exceeded, and if so break the
5501  				 * recovery and remind the user to retire this GPU
5502  				 * or to set a bigger bad_page_threshold value
5503  				 * when probing the driver again.
5504  				 */
5505  				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5506  					/* must succeed. */
5507  					amdgpu_ras_resume(tmp_adev);
5508  				} else {
5509  					r = -EINVAL;
5510  					goto out;
5511  				}
5512  
5513  				/* Update PSP FW topology after reset */
5514  				if (reset_context->hive &&
5515  				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5516  					r = amdgpu_xgmi_update_topology(
5517  						reset_context->hive, tmp_adev);
5518  			}
5519  		}
5520  
5521  out:
5522  		if (!r) {
5523  			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5524  			r = amdgpu_ib_ring_tests(tmp_adev);
5525  			if (r) {
5526  				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5527  				need_full_reset = true;
5528  				r = -EAGAIN;
5529  				goto end;
5530  			}
5531  		}
5532  
5533  		if (!r)
5534  			r = amdgpu_device_recover_vram(tmp_adev);
5535  		else
5536  			tmp_adev->asic_reset_res = r;
5537  	}
5538  
5539  end:
5540  	if (need_full_reset)
5541  		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5542  	else
5543  		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5544  	return r;
5545  }
5546  
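/* Record the MP1 (SMU) state matching the reset method that is about to be used. */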
5547  static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5548  {
5549  
5550  	switch (amdgpu_asic_reset_method(adev)) {
5551  	case AMD_RESET_METHOD_MODE1:
5552  		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5553  		break;
5554  	case AMD_RESET_METHOD_MODE2:
5555  		adev->mp1_state = PP_MP1_STATE_RESET;
5556  		break;
5557  	default:
5558  		adev->mp1_state = PP_MP1_STATE_NONE;
5559  		break;
5560  	}
5561  }
5562  
5563  static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5564  {
5565  	amdgpu_vf_error_trans_all(adev);
5566  	adev->mp1_state = PP_MP1_STATE_NONE;
5567  }
5568  
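/* Re-enable runtime PM on the GPU's companion audio function (function 1) after reset. */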
5569  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5570  {
5571  	struct pci_dev *p = NULL;
5572  
5573  	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5574  			adev->pdev->bus->number, 1);
5575  	if (p) {
5576  		pm_runtime_enable(&(p->dev));
5577  		pm_runtime_resume(&(p->dev));
5578  	}
5579  
5580  	pci_dev_put(p);
5581  }
5582  
5583  static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5584  {
5585  	enum amd_reset_method reset_method;
5586  	struct pci_dev *p = NULL;
5587  	u64 expires;
5588  
5589  	/*
5590  	 * For now, only BACO and mode1 reset are confirmed to
5591  	 * suffer from the audio issue if not properly suspended.
5592  	 */
5593  	reset_method = amdgpu_asic_reset_method(adev);
5594  	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5595  	     (reset_method != AMD_RESET_METHOD_MODE1))
5596  		return -EINVAL;
5597  
5598  	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5599  			adev->pdev->bus->number, 1);
5600  	if (!p)
5601  		return -ENODEV;
5602  
5603  	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5604  	if (!expires)
5605  		/*
5606  		 * If we cannot get the audio device autosuspend delay,
5607  		 * a fixed 4 second interval is used. Since 3 seconds is
5608  		 * the audio controller's default autosuspend delay, the
5609  		 * 4 seconds used here are guaranteed to cover it.
5610  		 */
5611  		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5612  
5613  	while (!pm_runtime_status_suspended(&(p->dev))) {
5614  		if (!pm_runtime_suspend(&(p->dev)))
5615  			break;
5616  
5617  		if (expires < ktime_get_mono_fast_ns()) {
5618  			dev_warn(adev->dev, "failed to suspend display audio\n");
5619  			pci_dev_put(p);
5620  			/* TODO: abort the succeeding gpu reset? */
5621  			return -ETIMEDOUT;
5622  		}
5623  	}
5624  
5625  	pm_runtime_disable(&(p->dev));
5626  
5627  	pci_dev_put(p);
5628  	return 0;
5629  }
5630  
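/*
 * Cancel reset work that may already be queued from debugfs, KFD, the
 * SR-IOV FLR handler or RAS so that it does not race with the reset in
 * progress.
 */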
5631  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5632  {
5633  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5634  
5635  #if defined(CONFIG_DEBUG_FS)
5636  	if (!amdgpu_sriov_vf(adev))
5637  		cancel_work(&adev->reset_work);
5638  #endif
5639  
5640  	if (adev->kfd.dev)
5641  		cancel_work(&adev->kfd.reset_work);
5642  
5643  	if (amdgpu_sriov_vf(adev))
5644  		cancel_work(&adev->virt.flr_work);
5645  
5646  	if (con && adev->ras_enabled)
5647  		cancel_work(&con->recovery_work);
5648  
5649  }
5650  
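/*
 * Read the PCI config space of every device on the reset list to make
 * sure none of them has dropped off the bus before attempting the reset.
 */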
5651  static int amdgpu_device_health_check(struct list_head *device_list_handle)
5652  {
5653  	struct amdgpu_device *tmp_adev;
5654  	int ret = 0;
5655  	u32 status;
5656  
5657  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5658  		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5659  		if (PCI_POSSIBLE_ERROR(status)) {
5660  			dev_err(tmp_adev->dev, "device lost from bus!");
5661  			ret = -ENODEV;
5662  		}
5663  	}
5664  
5665  	return ret;
5666  }
5667  
5668  /**
5669   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5670   *
5671   * @adev: amdgpu_device pointer
5672   * @job: the job that triggered the hang
5673   * @reset_context: amdgpu reset context pointer
5674   *
5675   * Attempt to reset the GPU if it has hung (all ASICs).
5676   * Attempt a soft reset or a full reset and reinitialize the ASIC.
5677   * Returns 0 for success or an error on failure.
5678   */
5679  
5680  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5681  			      struct amdgpu_job *job,
5682  			      struct amdgpu_reset_context *reset_context)
5683  {
5684  	struct list_head device_list, *device_list_handle =  NULL;
5685  	bool job_signaled = false;
5686  	struct amdgpu_hive_info *hive = NULL;
5687  	struct amdgpu_device *tmp_adev = NULL;
5688  	int i, r = 0;
5689  	bool need_emergency_restart = false;
5690  	bool audio_suspended = false;
5691  
5692  	/*
5693  	 * Special case: RAS triggered and full reset isn't supported
5694  	 */
5695  	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5696  
5697  	/*
5698  	 * Flush RAM to disk so that after reboot
5699  	 * the user can read the log and see why the system rebooted.
5700  	 */
5701  	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5702  		amdgpu_ras_get_context(adev)->reboot) {
5703  		DRM_WARN("Emergency reboot.");
5704  
5705  		ksys_sync_helper();
5706  		emergency_restart();
5707  	}
5708  
5709  	dev_info(adev->dev, "GPU %s begin!\n",
5710  		need_emergency_restart ? "jobs stop":"reset");
5711  
5712  	if (!amdgpu_sriov_vf(adev))
5713  		hive = amdgpu_get_xgmi_hive(adev);
5714  	if (hive)
5715  		mutex_lock(&hive->hive_lock);
5716  
5717  	reset_context->job = job;
5718  	reset_context->hive = hive;
5719  	/*
5720  	 * Build list of devices to reset.
5721  	 * In case we are in XGMI hive mode, resort the device list
5722  	 * to put adev in the 1st position.
5723  	 */
5724  	INIT_LIST_HEAD(&device_list);
5725  	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5726  		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5727  			list_add_tail(&tmp_adev->reset_list, &device_list);
5728  			if (adev->shutdown)
5729  				tmp_adev->shutdown = true;
5730  		}
5731  		if (!list_is_first(&adev->reset_list, &device_list))
5732  			list_rotate_to_front(&adev->reset_list, &device_list);
5733  		device_list_handle = &device_list;
5734  	} else {
5735  		list_add_tail(&adev->reset_list, &device_list);
5736  		device_list_handle = &device_list;
5737  	}
5738  
5739  	if (!amdgpu_sriov_vf(adev)) {
5740  		r = amdgpu_device_health_check(device_list_handle);
5741  		if (r)
5742  			goto end_reset;
5743  	}
5744  
5745  	/* We need to lock the reset domain only once, for both XGMI and single device */
5746  	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5747  				    reset_list);
5748  	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5749  
5750  	/* block all schedulers and reset given job's ring */
5751  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5752  
5753  		amdgpu_device_set_mp1_state(tmp_adev);
5754  
5755  		/*
5756  		 * Try to put the audio codec into the suspend state
5757  		 * before the gpu reset starts.
5758  		 *
5759  		 * Because the power domain of the graphics device
5760  		 * is shared with the AZ power domain, without this
5761  		 * we may change the audio hardware from behind
5762  		 * the audio driver's back. That will trigger
5763  		 * some audio codec errors.
5764  		 */
5765  		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5766  			audio_suspended = true;
5767  
5768  		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5769  
5770  		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5771  
5772  		if (!amdgpu_sriov_vf(tmp_adev))
5773  			amdgpu_amdkfd_pre_reset(tmp_adev);
5774  
5775  		/*
5776  		 * Mark these ASICs to be reset as untracked first,
5777  		 * and add them back after the reset completes.
5778  		 */
5779  		amdgpu_unregister_gpu_instance(tmp_adev);
5780  
5781  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5782  
5783  		/* disable ras on ALL IPs */
5784  		if (!need_emergency_restart &&
5785  		      amdgpu_device_ip_need_full_reset(tmp_adev))
5786  			amdgpu_ras_suspend(tmp_adev);
5787  
5788  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5789  			struct amdgpu_ring *ring = tmp_adev->rings[i];
5790  
5791  			if (!amdgpu_ring_sched_ready(ring))
5792  				continue;
5793  
5794  			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5795  
5796  			if (need_emergency_restart)
5797  				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5798  		}
5799  		atomic_inc(&tmp_adev->gpu_reset_counter);
5800  	}
5801  
5802  	if (need_emergency_restart)
5803  		goto skip_sched_resume;
5804  
5805  	/*
5806  	 * Must check guilty signal here since after this point all old
5807  	 * HW fences are force signaled.
5808  	 *
5809  	 * job->base holds a reference to parent fence
5810  	 */
5811  	if (job && dma_fence_is_signaled(&job->hw_fence)) {
5812  		job_signaled = true;
5813  		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5814  		goto skip_hw_reset;
5815  	}
5816  
5817  retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5818  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5819  		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5820  		/*TODO Should we stop ?*/
5821  		if (r) {
5822  			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s\n",
5823  				  r, adev_to_drm(tmp_adev)->unique);
5824  			tmp_adev->asic_reset_res = r;
5825  		}
5826  
5827  		if (!amdgpu_sriov_vf(tmp_adev))
5828  			/*
5829  			 * Drop all pending non-scheduler resets. Scheduler resets
5830  			 * were already dropped during drm_sched_stop.
5831  			 */
5832  			amdgpu_device_stop_pending_resets(tmp_adev);
5833  	}
5834  
5835  	/* Actual ASIC resets if needed. */
5836  	/* Host driver will handle XGMI hive reset for SRIOV */
5837  	if (amdgpu_sriov_vf(adev)) {
5838  		r = amdgpu_device_reset_sriov(adev, job ? false : true);
5839  		if (r)
5840  			adev->asic_reset_res = r;
5841  
5842  		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5843  		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5844  			    IP_VERSION(9, 4, 2) ||
5845  		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5846  		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5847  			amdgpu_ras_resume(adev);
5848  	} else {
5849  		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5850  		if (r && r == -EAGAIN)
5851  			goto retry;
5852  	}
5853  
5854  skip_hw_reset:
5855  
5856  	/* Post ASIC reset for all devs. */
5857  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5858  
5859  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5860  			struct amdgpu_ring *ring = tmp_adev->rings[i];
5861  
5862  			if (!amdgpu_ring_sched_ready(ring))
5863  				continue;
5864  
5865  			drm_sched_start(&ring->sched, true);
5866  		}
5867  
5868  		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5869  			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5870  
5871  		if (tmp_adev->asic_reset_res)
5872  			r = tmp_adev->asic_reset_res;
5873  
5874  		tmp_adev->asic_reset_res = 0;
5875  
5876  		if (r) {
5877  			/* bad news, how do we tell it to userspace? */
5878  			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5879  			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5880  		} else {
5881  			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5882  			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5883  				DRM_WARN("smart shift update failed\n");
5884  		}
5885  	}
5886  
5887  skip_sched_resume:
5888  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5889  		/* unlock kfd: SRIOV would do it separately */
5890  		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5891  			amdgpu_amdkfd_post_reset(tmp_adev);
5892  
5893  		/* kfd_post_reset will do nothing if the kfd device is not initialized,
5894  		 * so bring up kfd here if it was not initialized before
5895  		 */
5896  		if (!adev->kfd.init_complete)
5897  			amdgpu_amdkfd_device_init(adev);
5898  
5899  		if (audio_suspended)
5900  			amdgpu_device_resume_display_audio(tmp_adev);
5901  
5902  		amdgpu_device_unset_mp1_state(tmp_adev);
5903  
5904  		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5905  	}
5906  
5907  	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5908  					    reset_list);
5909  	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5910  
5911  end_reset:
5912  	if (hive) {
5913  		mutex_unlock(&hive->hive_lock);
5914  		amdgpu_put_xgmi_hive(hive);
5915  	}
5916  
5917  	if (r)
5918  		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5919  
5920  	atomic_set(&adev->reset_domain->reset_res, r);
5921  	return r;
5922  }
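/*
 * Illustrative caller sketch (hypothetical, e.g. from a job timeout
 * handler; the surrounding variables are assumptions, not part of this
 * file):
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *	r = amdgpu_device_gpu_recover(adev, job, &reset_context);
 */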
5923  
5924  /**
5925   * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5926   *
5927   * @adev: amdgpu_device pointer
5928   * @speed: pointer to the speed of the link
5929   * @width: pointer to the width of the link
5930   *
5931   * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5932   * first physical partner to an AMD dGPU.
5933   * This will exclude any virtual switches and links.
5934   */
5935  static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5936  					    enum pci_bus_speed *speed,
5937  					    enum pcie_link_width *width)
5938  {
5939  	struct pci_dev *parent = adev->pdev;
5940  
5941  	if (!speed || !width)
5942  		return;
5943  
5944  	*speed = PCI_SPEED_UNKNOWN;
5945  	*width = PCIE_LNK_WIDTH_UNKNOWN;
5946  
5947  	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5948  		while ((parent = pci_upstream_bridge(parent))) {
5949  			/* skip upstream/downstream switches internal to the dGPU */
5950  			if (parent->vendor == PCI_VENDOR_ID_ATI)
5951  				continue;
5952  			*speed = pcie_get_speed_cap(parent);
5953  			*width = pcie_get_width_cap(parent);
5954  			break;
5955  		}
5956  	} else {
5957  		/* use the current speeds rather than max if switching is not supported */
5958  		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5959  	}
5960  }
5961  
5962  /**
5963   * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5964   *
5965   * @adev: amdgpu_device pointer
5966   *
5967   * Fetches and stores in the driver the PCIE capabilities (gen speed
5968   * and lanes) of the slot the device is in. Handles APUs and
5969   * virtualized environments where PCIE config space may not be available.
5970   */
5971  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5972  {
5973  	struct pci_dev *pdev;
5974  	enum pci_bus_speed speed_cap, platform_speed_cap;
5975  	enum pcie_link_width platform_link_width;
5976  
5977  	if (amdgpu_pcie_gen_cap)
5978  		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5979  
5980  	if (amdgpu_pcie_lane_cap)
5981  		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5982  
5983  	/* covers APUs as well */
5984  	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5985  		if (adev->pm.pcie_gen_mask == 0)
5986  			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5987  		if (adev->pm.pcie_mlw_mask == 0)
5988  			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5989  		return;
5990  	}
5991  
5992  	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5993  		return;
5994  
5995  	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5996  					&platform_link_width);
5997  
5998  	if (adev->pm.pcie_gen_mask == 0) {
5999  		/* asic caps */
6000  		pdev = adev->pdev;
6001  		speed_cap = pcie_get_speed_cap(pdev);
6002  		if (speed_cap == PCI_SPEED_UNKNOWN) {
6003  			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6004  						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6005  						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6006  		} else {
6007  			if (speed_cap == PCIE_SPEED_32_0GT)
6008  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6009  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6010  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6011  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6012  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6013  			else if (speed_cap == PCIE_SPEED_16_0GT)
6014  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6015  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6016  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6017  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6018  			else if (speed_cap == PCIE_SPEED_8_0GT)
6019  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6020  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6021  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6022  			else if (speed_cap == PCIE_SPEED_5_0GT)
6023  				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6024  							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6025  			else
6026  				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6027  		}
6028  		/* platform caps */
6029  		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6030  			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6031  						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6032  		} else {
6033  			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6034  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6035  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6036  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6037  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6038  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6039  			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6040  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6041  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6042  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6043  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6044  			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6045  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6046  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6047  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6048  			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6049  				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6050  							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6051  			else
6052  				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6053  
6054  		}
6055  	}
6056  	if (adev->pm.pcie_mlw_mask == 0) {
6057  		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6058  			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6059  		} else {
6060  			switch (platform_link_width) {
6061  			case PCIE_LNK_X32:
6062  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6063  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6064  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6065  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6066  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6067  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6068  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6069  				break;
6070  			case PCIE_LNK_X16:
6071  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6072  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6073  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6074  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6075  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6076  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6077  				break;
6078  			case PCIE_LNK_X12:
6079  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6080  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6081  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6082  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6083  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6084  				break;
6085  			case PCIE_LNK_X8:
6086  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6087  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6088  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6089  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6090  				break;
6091  			case PCIE_LNK_X4:
6092  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6093  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6094  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6095  				break;
6096  			case PCIE_LNK_X2:
6097  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6098  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6099  				break;
6100  			case PCIE_LNK_X1:
6101  				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6102  				break;
6103  			default:
6104  				break;
6105  			}
6106  		}
6107  	}
6108  }
6109  
6110  /**
6111   * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6112   *
6113   * @adev: amdgpu_device pointer
6114   * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6115   *
6116   * Return true if @peer_adev can access (DMA) @adev through the PCIe
6117   * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6118   * @peer_adev.
6119   */
6120  bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6121  				      struct amdgpu_device *peer_adev)
6122  {
6123  #ifdef CONFIG_HSA_AMD_P2P
6124  	uint64_t address_mask = peer_adev->dev->dma_mask ?
6125  		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6126  	resource_size_t aper_limit =
6127  		adev->gmc.aper_base + adev->gmc.aper_size - 1;
6128  	bool p2p_access =
6129  		!adev->gmc.xgmi.connected_to_cpu &&
6130  		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6131  
6132  	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6133  		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6134  		!(adev->gmc.aper_base & address_mask ||
6135  		  aper_limit & address_mask));
6136  #else
6137  	return false;
6138  #endif
6139  }
6140  
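/**
 * amdgpu_device_baco_enter - enter BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Disable the RAS doorbell interrupt if needed and ask the DPM code to put
 * the ASIC into the BACO state.
 * Returns 0 on success, -ENOTSUPP if BACO is not supported, or a negative
 * error code on failure.
 */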
6141  int amdgpu_device_baco_enter(struct drm_device *dev)
6142  {
6143  	struct amdgpu_device *adev = drm_to_adev(dev);
6144  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6145  
6146  	if (!amdgpu_device_supports_baco(dev))
6147  		return -ENOTSUPP;
6148  
6149  	if (ras && adev->ras_enabled &&
6150  	    adev->nbio.funcs->enable_doorbell_interrupt)
6151  		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6152  
6153  	return amdgpu_dpm_baco_enter(adev);
6154  }
6155  
6156  int amdgpu_device_baco_exit(struct drm_device *dev)
6157  {
6158  	struct amdgpu_device *adev = drm_to_adev(dev);
6159  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6160  	int ret = 0;
6161  
6162  	if (!amdgpu_device_supports_baco(dev))
6163  		return -ENOTSUPP;
6164  
6165  	ret = amdgpu_dpm_baco_exit(adev);
6166  	if (ret)
6167  		return ret;
6168  
6169  	if (ras && adev->ras_enabled &&
6170  	    adev->nbio.funcs->enable_doorbell_interrupt)
6171  		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6172  
6173  	if (amdgpu_passthrough(adev) &&
6174  	    adev->nbio.funcs->clear_doorbell_interrupt)
6175  		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6176  
6177  	return 0;
6178  }
6179  
6180  /**
6181   * amdgpu_pci_error_detected - Called when a PCI error is detected.
6182   * @pdev: PCI device struct
6183   * @state: PCI channel state
6184   *
6185   * Description: Called when a PCI error is detected.
6186   *
6187   * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6188   */
6189  pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6190  {
6191  	struct drm_device *dev = pci_get_drvdata(pdev);
6192  	struct amdgpu_device *adev = drm_to_adev(dev);
6193  	int i;
6194  
6195  	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6196  
6197  	if (adev->gmc.xgmi.num_physical_nodes > 1) {
6198  		DRM_WARN("No support for XGMI hive yet...");
6199  		return PCI_ERS_RESULT_DISCONNECT;
6200  	}
6201  
6202  	adev->pci_channel_state = state;
6203  
6204  	switch (state) {
6205  	case pci_channel_io_normal:
6206  		return PCI_ERS_RESULT_CAN_RECOVER;
6207  	/* Fatal error, prepare for slot reset */
6208  	case pci_channel_io_frozen:
6209  		/*
6210  		 * Locking adev->reset_domain->sem will prevent any external access
6211  		 * to GPU during PCI error recovery
6212  		 */
6213  		amdgpu_device_lock_reset_domain(adev->reset_domain);
6214  		amdgpu_device_set_mp1_state(adev);
6215  
6216  		/*
6217  		 * Block any work scheduling as we do for regular GPU reset
6218  		 * for the duration of the recovery
6219  		 */
6220  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6221  			struct amdgpu_ring *ring = adev->rings[i];
6222  
6223  			if (!amdgpu_ring_sched_ready(ring))
6224  				continue;
6225  
6226  			drm_sched_stop(&ring->sched, NULL);
6227  		}
6228  		atomic_inc(&adev->gpu_reset_counter);
6229  		return PCI_ERS_RESULT_NEED_RESET;
6230  	case pci_channel_io_perm_failure:
6231  		/* Permanent error, prepare for device removal */
6232  		return PCI_ERS_RESULT_DISCONNECT;
6233  	}
6234  
6235  	return PCI_ERS_RESULT_NEED_RESET;
6236  }
6237  
6238  /**
6239   * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6240   * @pdev: pointer to PCI device
6241   */
6242  pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6243  {
6244  
6245  	DRM_INFO("PCI error: mmio enabled callback!!\n");
6246  
6247  	/* TODO - dump whatever for debugging purposes */
6248  
6249  	/* This is called only if amdgpu_pci_error_detected returns
6250  	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6251  	 * works, so there is no need to reset the slot.
6252  	 */
6253  
6254  	return PCI_ERS_RESULT_RECOVERED;
6255  }
6256  
6257  /**
6258   * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6259   * @pdev: PCI device struct
6260   *
6261   * Description: This routine is called by the pci error recovery
6262   * code after the PCI slot has been reset, just before we
6263   * should resume normal operations.
6264   */
6265  pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6266  {
6267  	struct drm_device *dev = pci_get_drvdata(pdev);
6268  	struct amdgpu_device *adev = drm_to_adev(dev);
6269  	int r, i;
6270  	struct amdgpu_reset_context reset_context;
6271  	u32 memsize;
6272  	struct list_head device_list;
6273  	struct amdgpu_hive_info *hive;
6274  	int hive_ras_recovery = 0;
6275  	struct amdgpu_ras *ras;
6276  
6277  	/* PCI error slot reset should be skipped during RAS recovery */
6278  	hive = amdgpu_get_xgmi_hive(adev);
6279  	if (hive) {
6280  		hive_ras_recovery = atomic_read(&hive->ras_recovery);
6281  		amdgpu_put_xgmi_hive(hive);
6282  	}
6283  	ras = amdgpu_ras_get_context(adev);
6284  	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6285  		 ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6286  		return PCI_ERS_RESULT_RECOVERED;
6287  
6288  	DRM_INFO("PCI error: slot reset callback!!\n");
6289  
6290  	memset(&reset_context, 0, sizeof(reset_context));
6291  
6292  	INIT_LIST_HEAD(&device_list);
6293  	list_add_tail(&adev->reset_list, &device_list);
6294  
6295  	/* wait for asic to come out of reset */
6296  	msleep(500);
6297  
6298  	/* Restore PCI config space */
6299  	amdgpu_device_load_pci_state(pdev);
6300  
6301  	/* confirm ASIC came out of reset */
6302  	for (i = 0; i < adev->usec_timeout; i++) {
6303  		memsize = amdgpu_asic_get_config_memsize(adev);
6304  
6305  		if (memsize != 0xffffffff)
6306  			break;
6307  		udelay(1);
6308  	}
6309  	if (memsize == 0xffffffff) {
6310  		r = -ETIME;
6311  		goto out;
6312  	}
6313  
6314  	reset_context.method = AMD_RESET_METHOD_NONE;
6315  	reset_context.reset_req_dev = adev;
6316  	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6317  	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6318  
6319  	adev->no_hw_access = true;
6320  	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6321  	adev->no_hw_access = false;
6322  	if (r)
6323  		goto out;
6324  
6325  	r = amdgpu_do_asic_reset(&device_list, &reset_context);
6326  
6327  out:
6328  	if (!r) {
6329  		if (amdgpu_device_cache_pci_state(adev->pdev))
6330  			pci_restore_state(adev->pdev);
6331  
6332  		DRM_INFO("PCIe error recovery succeeded\n");
6333  	} else {
6334  		DRM_ERROR("PCIe error recovery failed, err:%d\n", r);
6335  		amdgpu_device_unset_mp1_state(adev);
6336  		amdgpu_device_unlock_reset_domain(adev->reset_domain);
6337  	}
6338  
6339  	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6340  }
6341  
6342  /**
6343   * amdgpu_pci_resume() - resume normal ops after PCI reset
6344   * @pdev: pointer to PCI device
6345   *
6346   * Called when the error recovery driver tells us that it's
6347   * OK to resume normal operation.
6348   */
6349  void amdgpu_pci_resume(struct pci_dev *pdev)
6350  {
6351  	struct drm_device *dev = pci_get_drvdata(pdev);
6352  	struct amdgpu_device *adev = drm_to_adev(dev);
6353  	int i;
6354  
6355  
6356  	DRM_INFO("PCI error: resume callback!!\n");
6357  
6358  	/* Only continue execution for the case of pci_channel_io_frozen */
6359  	if (adev->pci_channel_state != pci_channel_io_frozen)
6360  		return;
6361  
6362  	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6363  		struct amdgpu_ring *ring = adev->rings[i];
6364  
6365  		if (!amdgpu_ring_sched_ready(ring))
6366  			continue;
6367  
6368  		drm_sched_start(&ring->sched, true);
6369  	}
6370  
6371  	amdgpu_device_unset_mp1_state(adev);
6372  	amdgpu_device_unlock_reset_domain(adev->reset_domain);
6373  }
6374  
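/* Save and stash the PCI config space so it can be restored after a GPU reset. */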
6375  bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6376  {
6377  	struct drm_device *dev = pci_get_drvdata(pdev);
6378  	struct amdgpu_device *adev = drm_to_adev(dev);
6379  	int r;
6380  
6381  	r = pci_save_state(pdev);
6382  	if (!r) {
6383  		kfree(adev->pci_state);
6384  
6385  		adev->pci_state = pci_store_saved_state(pdev);
6386  
6387  		if (!adev->pci_state) {
6388  			DRM_ERROR("Failed to store PCI saved state");
6389  			return false;
6390  		}
6391  	} else {
6392  		DRM_WARN("Failed to save PCI state, err:%d\n", r);
6393  		return false;
6394  	}
6395  
6396  	return true;
6397  }
6398  
6399  bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6400  {
6401  	struct drm_device *dev = pci_get_drvdata(pdev);
6402  	struct amdgpu_device *adev = drm_to_adev(dev);
6403  	int r;
6404  
6405  	if (!adev->pci_state)
6406  		return false;
6407  
6408  	r = pci_load_saved_state(pdev, adev->pci_state);
6409  
6410  	if (!r) {
6411  		pci_restore_state(pdev);
6412  	} else {
6413  		DRM_WARN("Failed to load PCI state, err:%d\n", r);
6414  		return false;
6415  	}
6416  
6417  	return true;
6418  }
6419  
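/*
 * Flush the HDP (Host Data Path) write cache; this is skipped on APUs
 * (unless running in passthrough) and on GPUs connected to the CPU over
 * XGMI, where it is not needed.
 */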
6420  void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6421  		struct amdgpu_ring *ring)
6422  {
6423  #ifdef CONFIG_X86_64
6424  	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6425  		return;
6426  #endif
6427  	if (adev->gmc.xgmi.connected_to_cpu)
6428  		return;
6429  
6430  	if (ring && ring->funcs->emit_hdp_flush)
6431  		amdgpu_ring_emit_hdp_flush(ring);
6432  	else
6433  		amdgpu_asic_flush_hdp(adev, ring);
6434  }
6435  
6436  void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6437  		struct amdgpu_ring *ring)
6438  {
6439  #ifdef CONFIG_X86_64
6440  	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6441  		return;
6442  #endif
6443  	if (adev->gmc.xgmi.connected_to_cpu)
6444  		return;
6445  
6446  	amdgpu_asic_invalidate_hdp(adev, ring);
6447  }
6448  
6449  int amdgpu_in_reset(struct amdgpu_device *adev)
6450  {
6451  	return atomic_read(&adev->reset_domain->in_gpu_reset);
6452  }
6453  
6454  /**
6455   * amdgpu_device_halt() - bring hardware to some kind of halt state
6456   *
6457   * @adev: amdgpu_device pointer
6458   *
6459   * Bring the hardware to some kind of halt state so that no one can touch it
6460   * any more. It helps to maintain the error context when an error occurs.
6461   * Compared to a simple hang, the system will stay stable at least for SSH
6462   * access. It should then be trivial to inspect the hardware state and
6463   * see what's going on. Implemented as follows:
6464   *
6465   * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
6466   *    clears all CPU mappings to the device, disallows remappings through page faults
6467   * 2. amdgpu_irq_disable_all() disables all interrupts
6468   * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6469   * 4. set adev->no_hw_access to avoid potential crashes after step 5
6470   * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6471   * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6472   *    flush any in-flight DMA operations
6473   */
6474  void amdgpu_device_halt(struct amdgpu_device *adev)
6475  {
6476  	struct pci_dev *pdev = adev->pdev;
6477  	struct drm_device *ddev = adev_to_drm(adev);
6478  
6479  	amdgpu_xcp_dev_unplug(adev);
6480  	drm_dev_unplug(ddev);
6481  
6482  	amdgpu_irq_disable_all(adev);
6483  
6484  	amdgpu_fence_driver_hw_fini(adev);
6485  
6486  	adev->no_hw_access = true;
6487  
6488  	amdgpu_device_unmap_mmio(adev);
6489  
6490  	pci_disable_device(pdev);
6491  	pci_wait_for_pending_transaction(pdev);
6492  }
6493  
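/*
 * Indirect PCIe port register access through the NBIO index/data pair,
 * serialized by pcie_idx_lock.
 */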
6494  u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6495  				u32 reg)
6496  {
6497  	unsigned long flags, address, data;
6498  	u32 r;
6499  
6500  	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6501  	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6502  
6503  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6504  	WREG32(address, reg * 4);
6505  	(void)RREG32(address);
6506  	r = RREG32(data);
6507  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6508  	return r;
6509  }
6510  
6511  void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6512  				u32 reg, u32 v)
6513  {
6514  	unsigned long flags, address, data;
6515  
6516  	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6517  	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6518  
6519  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6520  	WREG32(address, reg * 4);
6521  	(void)RREG32(address);
6522  	WREG32(data, v);
6523  	(void)RREG32(data);
6524  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6525  }
6526  
6527  /**
6528   * amdgpu_device_switch_gang - switch to a new gang
6529   * @adev: amdgpu_device pointer
6530   * @gang: the gang to switch to
6531   *
6532   * Try to switch to a new gang.
6533   * Returns: NULL if we switched to the new gang or a reference to the current
6534   * gang leader.
6535   */
6536  struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6537  					    struct dma_fence *gang)
6538  {
6539  	struct dma_fence *old = NULL;
6540  
6541  	do {
6542  		dma_fence_put(old);
6543  		rcu_read_lock();
6544  		old = dma_fence_get_rcu_safe(&adev->gang_submit);
6545  		rcu_read_unlock();
6546  
6547  		if (old == gang)
6548  			break;
6549  
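		/* The current gang leader has not finished yet, hand it back to the caller. */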
6550  		if (!dma_fence_is_signaled(old))
6551  			return old;
6552  
6553  	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6554  			 old, gang) != old);
6555  
6556  	dma_fence_put(old);
6557  	return NULL;
6558  }
6559  
6560  bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6561  {
6562  	switch (adev->asic_type) {
6563  #ifdef CONFIG_DRM_AMDGPU_SI
6564  	case CHIP_HAINAN:
6565  #endif
6566  	case CHIP_TOPAZ:
6567  		/* chips with no display hardware */
6568  		return false;
6569  #ifdef CONFIG_DRM_AMDGPU_SI
6570  	case CHIP_TAHITI:
6571  	case CHIP_PITCAIRN:
6572  	case CHIP_VERDE:
6573  	case CHIP_OLAND:
6574  #endif
6575  #ifdef CONFIG_DRM_AMDGPU_CIK
6576  	case CHIP_BONAIRE:
6577  	case CHIP_HAWAII:
6578  	case CHIP_KAVERI:
6579  	case CHIP_KABINI:
6580  	case CHIP_MULLINS:
6581  #endif
6582  	case CHIP_TONGA:
6583  	case CHIP_FIJI:
6584  	case CHIP_POLARIS10:
6585  	case CHIP_POLARIS11:
6586  	case CHIP_POLARIS12:
6587  	case CHIP_VEGAM:
6588  	case CHIP_CARRIZO:
6589  	case CHIP_STONEY:
6590  		/* chips with display hardware */
6591  		return true;
6592  	default:
6593  		/* IP discovery */
6594  		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6595  		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6596  			return false;
6597  		return true;
6598  	}
6599  }
6600  
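/*
 * Poll @reg_addr until (value & mask) == expected_value. The countdown is
 * restarted whenever the register value changes; returns 0 on success or
 * -ETIMEDOUT if the value does not settle within adev->usec_timeout polls.
 */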
6601  uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6602  		uint32_t inst, uint32_t reg_addr, char reg_name[],
6603  		uint32_t expected_value, uint32_t mask)
6604  {
6605  	uint32_t ret = 0;
6606  	uint32_t old_ = 0;
6607  	uint32_t tmp_ = RREG32(reg_addr);
6608  	uint32_t loop = adev->usec_timeout;
6609  
6610  	while ((tmp_ & (mask)) != (expected_value)) {
6611  		if (old_ != tmp_) {
6612  			loop = adev->usec_timeout;
6613  			old_ = tmp_;
6614  		} else
6615  			udelay(1);
6616  		tmp_ = RREG32(reg_addr);
6617  		loop--;
6618  		if (!loop) {
6619  			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6620  				  inst, reg_name, (uint32_t)expected_value,
6621  				  (uint32_t)(tmp_ & (mask)));
6622  			ret = -ETIMEDOUT;
6623  			break;
6624  		}
6625  	}
6626  	return ret;
6627  }
6628