xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 182bdd59be41595e211ac98406d3637fc6141017)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
93 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
98 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
99 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
100 MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
101 
/* Resume-related time value in ms, going by the _MS suffix (users are outside this part of the file) */
#define AMDGPU_RESUME_MS		2000
/* Retry bound. NOTE(review): presumably used together with AMDGPU_RETRY_SRIOV_RESET() - confirm at the call site */
#define AMDGPU_MAX_RETRY_LIMIT		2
/* Evaluates true when error code @r is -EBUSY, -ETIMEDOUT or -EINVAL */
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/* Fallback PCIE index / index-hi / data values, each written as an offset shifted right by two */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

/* Bit flags returned by amdgpu_device_get_vbios_flags() */
#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
111 
112 static const struct drm_driver amdgpu_kms_driver;
113 
/*
 * Printable ASIC names.
 * NOTE(review): presumably indexed by the ASIC type enum, in which case the
 * order here must stay in sync with that enum - confirm against its definition.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
154 
/* Mask with one bit set for every IP block type, bits [AMD_IP_BLOCK_TYPE_NUM - 1:0] */
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
/*
 * Default init level where all blocks are expected to be initialized. This is
 * the level of initialization expected by default and also after a full reset
 * of the device.
 */
struct amdgpu_init_level amdgpu_init_default = {
	.level = AMDGPU_INIT_LEVEL_DEFAULT,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Reset-recovery init level. Like the default level it selects every IP
 * block for hardware init; only the level identifier differs.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
 * be reset before first use. Only GMC, SMC, COMMON, IH and PSP are selected.
 */
struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	.hwini_ip_block_mask =
		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
		BIT(AMD_IP_BLOCK_TYPE_PSP)
};
183 
184 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
186 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
187 
188 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
213 
214 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
215 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
216 				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
362 static struct attribute *amdgpu_board_attrs[] = {
363 	&dev_attr_board_info.attr,
364 	NULL,
365 };
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
380 static const struct attribute_group amdgpu_board_attrs_group = {
381 	.attrs = amdgpu_board_attrs,
382 	.is_visible = amdgpu_board_attrs_is_visible
383 };
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
/* Files placed in the "uma" sysfs group: carveout (rw) and carveout_options (ro) */
static struct attribute *amdgpu_uma_attrs[] = {
	&dev_attr_carveout.attr,
	&dev_attr_carveout_options.attr,
	NULL
};

/* Named attribute group; registered by amdgpu_uma_sysfs_init() */
const struct attribute_group amdgpu_uma_attr_group = {
	.name = "uma",
	.attrs = amdgpu_uma_attrs
};
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
554 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise return false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise return false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Forwards the answer of the ASIC specific callback.
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	int support = amdgpu_asic_supports_baco(adev);

	return support;
}
604 
605 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
606 {
607 	int bamaco_support;
608 
609 	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
610 	bamaco_support = amdgpu_device_supports_baco(adev);
611 
612 	switch (amdgpu_runtime_pm) {
613 	case 2:
614 		if (bamaco_support & MACO_SUPPORT) {
615 			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
616 			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
617 		} else if (bamaco_support == BACO_SUPPORT) {
618 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
619 			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
620 		}
621 		break;
622 	case 1:
623 		if (bamaco_support & BACO_SUPPORT) {
624 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
625 			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
626 		}
627 		break;
628 	case -1:
629 	case -2:
630 		if (amdgpu_device_supports_px(adev)) {
631 			/* enable PX as runtime mode */
632 			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
633 			dev_info(adev->dev, "Using ATPX for runtime pm\n");
634 		} else if (amdgpu_device_supports_boco(adev)) {
635 			/* enable boco as runtime mode */
636 			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
637 			dev_info(adev->dev, "Using BOCO for runtime pm\n");
638 		} else {
639 			if (!bamaco_support)
640 				goto no_runtime_pm;
641 
642 			switch (adev->asic_type) {
643 			case CHIP_VEGA20:
644 			case CHIP_ARCTURUS:
645 				/* BACO are not supported on vega20 and arctrus */
646 				break;
647 			case CHIP_VEGA10:
648 				/* enable BACO as runpm mode if noretry=0 */
649 				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
650 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
651 				break;
652 			default:
653 				/* enable BACO as runpm mode on CI+ */
654 				if (!amdgpu_passthrough(adev))
655 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
656 				break;
657 			}
658 
659 			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
660 				if (bamaco_support & MACO_SUPPORT) {
661 					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
662 					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
663 				} else {
664 					dev_info(adev->dev, "Using BACO for runtime pm\n");
665 				}
666 			}
667 		}
668 		break;
669 	case 0:
670 		dev_info(adev->dev, "runtime pm is manually disabled\n");
671 		break;
672 	default:
673 		break;
674 	}
675 
676 no_runtime_pm:
677 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
678 		dev_info(adev->dev, "Runtime PM not available\n");
679 }
/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @adev: amdgpu device pointer
 *
 * Returns true if the device supports BOCO and ACPI reports power shift
 * control as supported, otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
{
	if (!amdgpu_device_supports_boco(adev))
		return false;

	return amdgpu_acpi_is_power_shift_control_supported();
}
694 
695 /*
696  * VRAM access helper functions
697  */
698 
699 /**
700  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
701  *
702  * @adev: amdgpu_device pointer
703  * @pos: offset of the buffer in vram
704  * @buf: virtual address of the buffer in system memory
705  * @size: read/write size, sizeof(@buf) must > @size
706  * @write: true - write to vram, otherwise - read from vram
707  */
708 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
709 			     void *buf, size_t size, bool write)
710 {
711 	unsigned long flags;
712 	uint32_t hi = ~0, tmp = 0;
713 	uint32_t *data = buf;
714 	uint64_t last;
715 	int idx;
716 
717 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
718 		return;
719 
720 	if (!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)) {
721 		dev_err(adev->dev, "unaligned pos/size (pos=0x%llx, size=0x%zx)\n",
722 			pos, size);
723 		drm_dev_exit(idx);
724 		return;
725 	}
726 
727 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
728 	for (last = pos + size; pos < last; pos += 4) {
729 		tmp = pos >> 31;
730 
731 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
732 		if (tmp != hi) {
733 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
734 			hi = tmp;
735 		}
736 		if (write)
737 			WREG32_NO_KIQ(mmMM_DATA, *data++);
738 		else
739 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
740 	}
741 
742 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
743 	drm_dev_exit(idx);
744 }
745 
746 /**
747  * amdgpu_device_aper_access - access vram by vram aperture
748  *
749  * @adev: amdgpu_device pointer
750  * @pos: offset of the buffer in vram
751  * @buf: virtual address of the buffer in system memory
752  * @size: read/write size, sizeof(@buf) must > @size
753  * @write: true - write to vram, otherwise - read from vram
754  *
755  * The return value means how many bytes have been transferred.
756  */
757 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
758 				 void *buf, size_t size, bool write)
759 {
760 #ifdef CONFIG_64BIT
761 	void __iomem *addr;
762 	size_t count = 0;
763 	uint64_t last;
764 
765 	if (!adev->mman.aper_base_kaddr)
766 		return 0;
767 
768 	last = min(pos + size, adev->gmc.visible_vram_size);
769 	if (last > pos) {
770 		addr = adev->mman.aper_base_kaddr + pos;
771 		count = last - pos;
772 
773 		if (write) {
774 			memcpy_toio(addr, buf, count);
775 			/* Make sure HDP write cache flush happens without any reordering
776 			 * after the system memory contents are sent over PCIe device
777 			 */
778 			mb();
779 			amdgpu_device_flush_hdp(adev, NULL);
780 		} else {
781 			amdgpu_device_invalidate_hdp(adev, NULL);
782 			/* Make sure HDP read cache is invalidated before issuing a read
783 			 * to the PCIe device
784 			 */
785 			mb();
786 			memcpy_fromio(buf, addr, count);
787 		}
788 
789 	}
790 
791 	return count;
792 #else
793 	return 0;
794 #endif
795 }
796 
797 /**
798  * amdgpu_device_vram_access - read/write a buffer in vram
799  *
800  * @adev: amdgpu_device pointer
801  * @pos: offset of the buffer in vram
802  * @buf: virtual address of the buffer in system memory
803  * @size: read/write size, sizeof(@buf) must > @size
804  * @write: true - write to vram, otherwise - read from vram
805  */
806 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
807 			       void *buf, size_t size, bool write)
808 {
809 	size_t count;
810 
811 	/* try to using vram apreature to access vram first */
812 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
813 	size -= count;
814 	if (size) {
815 		/* using MM to access rest vram */
816 		pos += count;
817 		buf += count;
818 		amdgpu_device_mm_access(adev, pos, buf, size, write);
819 	}
820 }
821 
822 /*
823  * register access helper functions.
824  */
825 
826 /* Check if hw access should be skipped because of hotplug or device error */
827 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
828 {
829 	if (adev->no_hw_access)
830 		return true;
831 
832 #ifdef CONFIG_LOCKDEP
833 	/*
834 	 * This is a bit complicated to understand, so worth a comment. What we assert
835 	 * here is that the GPU reset is not running on another thread in parallel.
836 	 *
837 	 * For this we trylock the read side of the reset semaphore, if that succeeds
838 	 * we know that the reset is not running in parallel.
839 	 *
840 	 * If the trylock fails we assert that we are either already holding the read
841 	 * side of the lock or are the reset thread itself and hold the write side of
842 	 * the lock.
843 	 */
844 	if (in_task()) {
845 		if (down_read_trylock(&adev->reset_domain->sem))
846 			up_read(&adev->reset_domain->sem);
847 		else
848 			lockdep_assert_held(&adev->reset_domain->sem);
849 	}
850 #endif
851 	return false;
852 }
853 
854 /**
855  * amdgpu_device_get_rev_id - query device rev_id
856  *
857  * @adev: amdgpu_device pointer
858  *
859  * Return device rev_id
860  */
861 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
862 {
863 	return adev->nbio.funcs->get_rev_id(adev);
864 }
865 
866 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
867 {
868 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
869 		return AMDGPU_VBIOS_SKIP;
870 
871 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
872 		return AMDGPU_VBIOS_OPTIONAL;
873 
874 	return 0;
875 }
876 
877 /**
878  * amdgpu_device_asic_init - Wrapper for atom asic_init
879  *
880  * @adev: amdgpu_device pointer
881  *
882  * Does any asic specific work and then calls atom asic init.
883  */
884 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
885 {
886 	uint32_t flags;
887 	bool optional;
888 	int ret;
889 
890 	amdgpu_asic_pre_asic_init(adev);
891 	flags = amdgpu_device_get_vbios_flags(adev);
892 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
893 
894 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
895 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
896 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
897 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
898 		amdgpu_psp_wait_for_bootloader(adev);
899 		if (optional && !adev->bios)
900 			return 0;
901 
902 		ret = amdgpu_atomfirmware_asic_init(adev, true);
903 		return ret;
904 	} else {
905 		if (optional && !adev->bios)
906 			return 0;
907 
908 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
909 	}
910 
911 	return 0;
912 }
913 
914 /**
915  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
916  *
917  * @adev: amdgpu_device pointer
918  *
919  * Allocates a scratch page of VRAM for use by various things in the
920  * driver.
921  */
922 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
923 {
924 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
925 				       AMDGPU_GEM_DOMAIN_VRAM |
926 				       AMDGPU_GEM_DOMAIN_GTT,
927 				       &adev->mem_scratch.robj,
928 				       &adev->mem_scratch.gpu_addr,
929 				       (void **)&adev->mem_scratch.ptr);
930 }
931 
932 /**
933  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
934  *
935  * @adev: amdgpu_device pointer
936  *
937  * Frees the VRAM scratch page.
938  */
939 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
940 {
941 	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
942 }
943 
944 /**
945  * amdgpu_device_program_register_sequence - program an array of registers.
946  *
947  * @adev: amdgpu_device pointer
948  * @registers: pointer to the register array
949  * @array_size: size of the register array
950  *
951  * Programs an array or registers with and or masks.
952  * This is a helper for setting golden registers.
953  */
954 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
955 					     const u32 *registers,
956 					     const u32 array_size)
957 {
958 	u32 tmp, reg, and_mask, or_mask;
959 	int i;
960 
961 	if (array_size % 3)
962 		return;
963 
964 	for (i = 0; i < array_size; i += 3) {
965 		reg = registers[i + 0];
966 		and_mask = registers[i + 1];
967 		or_mask = registers[i + 2];
968 
969 		if (and_mask == 0xffffffff) {
970 			tmp = or_mask;
971 		} else {
972 			tmp = RREG32(reg);
973 			tmp &= ~and_mask;
974 			if (adev->family >= AMDGPU_FAMILY_AI)
975 				tmp |= (or_mask & and_mask);
976 			else
977 				tmp |= or_mask;
978 		}
979 		WREG32(reg, tmp);
980 	}
981 }
982 
983 /**
984  * amdgpu_device_pci_config_reset - reset the GPU
985  *
986  * @adev: amdgpu_device pointer
987  *
988  * Resets the GPU using the pci config reset sequence.
989  * Only applicable to asics prior to vega10.
990  */
991 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
992 {
993 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
994 }
995 
996 /**
997  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
998  *
999  * @adev: amdgpu_device pointer
1000  *
1001  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1002  */
1003 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1004 {
1005 	return pci_reset_function(adev->pdev);
1006 }
1007 
1008 /*
1009  * amdgpu_device_wb_*()
1010  * Writeback is the method by which the GPU updates special pages in memory
1011  * with the status of certain GPU events (fences, ring pointers,etc.).
1012  */
1013 
1014 /**
1015  * amdgpu_device_wb_fini - Disable Writeback and free memory
1016  *
1017  * @adev: amdgpu_device pointer
1018  *
1019  * Disables Writeback and frees the Writeback memory (all asics).
1020  * Used at driver shutdown.
1021  */
1022 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1023 {
1024 	if (adev->wb.wb_obj) {
1025 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1026 				      &adev->wb.gpu_addr,
1027 				      (void **)&adev->wb.wb);
1028 		adev->wb.wb_obj = NULL;
1029 	}
1030 }
1031 
1032 /**
1033  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1034  *
1035  * @adev: amdgpu_device pointer
1036  *
1037  * Initializes writeback and allocates writeback memory (all asics).
1038  * Used at driver startup.
1039  * Returns 0 on success or an -error on failure.
1040  */
1041 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1042 {
1043 	int r;
1044 
1045 	if (adev->wb.wb_obj == NULL) {
1046 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1047 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1048 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1049 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1050 					    (void **)&adev->wb.wb);
1051 		if (r) {
1052 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1053 			return r;
1054 		}
1055 
1056 		adev->wb.num_wb = AMDGPU_MAX_WB;
1057 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1058 
1059 		/* clear wb memory */
1060 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1061 	}
1062 
1063 	return 0;
1064 }
1065 
1066 /**
1067  * amdgpu_device_wb_get - Allocate a wb entry
1068  *
1069  * @adev: amdgpu_device pointer
1070  * @wb: wb index
1071  *
1072  * Allocate a wb slot for use by the driver (all asics).
1073  * Returns 0 on success or -EINVAL on failure.
1074  */
1075 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1076 {
1077 	unsigned long flags, offset;
1078 
1079 	spin_lock_irqsave(&adev->wb.lock, flags);
1080 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1081 	if (offset < adev->wb.num_wb) {
1082 		__set_bit(offset, adev->wb.used);
1083 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1084 		*wb = offset << 3; /* convert to dw offset */
1085 		return 0;
1086 	} else {
1087 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1088 		return -EINVAL;
1089 	}
1090 }
1091 
1092 /**
1093  * amdgpu_device_wb_free - Free a wb entry
1094  *
1095  * @adev: amdgpu_device pointer
1096  * @wb: wb index
1097  *
1098  * Free a wb slot allocated for use by the driver (all asics)
1099  */
1100 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1101 {
1102 	unsigned long flags;
1103 
1104 	wb >>= 3;
1105 	spin_lock_irqsave(&adev->wb.lock, flags);
1106 	if (wb < adev->wb.num_wb)
1107 		__clear_bit(wb, adev->wb.used);
1108 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1109 }
1110 
1111 /**
1112  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1113  *
1114  * @adev: amdgpu_device pointer
1115  *
1116  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1117  * to fail, but if any of the BARs is not accessible after the size we abort
1118  * driver loading by returning -ENODEV.
1119  */
1120 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1121 {
1122 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1123 	struct pci_bus *root;
1124 	struct resource *res;
1125 	int max_size, r;
1126 	unsigned int i;
1127 	u16 cmd;
1128 
1129 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1130 		return 0;
1131 
1132 	/* Bypass for VF */
1133 	if (amdgpu_sriov_vf(adev))
1134 		return 0;
1135 
1136 	if (!amdgpu_rebar)
1137 		return 0;
1138 
1139 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1140 	if ((amdgpu_runtime_pm != 0) &&
1141 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1142 	    adev->pdev->device == 0x731f &&
1143 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1144 		return 0;
1145 
1146 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1147 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1148 		dev_warn(
1149 			adev->dev,
1150 			"System can't access extended configuration space, please check!!\n");
1151 
1152 	/* skip if the bios has already enabled large BAR */
1153 	if (adev->gmc.real_vram_size &&
1154 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1155 		return 0;
1156 
1157 	/* Check if the root BUS has 64bit memory resources */
1158 	root = adev->pdev->bus;
1159 	while (root->parent)
1160 		root = root->parent;
1161 
1162 	pci_bus_for_each_resource(root, res, i) {
1163 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1164 		    res->start > 0x100000000ull)
1165 			break;
1166 	}
1167 
1168 	/* Trying to resize is pointless without a root hub window above 4GB */
1169 	if (!res)
1170 		return 0;
1171 
1172 	/* Limit the BAR size to what is available */
1173 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1174 	if (max_size < 0)
1175 		return 0;
1176 	rbar_size = min(max_size, rbar_size);
1177 
1178 	/* Disable memory decoding while we change the BAR addresses and size */
1179 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1180 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1181 			      cmd & ~PCI_COMMAND_MEMORY);
1182 
1183 	/* Tear down doorbell as resizing will release BARs */
1184 	amdgpu_doorbell_fini(adev);
1185 
1186 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1187 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1188 								  : 1 << 2);
1189 	if (r == -ENOSPC)
1190 		dev_info(adev->dev,
1191 			 "Not enough PCI address space for a large BAR.");
1192 	else if (r && r != -ENOTSUPP)
1193 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1194 
1195 	/* When the doorbell or fb BAR isn't available we have no chance of
1196 	 * using the device.
1197 	 */
1198 	r = amdgpu_doorbell_init(adev);
1199 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1200 		return -ENODEV;
1201 
1202 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1203 
1204 	return 0;
1205 }
1206 
/*
 * GPU helper functions.
 */
1210 /**
1211  * amdgpu_device_need_post - check if the hw need post or not
1212  *
1213  * @adev: amdgpu_device pointer
1214  *
1215  * Check if the asic has been initialized (all asics) at driver startup
1216  * or post is needed if  hw reset is performed.
1217  * Returns true if need or false if not.
1218  */
1219 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1220 {
1221 	uint32_t reg, flags;
1222 
1223 	if (amdgpu_sriov_vf(adev))
1224 		return false;
1225 
1226 	flags = amdgpu_device_get_vbios_flags(adev);
1227 	if (flags & AMDGPU_VBIOS_SKIP)
1228 		return false;
1229 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1230 		return false;
1231 
1232 	if (amdgpu_passthrough(adev)) {
1233 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1234 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1235 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1236 		 * vpost executed for smc version below 22.15
1237 		 */
1238 		if (adev->asic_type == CHIP_FIJI) {
1239 			int err;
1240 			uint32_t fw_ver;
1241 
1242 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1243 			/* force vPost if error occurred */
1244 			if (err)
1245 				return true;
1246 
1247 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1248 			release_firmware(adev->pm.fw);
1249 			if (fw_ver < 0x00160e00)
1250 				return true;
1251 		}
1252 	}
1253 
1254 	/* Don't post if we need to reset whole hive on init */
1255 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1256 		return false;
1257 
1258 	if (adev->has_hw_reset) {
1259 		adev->has_hw_reset = false;
1260 		return true;
1261 	}
1262 
1263 	/* bios scratch used on CIK+ */
1264 	if (adev->asic_type >= CHIP_BONAIRE)
1265 		return amdgpu_atombios_scratch_need_asic_init(adev);
1266 
1267 	/* check MEM_SIZE for older asics */
1268 	reg = amdgpu_asic_get_config_memsize(adev);
1269 
1270 	if ((reg != 0) && (reg != 0xffffffff))
1271 		return false;
1272 
1273 	return true;
1274 }
1275 
1276 /*
1277  * Check whether seamless boot is supported.
1278  *
1279  * So far we only support seamless boot on DCE 3.0 or later.
1280  * If users report that it works on older ASICS as well, we may
1281  * loosen this.
1282  */
1283 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1284 {
1285 	switch (amdgpu_seamless) {
1286 	case -1:
1287 		break;
1288 	case 1:
1289 		return true;
1290 	case 0:
1291 		return false;
1292 	default:
1293 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1294 			amdgpu_seamless);
1295 		return false;
1296 	}
1297 
1298 	if (!(adev->flags & AMD_IS_APU))
1299 		return false;
1300 
1301 	if (adev->mman.keep_stolen_vga_memory)
1302 		return false;
1303 
1304 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1305 }
1306 
1307 /*
1308  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1309  * don't support dynamic speed switching. Until we have confirmation from Intel
1310  * that a specific host supports it, it's safer that we keep it disabled for all.
1311  *
1312  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1313  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1314  */
1315 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1316 {
1317 #if IS_ENABLED(CONFIG_X86)
1318 	struct cpuinfo_x86 *c = &cpu_data(0);
1319 
1320 	/* eGPU change speeds based on USB4 fabric conditions */
1321 	if (dev_is_removable(adev->dev))
1322 		return true;
1323 
1324 	if (c->x86_vendor == X86_VENDOR_INTEL)
1325 		return false;
1326 #endif
1327 	return true;
1328 }
1329 
1330 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1331 {
1332 	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1333 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1334 	 * Disable ASPM on SI for the time being.
1335 	 */
1336 	if (adev->family == AMDGPU_FAMILY_SI)
1337 		return true;
1338 
1339 #if IS_ENABLED(CONFIG_X86)
1340 	struct cpuinfo_x86 *c = &cpu_data(0);
1341 
1342 	if (c->x86_vendor == X86_VENDOR_INTEL) {
1343 		switch (c->x86_model) {
1344 		case VFM_MODEL(INTEL_ALDERLAKE):
1345 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1346 		case VFM_MODEL(INTEL_RAPTORLAKE):
1347 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1348 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1349 		case VFM_MODEL(INTEL_TIGERLAKE):
1350 		case VFM_MODEL(INTEL_TIGERLAKE_L):
1351 			return true;
1352 		default:
1353 			return false;
1354 		}
1355 	} else {
1356 		return false;
1357 	}
1358 #else
1359 	return false;
1360 #endif
1361 }
1362 
1363 /**
1364  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1365  *
1366  * @adev: amdgpu_device pointer
1367  *
1368  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1369  * be set for this device.
1370  *
1371  * Returns true if it should be used or false if not.
1372  */
1373 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1374 {
1375 	switch (amdgpu_aspm) {
1376 	case -1:
1377 		break;
1378 	case 0:
1379 		return false;
1380 	case 1:
1381 		return true;
1382 	default:
1383 		return false;
1384 	}
1385 	if (adev->flags & AMD_IS_APU)
1386 		return false;
1387 	if (amdgpu_device_aspm_support_quirk(adev))
1388 		return false;
1389 	return pcie_aspm_enabled(adev->pdev);
1390 }
1391 
1392 /* if we get transitioned to only one device, take VGA back */
1393 /**
1394  * amdgpu_device_vga_set_decode - enable/disable vga decode
1395  *
1396  * @pdev: PCI device pointer
1397  * @state: enable/disable vga decode
1398  *
1399  * Enable/disable vga decode (all asics).
1400  * Returns VGA resource flags.
1401  */
1402 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1403 		bool state)
1404 {
1405 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1406 
1407 	amdgpu_asic_set_vga_state(adev, state);
1408 	if (state)
1409 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1410 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411 	else
1412 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1413 }
1414 
1415 /**
1416  * amdgpu_device_check_block_size - validate the vm block size
1417  *
1418  * @adev: amdgpu_device pointer
1419  *
1420  * Validates the vm block size specified via module parameter.
1421  * The vm block size defines number of bits in page table versus page directory,
1422  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1423  * page table and the remaining bits are in the page directory.
1424  */
1425 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1426 {
1427 	/* defines number of bits in page table versus page directory,
1428 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1429 	 * page table and the remaining bits are in the page directory
1430 	 */
1431 	if (amdgpu_vm_block_size == -1)
1432 		return;
1433 
1434 	if (amdgpu_vm_block_size < 9) {
1435 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1436 			 amdgpu_vm_block_size);
1437 		amdgpu_vm_block_size = -1;
1438 	}
1439 }
1440 
1441 /**
1442  * amdgpu_device_check_vm_size - validate the vm size
1443  *
1444  * @adev: amdgpu_device pointer
1445  *
1446  * Validates the vm size in GB specified via module parameter.
1447  * The VM size is the size of the GPU virtual memory space in GB.
1448  */
1449 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1450 {
1451 	/* no need to check the default value */
1452 	if (amdgpu_vm_size == -1)
1453 		return;
1454 
1455 	if (amdgpu_vm_size < 1) {
1456 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1457 			 amdgpu_vm_size);
1458 		amdgpu_vm_size = -1;
1459 	}
1460 }
1461 
1462 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1463 {
1464 	struct sysinfo si;
1465 	bool is_os_64 = (sizeof(void *) == 8);
1466 	uint64_t total_memory;
1467 	uint64_t dram_size_seven_GB = 0x1B8000000;
1468 	uint64_t dram_size_three_GB = 0xB8000000;
1469 
1470 	if (amdgpu_smu_memory_pool_size == 0)
1471 		return;
1472 
1473 	if (!is_os_64) {
1474 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1475 		goto def_value;
1476 	}
1477 	si_meminfo(&si);
1478 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1479 
1480 	if ((amdgpu_smu_memory_pool_size == 1) ||
1481 		(amdgpu_smu_memory_pool_size == 2)) {
1482 		if (total_memory < dram_size_three_GB)
1483 			goto def_value1;
1484 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1485 		(amdgpu_smu_memory_pool_size == 8)) {
1486 		if (total_memory < dram_size_seven_GB)
1487 			goto def_value1;
1488 	} else {
1489 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1490 		goto def_value;
1491 	}
1492 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1493 
1494 	return;
1495 
1496 def_value1:
1497 	dev_warn(adev->dev, "No enough system memory\n");
1498 def_value:
1499 	adev->pm.smu_prv_buffer_size = 0;
1500 }
1501 
1502 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1503 {
1504 	if (!(adev->flags & AMD_IS_APU) ||
1505 	    adev->asic_type < CHIP_RAVEN)
1506 		return 0;
1507 
1508 	switch (adev->asic_type) {
1509 	case CHIP_RAVEN:
1510 		if (adev->pdev->device == 0x15dd)
1511 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1512 		if (adev->pdev->device == 0x15d8)
1513 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1514 		break;
1515 	case CHIP_RENOIR:
1516 		if ((adev->pdev->device == 0x1636) ||
1517 		    (adev->pdev->device == 0x164c))
1518 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1519 		else
1520 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1521 		break;
1522 	case CHIP_VANGOGH:
1523 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1524 		break;
1525 	case CHIP_YELLOW_CARP:
1526 		break;
1527 	case CHIP_CYAN_SKILLFISH:
1528 		if ((adev->pdev->device == 0x13FE) ||
1529 		    (adev->pdev->device == 0x143F))
1530 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1531 		break;
1532 	default:
1533 		break;
1534 	}
1535 
1536 	return 0;
1537 }
1538 
1539 /**
1540  * amdgpu_device_check_arguments - validate module params
1541  *
1542  * @adev: amdgpu_device pointer
1543  *
1544  * Validates certain module parameters and updates
1545  * the associated values used by the driver (all asics).
1546  */
1547 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1548 {
1549 	int i;
1550 
1551 	if (amdgpu_sched_jobs < 4) {
1552 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1553 			 amdgpu_sched_jobs);
1554 		amdgpu_sched_jobs = 4;
1555 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1556 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1557 			 amdgpu_sched_jobs);
1558 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1559 	}
1560 
1561 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1562 		/* gart size must be greater or equal to 32M */
1563 		dev_warn(adev->dev, "gart size (%d) too small\n",
1564 			 amdgpu_gart_size);
1565 		amdgpu_gart_size = -1;
1566 	}
1567 
1568 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1569 		/* gtt size must be greater or equal to 32M */
1570 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1571 				 amdgpu_gtt_size);
1572 		amdgpu_gtt_size = -1;
1573 	}
1574 
1575 	/* valid range is between 4 and 9 inclusive */
1576 	if (amdgpu_vm_fragment_size != -1 &&
1577 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1578 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1579 		amdgpu_vm_fragment_size = -1;
1580 	}
1581 
1582 	if (amdgpu_sched_hw_submission < 2) {
1583 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1584 			 amdgpu_sched_hw_submission);
1585 		amdgpu_sched_hw_submission = 2;
1586 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1587 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1588 			 amdgpu_sched_hw_submission);
1589 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1590 	}
1591 
1592 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1593 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1594 		amdgpu_reset_method = -1;
1595 	}
1596 
1597 	amdgpu_device_check_smu_prv_buffer_size(adev);
1598 
1599 	amdgpu_device_check_vm_size(adev);
1600 
1601 	amdgpu_device_check_block_size(adev);
1602 
1603 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1604 
1605 	for (i = 0; i < MAX_XCP; i++) {
1606 		switch (amdgpu_enforce_isolation) {
1607 		case -1:
1608 		case 0:
1609 		default:
1610 			/* disable */
1611 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1612 			break;
1613 		case 1:
1614 			/* enable */
1615 			adev->enforce_isolation[i] =
1616 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1617 			break;
1618 		case 2:
1619 			/* enable legacy mode */
1620 			adev->enforce_isolation[i] =
1621 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1622 			break;
1623 		case 3:
1624 			/* enable only process isolation without submitting cleaner shader */
1625 			adev->enforce_isolation[i] =
1626 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1627 			break;
1628 		}
1629 	}
1630 
1631 	return 0;
1632 }
1633 
1634 /**
1635  * amdgpu_switcheroo_set_state - set switcheroo state
1636  *
1637  * @pdev: pci dev pointer
1638  * @state: vga_switcheroo state
1639  *
1640  * Callback for the switcheroo driver.  Suspends or resumes
1641  * the asics before or after it is powered up using ACPI methods.
1642  */
1643 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1644 					enum vga_switcheroo_state state)
1645 {
1646 	struct drm_device *dev = pci_get_drvdata(pdev);
1647 	int r;
1648 
1649 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1650 	    state == VGA_SWITCHEROO_OFF)
1651 		return;
1652 
1653 	if (state == VGA_SWITCHEROO_ON) {
1654 		pr_info("switched on\n");
1655 		/* don't suspend or resume card normally */
1656 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1657 
1658 		pci_set_power_state(pdev, PCI_D0);
1659 		amdgpu_device_load_pci_state(pdev);
1660 		r = pci_enable_device(pdev);
1661 		if (r)
1662 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1663 				 r);
1664 		amdgpu_device_resume(dev, true);
1665 
1666 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1667 	} else {
1668 		dev_info(&pdev->dev, "switched off\n");
1669 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1670 		amdgpu_device_prepare(dev);
1671 		amdgpu_device_suspend(dev, true);
1672 		amdgpu_device_cache_pci_state(pdev);
1673 		/* Shut down the device */
1674 		pci_disable_device(pdev);
1675 		pci_set_power_state(pdev, PCI_D3cold);
1676 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1677 	}
1678 }
1679 
1680 /**
1681  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1682  *
1683  * @pdev: pci dev pointer
1684  *
1685  * Callback for the switcheroo driver.  Check of the switcheroo
1686  * state can be changed.
1687  * Returns true if the state can be changed, false if not.
1688  */
1689 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1690 {
1691 	struct drm_device *dev = pci_get_drvdata(pdev);
1692 
1693        /*
1694 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1695 	* locking inversion with the driver load path. And the access here is
1696 	* completely racy anyway. So don't bother with locking for now.
1697 	*/
1698 	return atomic_read(&dev->open_count) == 0;
1699 }
1700 
1701 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1702 	.set_gpu_state = amdgpu_switcheroo_set_state,
1703 	.reprobe = NULL,
1704 	.can_switch = amdgpu_switcheroo_can_switch,
1705 };
1706 
1707 /**
1708  * amdgpu_device_enable_virtual_display - enable virtual display feature
1709  *
1710  * @adev: amdgpu_device pointer
1711  *
1712  * Enabled the virtual display feature if the user has enabled it via
1713  * the module parameter virtual_display.  This feature provides a virtual
1714  * display hardware on headless boards or in virtualized environments.
1715  * This function parses and validates the configuration string specified by
1716  * the user and configures the virtual display configuration (number of
1717  * virtual connectors, crtcs, etc.) specified.
1718  */
1719 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1720 {
1721 	adev->enable_virtual_display = false;
1722 
1723 	if (amdgpu_virtual_display) {
1724 		const char *pci_address_name = pci_name(adev->pdev);
1725 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1726 
1727 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1728 		pciaddstr_tmp = pciaddstr;
1729 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1730 			pciaddname = strsep(&pciaddname_tmp, ",");
1731 			if (!strcmp("all", pciaddname)
1732 			    || !strcmp(pci_address_name, pciaddname)) {
1733 				long num_crtc;
1734 				int res = -1;
1735 
1736 				adev->enable_virtual_display = true;
1737 
1738 				if (pciaddname_tmp)
1739 					res = kstrtol(pciaddname_tmp, 10,
1740 						      &num_crtc);
1741 
1742 				if (!res) {
1743 					if (num_crtc < 1)
1744 						num_crtc = 1;
1745 					if (num_crtc > 6)
1746 						num_crtc = 6;
1747 					adev->mode_info.num_crtc = num_crtc;
1748 				} else {
1749 					adev->mode_info.num_crtc = 1;
1750 				}
1751 				break;
1752 			}
1753 		}
1754 
1755 		dev_info(
1756 			adev->dev,
1757 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1758 			amdgpu_virtual_display, pci_address_name,
1759 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1760 
1761 		kfree(pciaddstr);
1762 	}
1763 }
1764 
1765 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1766 {
1767 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1768 		adev->mode_info.num_crtc = 1;
1769 		adev->enable_virtual_display = true;
1770 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1771 			 adev->enable_virtual_display,
1772 			 adev->mode_info.num_crtc);
1773 	}
1774 }
1775 
1776 /**
1777  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1778  *
1779  * @adev: amdgpu_device pointer
1780  *
1781  * Parses the asic configuration parameters specified in the gpu info
1782  * firmware and makes them available to the driver for use in configuring
1783  * the asic.
1784  * Returns 0 on success, -EINVAL on failure.
1785  */
1786 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1787 {
1788 	const char *chip_name;
1789 	int err;
1790 	const struct gpu_info_firmware_header_v1_0 *hdr;
1791 
1792 	adev->firmware.gpu_info_fw = NULL;
1793 
1794 	switch (adev->asic_type) {
1795 	default:
1796 		return 0;
1797 	case CHIP_VEGA10:
1798 		chip_name = "vega10";
1799 		break;
1800 	case CHIP_VEGA12:
1801 		chip_name = "vega12";
1802 		break;
1803 	case CHIP_RAVEN:
1804 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1805 			chip_name = "raven2";
1806 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1807 			chip_name = "picasso";
1808 		else
1809 			chip_name = "raven";
1810 		break;
1811 	case CHIP_ARCTURUS:
1812 		chip_name = "arcturus";
1813 		break;
1814 	case CHIP_NAVI12:
1815 		if (adev->discovery.bin)
1816 			return 0;
1817 		chip_name = "navi12";
1818 		break;
1819 	case CHIP_CYAN_SKILLFISH:
1820 		if (adev->discovery.bin)
1821 			return 0;
1822 		chip_name = "cyan_skillfish";
1823 		break;
1824 	}
1825 
1826 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1827 				   AMDGPU_UCODE_OPTIONAL,
1828 				   "amdgpu/%s_gpu_info.bin", chip_name);
1829 	if (err) {
1830 		dev_err(adev->dev,
1831 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1832 			chip_name);
1833 		goto out;
1834 	}
1835 
1836 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1837 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1838 
1839 	switch (hdr->version_major) {
1840 	case 1:
1841 	{
1842 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1843 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1844 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1845 
1846 		/*
1847 		 * Should be dropped when DAL no longer needs it.
1848 		 */
1849 		if (adev->asic_type == CHIP_NAVI12)
1850 			goto parse_soc_bounding_box;
1851 
1852 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1853 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1854 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1855 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1856 		adev->gfx.config.max_texture_channel_caches =
1857 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1858 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1859 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1860 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1861 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1862 		adev->gfx.config.double_offchip_lds_buf =
1863 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1864 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1865 		adev->gfx.cu_info.max_waves_per_simd =
1866 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1867 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1868 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1869 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1870 		if (hdr->version_minor >= 1) {
1871 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1872 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1873 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1874 			adev->gfx.config.num_sc_per_sh =
1875 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1876 			adev->gfx.config.num_packer_per_sc =
1877 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1878 		}
1879 
1880 parse_soc_bounding_box:
1881 		/*
1882 		 * soc bounding box info is not integrated in disocovery table,
1883 		 * we always need to parse it from gpu info firmware if needed.
1884 		 */
1885 		if (hdr->version_minor == 2) {
1886 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1887 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1888 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1889 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1890 		}
1891 		break;
1892 	}
1893 	default:
1894 		dev_err(adev->dev,
1895 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1896 		err = -EINVAL;
1897 		goto out;
1898 	}
1899 out:
1900 	return err;
1901 }
1902 
1903 static void amdgpu_uid_init(struct amdgpu_device *adev)
1904 {
1905 	/* Initialize the UID for the device */
1906 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1907 	if (!adev->uid_info) {
1908 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1909 		return;
1910 	}
1911 	adev->uid_info->adev = adev;
1912 }
1913 
1914 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1915 {
1916 	/* Free the UID memory */
1917 	kfree(adev->uid_info);
1918 	adev->uid_info = NULL;
1919 }
1920 
1921 static struct pci_dev *amdgpu_device_find_parent(struct amdgpu_device *adev)
1922 {
1923 	struct pci_dev *parent = adev->pdev;
1924 
1925 	/* skip upstream/downstream switches internal to dGPU */
1926 	while ((parent = pci_upstream_bridge(parent))) {
1927 		if (parent->vendor == PCI_VENDOR_ID_ATI)
1928 			continue;
1929 		break;
1930 	}
1931 
1932 	return parent;
1933 }
1934 
1935 /**
1936  * amdgpu_device_ip_early_init - run early init for hardware IPs
1937  *
1938  * @adev: amdgpu_device pointer
1939  *
1940  * Early initialization pass for hardware IPs.  The hardware IPs that make
1941  * up each asic are discovered each IP's early_init callback is run.  This
1942  * is the first stage in initializing the asic.
1943  * Returns 0 on success, negative error code on failure.
1944  */
1945 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1946 {
1947 	struct amdgpu_ip_block *ip_block;
1948 	struct pci_dev *parent;
1949 	bool total, skip_bios;
1950 	uint32_t bios_flags;
1951 	int i, r;
1952 
1953 	amdgpu_device_enable_virtual_display(adev);
1954 
1955 	if (amdgpu_sriov_vf(adev)) {
1956 		r = amdgpu_virt_request_full_gpu(adev, true);
1957 		if (r)
1958 			return r;
1959 
1960 		r = amdgpu_virt_init_critical_region(adev);
1961 		if (r)
1962 			return r;
1963 	}
1964 
1965 	switch (adev->asic_type) {
1966 #ifdef CONFIG_DRM_AMDGPU_SI
1967 	case CHIP_VERDE:
1968 	case CHIP_TAHITI:
1969 	case CHIP_PITCAIRN:
1970 	case CHIP_OLAND:
1971 	case CHIP_HAINAN:
1972 		adev->family = AMDGPU_FAMILY_SI;
1973 		r = si_set_ip_blocks(adev);
1974 		if (r)
1975 			return r;
1976 		break;
1977 #endif
1978 #ifdef CONFIG_DRM_AMDGPU_CIK
1979 	case CHIP_BONAIRE:
1980 	case CHIP_HAWAII:
1981 	case CHIP_KAVERI:
1982 	case CHIP_KABINI:
1983 	case CHIP_MULLINS:
1984 		if (adev->flags & AMD_IS_APU)
1985 			adev->family = AMDGPU_FAMILY_KV;
1986 		else
1987 			adev->family = AMDGPU_FAMILY_CI;
1988 
1989 		r = cik_set_ip_blocks(adev);
1990 		if (r)
1991 			return r;
1992 		break;
1993 #endif
1994 	case CHIP_TOPAZ:
1995 	case CHIP_TONGA:
1996 	case CHIP_FIJI:
1997 	case CHIP_POLARIS10:
1998 	case CHIP_POLARIS11:
1999 	case CHIP_POLARIS12:
2000 	case CHIP_VEGAM:
2001 	case CHIP_CARRIZO:
2002 	case CHIP_STONEY:
2003 		if (adev->flags & AMD_IS_APU)
2004 			adev->family = AMDGPU_FAMILY_CZ;
2005 		else
2006 			adev->family = AMDGPU_FAMILY_VI;
2007 
2008 		r = vi_set_ip_blocks(adev);
2009 		if (r)
2010 			return r;
2011 		break;
2012 	default:
2013 		r = amdgpu_discovery_set_ip_blocks(adev);
2014 		if (r) {
2015 			adev->num_ip_blocks = 0;
2016 			return r;
2017 		}
2018 		break;
2019 	}
2020 
2021 	/* Check for IP version 9.4.3 with A0 hardware */
2022 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2023 	    !amdgpu_device_get_rev_id(adev)) {
2024 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2025 		return -ENODEV;	/* device unsupported - no device error */
2026 	}
2027 
2028 	if (amdgpu_has_atpx() &&
2029 	    (amdgpu_is_atpx_hybrid() ||
2030 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2031 	    ((adev->flags & AMD_IS_APU) == 0) &&
2032 	    !dev_is_removable(&adev->pdev->dev))
2033 		adev->flags |= AMD_IS_PX;
2034 
2035 	if (!(adev->flags & AMD_IS_APU)) {
2036 		parent = amdgpu_device_find_parent(adev);
2037 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2038 	}
2039 
2040 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2041 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2042 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2043 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2044 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2045 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2046 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2047 
2048 	adev->virt.is_xgmi_node_migrate_enabled = false;
2049 	if (amdgpu_sriov_vf(adev)) {
2050 		adev->virt.is_xgmi_node_migrate_enabled =
2051 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2052 	}
2053 
2054 	total = true;
2055 	for (i = 0; i < adev->num_ip_blocks; i++) {
2056 		ip_block = &adev->ip_blocks[i];
2057 
2058 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2059 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2060 				 adev->ip_blocks[i].version->funcs->name);
2061 			adev->ip_blocks[i].status.valid = false;
2062 		} else if (ip_block->version->funcs->early_init) {
2063 			r = ip_block->version->funcs->early_init(ip_block);
2064 			if (r == -ENOENT) {
2065 				adev->ip_blocks[i].status.valid = false;
2066 			} else if (r) {
2067 				dev_err(adev->dev,
2068 					"early_init of IP block <%s> failed %d\n",
2069 					adev->ip_blocks[i].version->funcs->name,
2070 					r);
2071 				total = false;
2072 			} else {
2073 				adev->ip_blocks[i].status.valid = true;
2074 			}
2075 		} else {
2076 			adev->ip_blocks[i].status.valid = true;
2077 		}
2078 		/* get the vbios after the asic_funcs are set up */
2079 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2080 			r = amdgpu_device_parse_gpu_info_fw(adev);
2081 			if (r)
2082 				return r;
2083 
2084 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2085 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2086 			/* Read BIOS */
2087 			if (!skip_bios) {
2088 				bool optional =
2089 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2090 				if (!amdgpu_get_bios(adev) && !optional)
2091 					return -EINVAL;
2092 
2093 				if (optional && !adev->bios)
2094 					dev_info(
2095 						adev->dev,
2096 						"VBIOS image optional, proceeding without VBIOS image");
2097 
2098 				if (adev->bios) {
2099 					r = amdgpu_atombios_init(adev);
2100 					if (r) {
2101 						dev_err(adev->dev,
2102 							"amdgpu_atombios_init failed\n");
2103 						amdgpu_vf_error_put(
2104 							adev,
2105 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2106 							0, 0);
2107 						return r;
2108 					}
2109 				}
2110 			}
2111 
2112 			/*get pf2vf msg info at it's earliest time*/
2113 			if (amdgpu_sriov_vf(adev))
2114 				amdgpu_virt_init_data_exchange(adev);
2115 
2116 		}
2117 	}
2118 	if (!total)
2119 		return -ENODEV;
2120 
2121 	if (adev->gmc.xgmi.supported)
2122 		amdgpu_xgmi_early_init(adev);
2123 
2124 	if (amdgpu_is_multi_aid(adev))
2125 		amdgpu_uid_init(adev);
2126 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2127 	if (ip_block->status.valid != false)
2128 		amdgpu_amdkfd_device_probe(adev);
2129 
2130 	adev->cg_flags &= amdgpu_cg_mask;
2131 	adev->pg_flags &= amdgpu_pg_mask;
2132 
2133 	return 0;
2134 }
2135 
2136 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2137 {
2138 	int i, r;
2139 
2140 	for (i = 0; i < adev->num_ip_blocks; i++) {
2141 		if (!adev->ip_blocks[i].status.sw)
2142 			continue;
2143 		if (adev->ip_blocks[i].status.hw)
2144 			continue;
2145 		if (!amdgpu_ip_member_of_hwini(
2146 			    adev, adev->ip_blocks[i].version->type))
2147 			continue;
2148 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2149 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2150 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2151 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2152 			if (r) {
2153 				dev_err(adev->dev,
2154 					"hw_init of IP block <%s> failed %d\n",
2155 					adev->ip_blocks[i].version->funcs->name,
2156 					r);
2157 				return r;
2158 			}
2159 			adev->ip_blocks[i].status.hw = true;
2160 		}
2161 	}
2162 
2163 	return 0;
2164 }
2165 
2166 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2167 {
2168 	int i, r;
2169 
2170 	for (i = 0; i < adev->num_ip_blocks; i++) {
2171 		if (!adev->ip_blocks[i].status.sw)
2172 			continue;
2173 		if (adev->ip_blocks[i].status.hw)
2174 			continue;
2175 		if (!amdgpu_ip_member_of_hwini(
2176 			    adev, adev->ip_blocks[i].version->type))
2177 			continue;
2178 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2179 		if (r) {
2180 			dev_err(adev->dev,
2181 				"hw_init of IP block <%s> failed %d\n",
2182 				adev->ip_blocks[i].version->funcs->name, r);
2183 			return r;
2184 		}
2185 		adev->ip_blocks[i].status.hw = true;
2186 	}
2187 
2188 	return 0;
2189 }
2190 
2191 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2192 {
2193 	int r = 0;
2194 	int i;
2195 	uint32_t smu_version;
2196 
2197 	if (adev->asic_type >= CHIP_VEGA10) {
2198 		for (i = 0; i < adev->num_ip_blocks; i++) {
2199 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2200 				continue;
2201 
2202 			if (!amdgpu_ip_member_of_hwini(adev,
2203 						       AMD_IP_BLOCK_TYPE_PSP))
2204 				break;
2205 
2206 			if (!adev->ip_blocks[i].status.sw)
2207 				continue;
2208 
2209 			/* no need to do the fw loading again if already done*/
2210 			if (adev->ip_blocks[i].status.hw == true)
2211 				break;
2212 
2213 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2214 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2215 				if (r)
2216 					return r;
2217 			} else {
2218 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2219 				if (r) {
2220 					dev_err(adev->dev,
2221 						"hw_init of IP block <%s> failed %d\n",
2222 						adev->ip_blocks[i]
2223 							.version->funcs->name,
2224 						r);
2225 					return r;
2226 				}
2227 				adev->ip_blocks[i].status.hw = true;
2228 			}
2229 			break;
2230 		}
2231 	}
2232 
2233 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2234 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2235 
2236 	return r;
2237 }
2238 
2239 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2240 {
2241 	struct drm_sched_init_args args = {
2242 		.ops = &amdgpu_sched_ops,
2243 		.timeout_wq = adev->reset_domain->wq,
2244 		.dev = adev->dev,
2245 	};
2246 	long timeout;
2247 	int r, i;
2248 
2249 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2250 		struct amdgpu_ring *ring = adev->rings[i];
2251 
2252 		/* No need to setup the GPU scheduler for rings that don't need it */
2253 		if (!ring || ring->no_scheduler)
2254 			continue;
2255 
2256 		switch (ring->funcs->type) {
2257 		case AMDGPU_RING_TYPE_GFX:
2258 			timeout = adev->gfx_timeout;
2259 			break;
2260 		case AMDGPU_RING_TYPE_COMPUTE:
2261 			timeout = adev->compute_timeout;
2262 			break;
2263 		case AMDGPU_RING_TYPE_SDMA:
2264 			timeout = adev->sdma_timeout;
2265 			break;
2266 		default:
2267 			timeout = adev->video_timeout;
2268 			break;
2269 		}
2270 
2271 		args.timeout = timeout;
2272 		args.credit_limit = ring->num_hw_submission;
2273 		args.score = ring->sched_score;
2274 		args.name = ring->name;
2275 
2276 		r = drm_sched_init(&ring->sched, &args);
2277 		if (r) {
2278 			dev_err(adev->dev,
2279 				"Failed to create scheduler on ring %s.\n",
2280 				ring->name);
2281 			return r;
2282 		}
2283 		r = amdgpu_uvd_entity_init(adev, ring);
2284 		if (r) {
2285 			dev_err(adev->dev,
2286 				"Failed to create UVD scheduling entity on ring %s.\n",
2287 				ring->name);
2288 			return r;
2289 		}
2290 		r = amdgpu_vce_entity_init(adev, ring);
2291 		if (r) {
2292 			dev_err(adev->dev,
2293 				"Failed to create VCE scheduling entity on ring %s.\n",
2294 				ring->name);
2295 			return r;
2296 		}
2297 	}
2298 
2299 	if (adev->xcp_mgr)
2300 		amdgpu_xcp_update_partition_sched_list(adev);
2301 
2302 	return 0;
2303 }
2304 
2305 
2306 /**
2307  * amdgpu_device_ip_init - run init for hardware IPs
2308  *
2309  * @adev: amdgpu_device pointer
2310  *
2311  * Main initialization pass for hardware IPs.  The list of all the hardware
2312  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2313  * are run.  sw_init initializes the software state associated with each IP
2314  * and hw_init initializes the hardware associated with each IP.
2315  * Returns 0 on success, negative error code on failure.
2316  */
2317 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2318 {
2319 	bool init_badpage;
2320 	int i, r;
2321 
2322 	r = amdgpu_ras_init(adev);
2323 	if (r)
2324 		return r;
2325 
2326 	for (i = 0; i < adev->num_ip_blocks; i++) {
2327 		if (!adev->ip_blocks[i].status.valid)
2328 			continue;
2329 		if (adev->ip_blocks[i].version->funcs->sw_init) {
2330 			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2331 			if (r) {
2332 				dev_err(adev->dev,
2333 					"sw_init of IP block <%s> failed %d\n",
2334 					adev->ip_blocks[i].version->funcs->name,
2335 					r);
2336 				goto init_failed;
2337 			}
2338 		}
2339 		adev->ip_blocks[i].status.sw = true;
2340 
2341 		if (!amdgpu_ip_member_of_hwini(
2342 			    adev, adev->ip_blocks[i].version->type))
2343 			continue;
2344 
2345 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2346 			/* need to do common hw init early so everything is set up for gmc */
2347 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2348 			if (r) {
2349 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2350 					r);
2351 				goto init_failed;
2352 			}
2353 			adev->ip_blocks[i].status.hw = true;
2354 		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2355 			/* need to do gmc hw init early so we can allocate gpu mem */
2356 			/* Try to reserve bad pages early */
2357 			if (amdgpu_sriov_vf(adev))
2358 				amdgpu_virt_exchange_data(adev);
2359 
2360 			r = amdgpu_device_mem_scratch_init(adev);
2361 			if (r) {
2362 				dev_err(adev->dev,
2363 					"amdgpu_mem_scratch_init failed %d\n",
2364 					r);
2365 				goto init_failed;
2366 			}
2367 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2368 			if (r) {
2369 				dev_err(adev->dev, "hw_init %d failed %d\n", i,
2370 					r);
2371 				goto init_failed;
2372 			}
2373 			r = amdgpu_device_wb_init(adev);
2374 			if (r) {
2375 				dev_err(adev->dev,
2376 					"amdgpu_device_wb_init failed %d\n", r);
2377 				goto init_failed;
2378 			}
2379 			adev->ip_blocks[i].status.hw = true;
2380 
2381 			/* right after GMC hw init, we create CSA */
2382 			if (adev->gfx.mcbp) {
2383 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2384 							       AMDGPU_GEM_DOMAIN_VRAM |
2385 							       AMDGPU_GEM_DOMAIN_GTT,
2386 							       AMDGPU_CSA_SIZE);
2387 				if (r) {
2388 					dev_err(adev->dev,
2389 						"allocate CSA failed %d\n", r);
2390 					goto init_failed;
2391 				}
2392 			}
2393 
2394 			r = amdgpu_seq64_init(adev);
2395 			if (r) {
2396 				dev_err(adev->dev, "allocate seq64 failed %d\n",
2397 					r);
2398 				goto init_failed;
2399 			}
2400 		}
2401 	}
2402 
2403 	if (amdgpu_sriov_vf(adev))
2404 		amdgpu_virt_init_data_exchange(adev);
2405 
2406 	r = amdgpu_ib_pool_init(adev);
2407 	if (r) {
2408 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2409 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2410 		goto init_failed;
2411 	}
2412 
2413 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2414 	if (r)
2415 		goto init_failed;
2416 
2417 	r = amdgpu_device_ip_hw_init_phase1(adev);
2418 	if (r)
2419 		goto init_failed;
2420 
2421 	r = amdgpu_device_fw_loading(adev);
2422 	if (r)
2423 		goto init_failed;
2424 
2425 	r = amdgpu_device_ip_hw_init_phase2(adev);
2426 	if (r)
2427 		goto init_failed;
2428 
2429 	/*
2430 	 * retired pages will be loaded from eeprom and reserved here,
2431 	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
2432 	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2433 	 * for I2C communication which only true at this point.
2434 	 *
2435 	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2436 	 * failure from bad gpu situation and stop amdgpu init process
2437 	 * accordingly. For other failed cases, it will still release all
2438 	 * the resource and print error message, rather than returning one
2439 	 * negative value to upper level.
2440 	 *
2441 	 * Note: theoretically, this should be called before all vram allocations
2442 	 * to protect retired page from abusing
2443 	 */
2444 	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2445 	r = amdgpu_ras_recovery_init(adev, init_badpage);
2446 	if (r)
2447 		goto init_failed;
2448 
2449 	/**
2450 	 * In case of XGMI grab extra reference for reset domain for this device
2451 	 */
2452 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2453 		if (amdgpu_xgmi_add_device(adev) == 0) {
2454 			if (!amdgpu_sriov_vf(adev)) {
2455 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2456 
2457 				if (WARN_ON(!hive)) {
2458 					r = -ENOENT;
2459 					goto init_failed;
2460 				}
2461 
2462 				if (!hive->reset_domain ||
2463 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2464 					r = -ENOENT;
2465 					amdgpu_put_xgmi_hive(hive);
2466 					goto init_failed;
2467 				}
2468 
2469 				/* Drop the early temporary reset domain we created for device */
2470 				amdgpu_reset_put_reset_domain(adev->reset_domain);
2471 				adev->reset_domain = hive->reset_domain;
2472 				amdgpu_put_xgmi_hive(hive);
2473 			}
2474 		}
2475 	}
2476 
2477 	r = amdgpu_device_init_schedulers(adev);
2478 	if (r)
2479 		goto init_failed;
2480 
2481 	amdgpu_ttm_enable_buffer_funcs(adev);
2482 
2483 	/* Don't init kfd if whole hive need to be reset during init */
2484 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
2485 		amdgpu_amdkfd_device_init(adev);
2486 	}
2487 
2488 	amdgpu_fru_get_product_info(adev);
2489 
2490 	r = amdgpu_cper_init(adev);
2491 
2492 init_failed:
2493 
2494 	return r;
2495 }
2496 
2497 /**
2498  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2499  *
2500  * @adev: amdgpu_device pointer
2501  *
2502  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2503  * this function before a GPU reset.  If the value is retained after a
2504  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2505  */
2506 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2507 {
2508 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2509 }
2510 
2511 /**
2512  * amdgpu_device_check_vram_lost - check if vram is valid
2513  *
2514  * @adev: amdgpu_device pointer
2515  *
2516  * Checks the reset magic value written to the gart pointer in VRAM.
2517  * The driver calls this after a GPU reset to see if the contents of
2518  * VRAM is lost or now.
2519  * returns true if vram is lost, false if not.
2520  */
2521 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2522 {
2523 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2524 			AMDGPU_RESET_MAGIC_NUM))
2525 		return true;
2526 
2527 	if (!amdgpu_in_reset(adev))
2528 		return false;
2529 
2530 	/*
2531 	 * For all ASICs with baco/mode1 reset, the VRAM is
2532 	 * always assumed to be lost.
2533 	 */
2534 	switch (amdgpu_asic_reset_method(adev)) {
2535 	case AMD_RESET_METHOD_LEGACY:
2536 	case AMD_RESET_METHOD_LINK:
2537 	case AMD_RESET_METHOD_BACO:
2538 	case AMD_RESET_METHOD_MODE1:
2539 		return true;
2540 	default:
2541 		return false;
2542 	}
2543 }
2544 
2545 /**
2546  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2547  *
2548  * @adev: amdgpu_device pointer
2549  * @state: clockgating state (gate or ungate)
2550  *
2551  * The list of all the hardware IPs that make up the asic is walked and the
2552  * set_clockgating_state callbacks are run.
2553  * Late initialization pass enabling clockgating for hardware IPs.
2554  * Fini or suspend, pass disabling clockgating for hardware IPs.
2555  * Returns 0 on success, negative error code on failure.
2556  */
2557 
2558 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2559 			       enum amd_clockgating_state state)
2560 {
2561 	int i, j, r;
2562 
2563 	if (amdgpu_emu_mode == 1)
2564 		return 0;
2565 
2566 	for (j = 0; j < adev->num_ip_blocks; j++) {
2567 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2568 		if (!adev->ip_blocks[i].status.late_initialized)
2569 			continue;
2570 		if (!adev->ip_blocks[i].version)
2571 			continue;
2572 		/* skip CG for GFX, SDMA on S0ix */
2573 		if (adev->in_s0ix &&
2574 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2575 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2576 			continue;
2577 		/* skip CG for VCE/UVD, it's handled specially */
2578 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2579 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2580 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2581 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2582 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2583 			/* enable clockgating to save power */
2584 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2585 										     state);
2586 			if (r) {
2587 				dev_err(adev->dev,
2588 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
2589 					adev->ip_blocks[i].version->funcs->name,
2590 					r);
2591 				return r;
2592 			}
2593 		}
2594 	}
2595 
2596 	return 0;
2597 }
2598 
2599 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2600 			       enum amd_powergating_state state)
2601 {
2602 	int i, j, r;
2603 
2604 	if (amdgpu_emu_mode == 1)
2605 		return 0;
2606 
2607 	for (j = 0; j < adev->num_ip_blocks; j++) {
2608 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2609 		if (!adev->ip_blocks[i].status.late_initialized)
2610 			continue;
2611 		if (!adev->ip_blocks[i].version)
2612 			continue;
2613 		/* skip PG for GFX, SDMA on S0ix */
2614 		if (adev->in_s0ix &&
2615 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2616 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2617 			continue;
2618 		/* skip CG for VCE/UVD, it's handled specially */
2619 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2620 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2621 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2622 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2623 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2624 			/* enable powergating to save power */
2625 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2626 											state);
2627 			if (r) {
2628 				dev_err(adev->dev,
2629 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
2630 					adev->ip_blocks[i].version->funcs->name,
2631 					r);
2632 				return r;
2633 			}
2634 		}
2635 	}
2636 	return 0;
2637 }
2638 
2639 static int amdgpu_device_enable_mgpu_fan_boost(void)
2640 {
2641 	struct amdgpu_gpu_instance *gpu_ins;
2642 	struct amdgpu_device *adev;
2643 	int i, ret = 0;
2644 
2645 	mutex_lock(&mgpu_info.mutex);
2646 
2647 	/*
2648 	 * MGPU fan boost feature should be enabled
2649 	 * only when there are two or more dGPUs in
2650 	 * the system
2651 	 */
2652 	if (mgpu_info.num_dgpu < 2)
2653 		goto out;
2654 
2655 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2656 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2657 		adev = gpu_ins->adev;
2658 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2659 		    !gpu_ins->mgpu_fan_enabled) {
2660 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2661 			if (ret)
2662 				break;
2663 
2664 			gpu_ins->mgpu_fan_enabled = 1;
2665 		}
2666 	}
2667 
2668 out:
2669 	mutex_unlock(&mgpu_info.mutex);
2670 
2671 	return ret;
2672 }
2673 
2674 /**
2675  * amdgpu_device_ip_late_init - run late init for hardware IPs
2676  *
2677  * @adev: amdgpu_device pointer
2678  *
2679  * Late initialization pass for hardware IPs.  The list of all the hardware
2680  * IPs that make up the asic is walked and the late_init callbacks are run.
2681  * late_init covers any special initialization that an IP requires
2682  * after all of the have been initialized or something that needs to happen
2683  * late in the init process.
2684  * Returns 0 on success, negative error code on failure.
2685  */
2686 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2687 {
2688 	struct amdgpu_gpu_instance *gpu_instance;
2689 	int i = 0, r;
2690 
2691 	for (i = 0; i < adev->num_ip_blocks; i++) {
2692 		if (!adev->ip_blocks[i].status.hw)
2693 			continue;
2694 		if (adev->ip_blocks[i].version->funcs->late_init) {
2695 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2696 			if (r) {
2697 				dev_err(adev->dev,
2698 					"late_init of IP block <%s> failed %d\n",
2699 					adev->ip_blocks[i].version->funcs->name,
2700 					r);
2701 				return r;
2702 			}
2703 		}
2704 		adev->ip_blocks[i].status.late_initialized = true;
2705 	}
2706 
2707 	r = amdgpu_ras_late_init(adev);
2708 	if (r) {
2709 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
2710 		return r;
2711 	}
2712 
2713 	if (!amdgpu_reset_in_recovery(adev))
2714 		amdgpu_ras_set_error_query_ready(adev, true);
2715 
2716 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2717 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2718 
2719 	amdgpu_device_fill_reset_magic(adev);
2720 
2721 	r = amdgpu_device_enable_mgpu_fan_boost();
2722 	if (r)
2723 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2724 
2725 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2726 	if (amdgpu_passthrough(adev) &&
2727 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2728 	     adev->asic_type == CHIP_ALDEBARAN))
2729 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2730 
2731 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2732 		mutex_lock(&mgpu_info.mutex);
2733 
2734 		/*
2735 		 * Reset device p-state to low as this was booted with high.
2736 		 *
2737 		 * This should be performed only after all devices from the same
2738 		 * hive get initialized.
2739 		 *
2740 		 * However, it's unknown how many device in the hive in advance.
2741 		 * As this is counted one by one during devices initializations.
2742 		 *
2743 		 * So, we wait for all XGMI interlinked devices initialized.
2744 		 * This may bring some delays as those devices may come from
2745 		 * different hives. But that should be OK.
2746 		 */
2747 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2748 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2749 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2750 				if (gpu_instance->adev->flags & AMD_IS_APU)
2751 					continue;
2752 
2753 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2754 						AMDGPU_XGMI_PSTATE_MIN);
2755 				if (r) {
2756 					dev_err(adev->dev,
2757 						"pstate setting failed (%d).\n",
2758 						r);
2759 					break;
2760 				}
2761 			}
2762 		}
2763 
2764 		mutex_unlock(&mgpu_info.mutex);
2765 	}
2766 
2767 	return 0;
2768 }
2769 
2770 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2771 {
2772 	struct amdgpu_device *adev = ip_block->adev;
2773 	int r;
2774 
2775 	if (!ip_block->version->funcs->hw_fini) {
2776 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2777 			ip_block->version->funcs->name);
2778 	} else {
2779 		r = ip_block->version->funcs->hw_fini(ip_block);
2780 		/* XXX handle errors */
2781 		if (r) {
2782 			dev_dbg(adev->dev,
2783 				"hw_fini of IP block <%s> failed %d\n",
2784 				ip_block->version->funcs->name, r);
2785 		}
2786 	}
2787 
2788 	ip_block->status.hw = false;
2789 }
2790 
2791 /**
2792  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2793  *
2794  * @adev: amdgpu_device pointer
2795  *
2796  * For ASICs need to disable SMC first
2797  */
2798 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2799 {
2800 	int i;
2801 
2802 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2803 		return;
2804 
2805 	for (i = 0; i < adev->num_ip_blocks; i++) {
2806 		if (!adev->ip_blocks[i].status.hw)
2807 			continue;
2808 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2809 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2810 			break;
2811 		}
2812 	}
2813 }
2814 
2815 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2816 {
2817 	int i, r;
2818 
2819 	for (i = 0; i < adev->num_ip_blocks; i++) {
2820 		if (!adev->ip_blocks[i].version)
2821 			continue;
2822 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2823 			continue;
2824 
2825 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2826 		if (r) {
2827 			dev_dbg(adev->dev,
2828 				"early_fini of IP block <%s> failed %d\n",
2829 				adev->ip_blocks[i].version->funcs->name, r);
2830 		}
2831 	}
2832 
2833 	amdgpu_amdkfd_suspend(adev, true);
2834 	amdgpu_amdkfd_teardown_processes(adev);
2835 	amdgpu_userq_suspend(adev);
2836 
2837 	/* Workaround for ASICs need to disable SMC first */
2838 	amdgpu_device_smu_fini_early(adev);
2839 
2840 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2841 		if (!adev->ip_blocks[i].status.hw)
2842 			continue;
2843 
2844 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2845 	}
2846 
2847 	if (amdgpu_sriov_vf(adev)) {
2848 		if (amdgpu_virt_release_full_gpu(adev, false))
2849 			dev_err(adev->dev,
2850 				"failed to release exclusive mode on fini\n");
2851 	}
2852 
2853 	/*
2854 	 * Driver reload on the APU can fail due to firmware validation because
2855 	 * the PSP is always running, as it is shared across the whole SoC.
2856 	 * This same issue does not occur on dGPU because it has a mechanism
2857 	 * that checks whether the PSP is running. A solution for those issues
2858 	 * in the APU is to trigger a GPU reset, but this should be done during
2859 	 * the unload phase to avoid adding boot latency and screen flicker.
2860 	 * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends
2861 	 * a request to PMFW to unload MP1, PMFW will put GC in reset and power down
2862 	 * the voltage. Hence, skipping reset for APUs with GFX V11 or later.
2863 	 */
2864 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu &&
2865 		amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) {
2866 		r = amdgpu_asic_reset(adev);
2867 		if (r)
2868 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2869 	}
2870 
2871 	return 0;
2872 }
2873 
2874 /**
2875  * amdgpu_device_ip_fini - run fini for hardware IPs
2876  *
2877  * @adev: amdgpu_device pointer
2878  *
2879  * Main teardown pass for hardware IPs.  The list of all the hardware
2880  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2881  * are run.  hw_fini tears down the hardware associated with each IP
2882  * and sw_fini tears down any software state associated with each IP.
2883  * Returns 0 on success, negative error code on failure.
2884  */
2885 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2886 {
2887 	int i, r;
2888 
2889 	amdgpu_cper_fini(adev);
2890 
2891 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2892 		amdgpu_virt_release_ras_err_handler_data(adev);
2893 
2894 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2895 		amdgpu_xgmi_remove_device(adev);
2896 
2897 	amdgpu_amdkfd_device_fini_sw(adev);
2898 
2899 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2900 		if (!adev->ip_blocks[i].status.sw)
2901 			continue;
2902 
2903 		if (!adev->ip_blocks[i].version)
2904 			continue;
2905 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2906 			amdgpu_ucode_free_bo(adev);
2907 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2908 			amdgpu_device_wb_fini(adev);
2909 			amdgpu_device_mem_scratch_fini(adev);
2910 			amdgpu_ib_pool_fini(adev);
2911 			amdgpu_seq64_fini(adev);
2912 			amdgpu_doorbell_fini(adev);
2913 		}
2914 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2915 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2916 			/* XXX handle errors */
2917 			if (r) {
2918 				dev_dbg(adev->dev,
2919 					"sw_fini of IP block <%s> failed %d\n",
2920 					adev->ip_blocks[i].version->funcs->name,
2921 					r);
2922 			}
2923 		}
2924 		adev->ip_blocks[i].status.sw = false;
2925 		adev->ip_blocks[i].status.valid = false;
2926 	}
2927 
2928 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2929 		if (!adev->ip_blocks[i].status.late_initialized)
2930 			continue;
2931 		if (!adev->ip_blocks[i].version)
2932 			continue;
2933 		if (adev->ip_blocks[i].version->funcs->late_fini)
2934 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2935 		adev->ip_blocks[i].status.late_initialized = false;
2936 	}
2937 
2938 	amdgpu_ras_fini(adev);
2939 	amdgpu_uid_fini(adev);
2940 
2941 	return 0;
2942 }
2943 
2944 /**
2945  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2946  *
2947  * @work: work_struct.
2948  */
2949 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2950 {
2951 	struct amdgpu_device *adev =
2952 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2953 	int r;
2954 
2955 	r = amdgpu_ib_ring_tests(adev);
2956 	if (r)
2957 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2958 }
2959 
2960 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2961 {
2962 	struct amdgpu_device *adev =
2963 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2964 
2965 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2966 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2967 
2968 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2969 		adev->gfx.gfx_off_state = true;
2970 }
2971 
2972 /**
2973  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2974  *
2975  * @adev: amdgpu_device pointer
2976  *
2977  * Main suspend function for hardware IPs.  The list of all the hardware
2978  * IPs that make up the asic is walked, clockgating is disabled and the
2979  * suspend callbacks are run.  suspend puts the hardware and software state
2980  * in each IP into a state suitable for suspend.
2981  * Returns 0 on success, negative error code on failure.
2982  */
2983 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2984 {
2985 	int i, r, rec;
2986 
2987 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2988 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2989 
2990 	/*
2991 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2992 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2993 	 * scenario. Add the missing df cstate disablement here.
2994 	 */
2995 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2996 		dev_warn(adev->dev, "Failed to disallow df cstate");
2997 
2998 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2999 		if (!adev->ip_blocks[i].status.valid)
3000 			continue;
3001 
3002 		/* displays are handled separately */
3003 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3004 			continue;
3005 
3006 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3007 		if (r)
3008 			goto unwind;
3009 	}
3010 
3011 	return 0;
3012 unwind:
3013 	rec = amdgpu_device_ip_resume_phase3(adev);
3014 	if (rec)
3015 		dev_err(adev->dev,
3016 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3017 			rec);
3018 
3019 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3020 
3021 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3022 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3023 
3024 	return r;
3025 }
3026 
3027 /**
3028  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3029  *
3030  * @adev: amdgpu_device pointer
3031  *
3032  * Main suspend function for hardware IPs.  The list of all the hardware
3033  * IPs that make up the asic is walked, clockgating is disabled and the
3034  * suspend callbacks are run.  suspend puts the hardware and software state
3035  * in each IP into a state suitable for suspend.
3036  * Returns 0 on success, negative error code on failure.
3037  */
3038 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3039 {
3040 	int i, r, rec;
3041 
3042 	if (adev->in_s0ix)
3043 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3044 
3045 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3046 		if (!adev->ip_blocks[i].status.valid)
3047 			continue;
3048 		/* displays are handled in phase1 */
3049 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3050 			continue;
3051 		/* PSP lost connection when err_event_athub occurs */
3052 		if (amdgpu_ras_intr_triggered() &&
3053 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3054 			adev->ip_blocks[i].status.hw = false;
3055 			continue;
3056 		}
3057 
3058 		/* skip unnecessary suspend if we do not initialize them yet */
3059 		if (!amdgpu_ip_member_of_hwini(
3060 			    adev, adev->ip_blocks[i].version->type))
3061 			continue;
3062 
3063 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3064 		 * idle work here as the suspend callback never gets called.
3065 		 */
3066 		if (adev->in_s0ix &&
3067 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3068 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3069 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3070 		/* skip suspend of gfx/mes and psp for S0ix
3071 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3072 		 * like at runtime. PSP is also part of the always on hardware
3073 		 * so no need to suspend it.
3074 		 */
3075 		if (adev->in_s0ix &&
3076 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3077 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3078 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3079 			continue;
3080 
3081 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3082 		if (adev->in_s0ix &&
3083 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3084 		     IP_VERSION(5, 0, 0)) &&
3085 		    (adev->ip_blocks[i].version->type ==
3086 		     AMD_IP_BLOCK_TYPE_SDMA))
3087 			continue;
3088 
3089 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3090 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3091 		 * from this location and RLC Autoload automatically also gets loaded
3092 		 * from here based on PMFW -> PSP message during re-init sequence.
3093 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3094 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3095 		 */
3096 		if (amdgpu_in_reset(adev) &&
3097 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3098 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3099 			continue;
3100 
3101 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3102 		if (r)
3103 			goto unwind;
3104 
3105 		/* handle putting the SMC in the appropriate state */
3106 		if (!amdgpu_sriov_vf(adev)) {
3107 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3108 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3109 				if (r) {
3110 					dev_err(adev->dev,
3111 						"SMC failed to set mp1 state %d, %d\n",
3112 						adev->mp1_state, r);
3113 					goto unwind;
3114 				}
3115 			}
3116 		}
3117 	}
3118 
3119 	return 0;
3120 unwind:
3121 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3122 	rec = amdgpu_device_ip_resume_phase1(adev);
3123 	if (rec) {
3124 		dev_err(adev->dev,
3125 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3126 			rec);
3127 		return r;
3128 	}
3129 
3130 	rec = amdgpu_device_fw_loading(adev);
3131 	if (rec) {
3132 		dev_err(adev->dev,
3133 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3134 			rec);
3135 		return r;
3136 	}
3137 
3138 	rec = amdgpu_device_ip_resume_phase2(adev);
3139 	if (rec) {
3140 		dev_err(adev->dev,
3141 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3142 			rec);
3143 		return r;
3144 	}
3145 
3146 	return r;
3147 }
3148 
3149 /**
3150  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3151  *
3152  * @adev: amdgpu_device pointer
3153  *
3154  * Main suspend function for hardware IPs.  The list of all the hardware
3155  * IPs that make up the asic is walked, clockgating is disabled and the
3156  * suspend callbacks are run.  suspend puts the hardware and software state
3157  * in each IP into a state suitable for suspend.
3158  * Returns 0 on success, negative error code on failure.
3159  */
3160 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3161 {
3162 	int r;
3163 
3164 	if (amdgpu_sriov_vf(adev)) {
3165 		amdgpu_virt_fini_data_exchange(adev);
3166 		amdgpu_virt_request_full_gpu(adev, false);
3167 	}
3168 
3169 	amdgpu_ttm_disable_buffer_funcs(adev);
3170 
3171 	r = amdgpu_device_ip_suspend_phase1(adev);
3172 	if (r)
3173 		return r;
3174 	r = amdgpu_device_ip_suspend_phase2(adev);
3175 
3176 	if (amdgpu_sriov_vf(adev))
3177 		amdgpu_virt_release_full_gpu(adev, false);
3178 
3179 	return r;
3180 }
3181 
3182 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3183 {
3184 	int i, r;
3185 
3186 	static enum amd_ip_block_type ip_order[] = {
3187 		AMD_IP_BLOCK_TYPE_COMMON,
3188 		AMD_IP_BLOCK_TYPE_GMC,
3189 		AMD_IP_BLOCK_TYPE_PSP,
3190 		AMD_IP_BLOCK_TYPE_IH,
3191 	};
3192 
3193 	for (i = 0; i < adev->num_ip_blocks; i++) {
3194 		int j;
3195 		struct amdgpu_ip_block *block;
3196 
3197 		block = &adev->ip_blocks[i];
3198 		block->status.hw = false;
3199 
3200 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3201 
3202 			if (block->version->type != ip_order[j] ||
3203 				!block->status.valid)
3204 				continue;
3205 
3206 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3207 			if (r) {
3208 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3209 					 block->version->funcs->name);
3210 				return r;
3211 			}
3212 			block->status.hw = true;
3213 		}
3214 	}
3215 
3216 	return 0;
3217 }
3218 
3219 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3220 {
3221 	struct amdgpu_ip_block *block;
3222 	int i, r = 0;
3223 
3224 	static enum amd_ip_block_type ip_order[] = {
3225 		AMD_IP_BLOCK_TYPE_SMC,
3226 		AMD_IP_BLOCK_TYPE_DCE,
3227 		AMD_IP_BLOCK_TYPE_GFX,
3228 		AMD_IP_BLOCK_TYPE_SDMA,
3229 		AMD_IP_BLOCK_TYPE_MES,
3230 		AMD_IP_BLOCK_TYPE_UVD,
3231 		AMD_IP_BLOCK_TYPE_VCE,
3232 		AMD_IP_BLOCK_TYPE_VCN,
3233 		AMD_IP_BLOCK_TYPE_JPEG
3234 	};
3235 
3236 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3237 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3238 
3239 		if (!block)
3240 			continue;
3241 
3242 		if (block->status.valid && !block->status.hw) {
3243 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3244 				r = amdgpu_ip_block_resume(block);
3245 			} else {
3246 				r = block->version->funcs->hw_init(block);
3247 			}
3248 
3249 			if (r) {
3250 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3251 					 block->version->funcs->name);
3252 				break;
3253 			}
3254 			block->status.hw = true;
3255 		}
3256 	}
3257 
3258 	return r;
3259 }
3260 
3261 /**
3262  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3263  *
3264  * @adev: amdgpu_device pointer
3265  *
3266  * First resume function for hardware IPs.  The list of all the hardware
3267  * IPs that make up the asic is walked and the resume callbacks are run for
3268  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3269  * after a suspend and updates the software state as necessary.  This
3270  * function is also used for restoring the GPU after a GPU reset.
3271  * Returns 0 on success, negative error code on failure.
3272  */
3273 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3274 {
3275 	int i, r;
3276 
3277 	for (i = 0; i < adev->num_ip_blocks; i++) {
3278 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3279 			continue;
3280 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3281 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3282 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3283 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3284 
3285 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3286 			if (r)
3287 				return r;
3288 		}
3289 	}
3290 
3291 	return 0;
3292 }
3293 
3294 /**
3295  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3296  *
3297  * @adev: amdgpu_device pointer
3298  *
3299  * Second resume function for hardware IPs.  The list of all the hardware
3300  * IPs that make up the asic is walked and the resume callbacks are run for
3301  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3302  * functional state after a suspend and updates the software state as
3303  * necessary.  This function is also used for restoring the GPU after a GPU
3304  * reset.
3305  * Returns 0 on success, negative error code on failure.
3306  */
3307 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3308 {
3309 	int i, r;
3310 
3311 	for (i = 0; i < adev->num_ip_blocks; i++) {
3312 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3313 			continue;
3314 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3315 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3316 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3317 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3318 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3319 			continue;
3320 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3321 		if (r)
3322 			return r;
3323 	}
3324 
3325 	return 0;
3326 }
3327 
3328 /**
3329  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3330  *
3331  * @adev: amdgpu_device pointer
3332  *
3333  * Third resume function for hardware IPs.  The list of all the hardware
3334  * IPs that make up the asic is walked and the resume callbacks are run for
3335  * all DCE.  resume puts the hardware into a functional state after a suspend
3336  * and updates the software state as necessary.  This function is also used
3337  * for restoring the GPU after a GPU reset.
3338  *
3339  * Returns 0 on success, negative error code on failure.
3340  */
3341 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3342 {
3343 	int i, r;
3344 
3345 	for (i = 0; i < adev->num_ip_blocks; i++) {
3346 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3347 			continue;
3348 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3349 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3350 			if (r)
3351 				return r;
3352 		}
3353 	}
3354 
3355 	return 0;
3356 }
3357 
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  It runs, in order: resume
 * phase 1, firmware loading, resume phase 2, re-enabling of the TTM
 * buffer functions, fence driver hw init and resume phase 3.  The buffer
 * functions are re-enabled even when phase 2 reports an error; any other
 * failing step ends the sequence immediately.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret;

	ret = amdgpu_device_ip_resume_phase1(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_fw_loading(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_ip_resume_phase2(adev);

	/* done regardless of the phase 2 result */
	amdgpu_ttm_enable_buffer_funcs(adev);

	if (ret)
		return ret;

	amdgpu_fence_driver_hw_init(adev);

	return amdgpu_device_ip_resume_phase3(adev);
}
3395 
3396 /**
3397  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3398  *
3399  * @adev: amdgpu_device pointer
3400  *
3401  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3402  */
3403 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3404 {
3405 	if (amdgpu_sriov_vf(adev)) {
3406 		if (adev->is_atom_fw) {
3407 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3408 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3409 		} else {
3410 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3411 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3412 		}
3413 
3414 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3415 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3416 	}
3417 }
3418 
3419 /**
3420  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3421  *
3422  * @pdev : pci device context
3423  * @asic_type: AMD asic type
3424  *
3425  * Check if there is DC (new modesetting infrastructre) support for an asic.
3426  * returns true if DC has support, false if not.
3427  */
3428 bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
3429 				       enum amd_asic_type asic_type)
3430 {
3431 	switch (asic_type) {
3432 #ifdef CONFIG_DRM_AMDGPU_SI
3433 	case CHIP_HAINAN:
3434 #endif
3435 	case CHIP_TOPAZ:
3436 		/* chips with no display hardware */
3437 		return false;
3438 #if defined(CONFIG_DRM_AMD_DC)
3439 	case CHIP_TAHITI:
3440 	case CHIP_PITCAIRN:
3441 	case CHIP_VERDE:
3442 	case CHIP_OLAND:
3443 		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
3444 	default:
3445 		return amdgpu_dc != 0;
3446 #else
3447 	default:
3448 		if (amdgpu_dc > 0)
3449 			dev_info_once(
3450 				&pdev->dev,
3451 				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3452 		return false;
3453 #endif
3454 	}
3455 }
3456 
3457 /**
3458  * amdgpu_device_has_dc_support - check if dc is supported
3459  *
3460  * @adev: amdgpu_device pointer
3461  *
3462  * Returns true for supported, false for not supported
3463  */
3464 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3465 {
3466 	if (adev->enable_virtual_display ||
3467 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3468 		return false;
3469 
3470 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3471 }
3472 
3473 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3474 {
3475 	struct amdgpu_device *adev =
3476 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3477 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3478 
3479 	/* It's a bug to not have a hive within this function */
3480 	if (WARN_ON(!hive))
3481 		return;
3482 
3483 	/*
3484 	 * Use task barrier to synchronize all xgmi reset works across the
3485 	 * hive. task_barrier_enter and task_barrier_exit will block
3486 	 * until all the threads running the xgmi reset works reach
3487 	 * those points. task_barrier_full will do both blocks.
3488 	 */
3489 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3490 
3491 		task_barrier_enter(&hive->tb);
3492 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3493 
3494 		if (adev->asic_reset_res)
3495 			goto fail;
3496 
3497 		task_barrier_exit(&hive->tb);
3498 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3499 
3500 		if (adev->asic_reset_res)
3501 			goto fail;
3502 
3503 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3504 	} else {
3505 
3506 		task_barrier_full(&hive->tb);
3507 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3508 	}
3509 
3510 fail:
3511 	if (adev->asic_reset_res)
3512 		dev_warn(adev->dev,
3513 			 "ASIC reset failed with error, %d for drm dev, %s",
3514 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3515 	amdgpu_put_xgmi_hive(hive);
3516 }
3517 
3518 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3519 {
3520 	char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
3521 	char *input = buf;
3522 	char *timeout_setting = NULL;
3523 	int index = 0;
3524 	long timeout;
3525 	int ret = 0;
3526 
3527 	/* By default timeout for all queues is 2 sec */
3528 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3529 		adev->video_timeout = msecs_to_jiffies(2000);
3530 
3531 	if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3532 		return 0;
3533 
3534 	/*
3535 	 * strsep() destructively modifies its input by replacing delimiters
3536 	 * with '\0'. Use a stack copy so the global module parameter buffer
3537 	 * remains intact for multi-GPU systems where this function is called
3538 	 * once per device.
3539 	 */
3540 	strscpy(buf, amdgpu_lockup_timeout, sizeof(buf));
3541 
3542 	while ((timeout_setting = strsep(&input, ",")) &&
3543 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3544 		ret = kstrtol(timeout_setting, 0, &timeout);
3545 		if (ret)
3546 			return ret;
3547 
3548 		if (timeout == 0) {
3549 			index++;
3550 			continue;
3551 		} else if (timeout < 0) {
3552 			timeout = MAX_SCHEDULE_TIMEOUT;
3553 			dev_warn(adev->dev, "lockup timeout disabled");
3554 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3555 		} else {
3556 			timeout = msecs_to_jiffies(timeout);
3557 		}
3558 
3559 		switch (index++) {
3560 		case 0:
3561 			adev->gfx_timeout = timeout;
3562 			break;
3563 		case 1:
3564 			adev->compute_timeout = timeout;
3565 			break;
3566 		case 2:
3567 			adev->sdma_timeout = timeout;
3568 			break;
3569 		case 3:
3570 			adev->video_timeout = timeout;
3571 			break;
3572 		default:
3573 			break;
3574 		}
3575 	}
3576 
3577 	/* When only one value specified apply it to all queues. */
3578 	if (index == 1)
3579 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3580 			adev->video_timeout = timeout;
3581 
3582 	return ret;
3583 }
3584 
3585 /**
3586  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3587  *
3588  * @adev: amdgpu_device pointer
3589  *
3590  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3591  */
3592 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3593 {
3594 	struct iommu_domain *domain;
3595 
3596 	domain = iommu_get_domain_for_dev(adev->dev);
3597 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3598 		adev->ram_is_direct_mapped = true;
3599 }
3600 
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true when the device has an IOMMU domain of type
 * IOMMU_DOMAIN_DMA or IOMMU_DOMAIN_DMA_FQ, false otherwise.
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(adev->dev);

	if (!dom)
		return false;

	return dom->type == IOMMU_DOMAIN_DMA ||
	       dom->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
3621 
3622 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3623 {
3624 	if (amdgpu_mcbp == 1)
3625 		adev->gfx.mcbp = true;
3626 	else if (amdgpu_mcbp == 0)
3627 		adev->gfx.mcbp = false;
3628 
3629 	if (amdgpu_sriov_vf(adev))
3630 		adev->gfx.mcbp = true;
3631 
3632 	if (adev->gfx.mcbp)
3633 		dev_info(adev->dev, "MCBP is enabled\n");
3634 }
3635 
3636 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3637 {
3638 	int r;
3639 
3640 	r = amdgpu_atombios_sysfs_init(adev);
3641 	if (r)
3642 		drm_err(&adev->ddev,
3643 			"registering atombios sysfs failed (%d).\n", r);
3644 
3645 	r = amdgpu_pm_sysfs_init(adev);
3646 	if (r)
3647 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3648 
3649 	r = amdgpu_ucode_sysfs_init(adev);
3650 	if (r) {
3651 		adev->ucode_sysfs_en = false;
3652 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
3653 	} else
3654 		adev->ucode_sysfs_en = true;
3655 
3656 	r = amdgpu_device_attr_sysfs_init(adev);
3657 	if (r)
3658 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3659 
3660 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3661 	if (r)
3662 		dev_err(adev->dev,
3663 			"Could not create amdgpu board attributes\n");
3664 
3665 	amdgpu_fru_sysfs_init(adev);
3666 	amdgpu_reg_state_sysfs_init(adev);
3667 	amdgpu_xcp_sysfs_init(adev);
3668 	amdgpu_uma_sysfs_init(adev);
3669 	amdgpu_ptl_sysfs_init(adev);
3670 
3671 	return r;
3672 }
3673 
3674 static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
3675 {
3676 	if (adev->pm.sysfs_initialized)
3677 		amdgpu_pm_sysfs_fini(adev);
3678 	if (adev->ucode_sysfs_en)
3679 		amdgpu_ucode_sysfs_fini(adev);
3680 	amdgpu_device_attr_sysfs_fini(adev);
3681 	amdgpu_fru_sysfs_fini(adev);
3682 
3683 	amdgpu_reg_state_sysfs_fini(adev);
3684 	amdgpu_xcp_sysfs_fini(adev);
3685 	amdgpu_uma_sysfs_fini(adev);
3686 	amdgpu_ptl_sysfs_fini(adev);
3687 }
3688 
3689 /**
3690  * amdgpu_device_init - initialize the driver
3691  *
3692  * @adev: amdgpu_device pointer
3693  * @flags: driver flags
3694  *
3695  * Initializes the driver info and hw (all asics).
3696  * Returns 0 for success or an error on failure.
3697  * Called at driver startup.
3698  */
3699 int amdgpu_device_init(struct amdgpu_device *adev,
3700 		       uint32_t flags)
3701 {
3702 	struct pci_dev *pdev = adev->pdev;
3703 	int r, i;
3704 	bool px = false;
3705 	u32 max_MBps;
3706 	int tmp;
3707 
3708 	adev->shutdown = false;
3709 	adev->flags = flags;
3710 
3711 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3712 		adev->asic_type = amdgpu_force_asic_type;
3713 	else
3714 		adev->asic_type = flags & AMD_ASIC_MASK;
3715 
3716 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3717 	if (amdgpu_emu_mode == 1)
3718 		adev->usec_timeout *= 10;
3719 	adev->gmc.gart_size = 512 * 1024 * 1024;
3720 	adev->accel_working = false;
3721 	adev->num_rings = 0;
3722 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3723 	adev->mman.buffer_funcs = NULL;
3724 	adev->mman.num_buffer_funcs_scheds = 0;
3725 	adev->vm_manager.vm_pte_funcs = NULL;
3726 	adev->vm_manager.vm_pte_num_scheds = 0;
3727 	adev->gmc.gmc_funcs = NULL;
3728 	adev->harvest_ip_mask = 0x0;
3729 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3730 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3731 
3732 	amdgpu_reg_access_init(adev);
3733 
3734 	dev_info(
3735 		adev->dev,
3736 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3737 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3738 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3739 
3740 	/* mutex initialization are all done here so we
3741 	 * can recall function without having locking issues
3742 	 */
3743 	mutex_init(&adev->firmware.mutex);
3744 	mutex_init(&adev->pm.mutex);
3745 	mutex_init(&adev->gfx.gpu_clock_mutex);
3746 	mutex_init(&adev->srbm_mutex);
3747 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3748 	mutex_init(&adev->gfx.gfx_off_mutex);
3749 	mutex_init(&adev->gfx.partition_mutex);
3750 	mutex_init(&adev->grbm_idx_mutex);
3751 	mutex_init(&adev->mn_lock);
3752 	mutex_init(&adev->virt.vf_errors.lock);
3753 	hash_init(adev->mn_hash);
3754 	mutex_init(&adev->psp.mutex);
3755 	mutex_init(&adev->psp.ptl.mutex);
3756 	mutex_init(&adev->notifier_lock);
3757 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3758 	mutex_init(&adev->benchmark_mutex);
3759 	mutex_init(&adev->gfx.reset_sem_mutex);
3760 
3761 	/* Associate locks with lockdep classes for ordering validation */
3762 	amdgpu_lockdep_set_class(adev);
3763 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3764 	mutex_init(&adev->enforce_isolation_mutex);
3765 	for (i = 0; i < MAX_XCP; ++i) {
3766 		adev->isolation[i].spearhead = dma_fence_get_stub();
3767 		amdgpu_sync_create(&adev->isolation[i].active);
3768 		amdgpu_sync_create(&adev->isolation[i].prev);
3769 	}
3770 	mutex_init(&adev->gfx.userq_sch_mutex);
3771 	mutex_init(&adev->gfx.workload_profile_mutex);
3772 	mutex_init(&adev->vcn.workload_profile_mutex);
3773 
3774 	amdgpu_device_init_apu_flags(adev);
3775 
3776 	r = amdgpu_device_check_arguments(adev);
3777 	if (r)
3778 		return r;
3779 
3780 	spin_lock_init(&adev->mmio_idx_lock);
3781 	spin_lock_init(&adev->mm_stats.lock);
3782 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3783 	spin_lock_init(&adev->wb.lock);
3784 
3785 	INIT_LIST_HEAD(&adev->reset_list);
3786 
3787 	INIT_LIST_HEAD(&adev->ras_list);
3788 
3789 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3790 
3791 	xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ);
3792 
3793 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3794 			  amdgpu_device_delayed_init_work_handler);
3795 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3796 			  amdgpu_device_delay_enable_gfx_off);
3797 	/*
3798 	 * Initialize the enforce_isolation work structures for each XCP
3799 	 * partition.  This work handler is responsible for enforcing shader
3800 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3801 	 * each GFX and compute ring.  If there are any fences, it schedules
3802 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3803 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3804 	 * runqueue.
3805 	 */
3806 	for (i = 0; i < MAX_XCP; i++) {
3807 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3808 				  amdgpu_gfx_enforce_isolation_handler);
3809 		adev->gfx.enforce_isolation[i].adev = adev;
3810 		adev->gfx.enforce_isolation[i].xcp_id = i;
3811 	}
3812 
3813 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3814 
3815 	amdgpu_coredump_init(adev);
3816 
3817 	adev->gfx.gfx_off_req_count = 1;
3818 	adev->gfx.gfx_off_residency = 0;
3819 	adev->gfx.gfx_off_entrycount = 0;
3820 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3821 
3822 	atomic_set(&adev->throttling_logging_enabled, 1);
3823 	/*
3824 	 * If throttling continues, logging will be performed every minute
3825 	 * to avoid log flooding. "-1" is subtracted since the thermal
3826 	 * throttling interrupt comes every second. Thus, the total logging
3827 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3828 	 * for throttling interrupt) = 60 seconds.
3829 	 */
3830 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3831 
3832 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3833 
3834 	/* Registers mapping */
3835 	/* TODO: block userspace mapping of io register */
3836 	if (adev->asic_type >= CHIP_BONAIRE) {
3837 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3838 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3839 	} else {
3840 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3841 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3842 	}
3843 
3844 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3845 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3846 
3847 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3848 	if (!adev->rmmio)
3849 		return -ENOMEM;
3850 
3851 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3852 		 (uint32_t)adev->rmmio_base);
3853 	dev_info(adev->dev, "register mmio size: %u\n",
3854 		 (unsigned int)adev->rmmio_size);
3855 
3856 	/*
3857 	 * Reset domain needs to be present early, before XGMI hive discovered
3858 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3859 	 * early on during init and before calling to RREG32.
3860 	 */
3861 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3862 	if (!adev->reset_domain)
3863 		return -ENOMEM;
3864 
3865 	/* detect hw virtualization here */
3866 	amdgpu_virt_init(adev);
3867 
3868 	amdgpu_device_get_pcie_info(adev);
3869 
3870 	r = amdgpu_device_get_job_timeout_settings(adev);
3871 	if (r) {
3872 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3873 		return r;
3874 	}
3875 
3876 	amdgpu_device_set_mcbp(adev);
3877 
3878 	/*
3879 	 * By default, use default mode where all blocks are expected to be
3880 	 * initialized. At present a 'swinit' of blocks is required to be
3881 	 * completed before the need for a different level is detected.
3882 	 */
3883 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3884 
3885 	amdgpu_device_check_iommu_direct_map(adev);
3886 
3887 	/* early init functions */
3888 	r = amdgpu_device_ip_early_init(adev);
3889 	if (r)
3890 		return r;
3891 
3892 	/*
3893 	 * No need to remove conflicting FBs for non-display class devices.
3894 	 * This prevents the sysfb from being freed accidently.
3895 	 */
3896 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3897 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3898 		/* Get rid of things like offb */
3899 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3900 		if (r)
3901 			return r;
3902 	}
3903 
3904 	/* Enable TMZ based on IP_VERSION */
3905 	amdgpu_gmc_tmz_set(adev);
3906 
3907 	if (amdgpu_sriov_vf(adev) &&
3908 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
3909 		/* VF MMIO access (except mailbox range) from CPU
3910 		 * will be blocked during sriov runtime
3911 		 */
3912 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3913 
3914 	amdgpu_gmc_noretry_set(adev);
3915 	/* Need to get xgmi info early to decide the reset behavior*/
3916 	if (adev->gmc.xgmi.supported) {
3917 		if (adev->gfxhub.funcs &&
3918 		    adev->gfxhub.funcs->get_xgmi_info) {
3919 			r = adev->gfxhub.funcs->get_xgmi_info(adev);
3920 			if (r)
3921 				return r;
3922 		}
3923 	}
3924 
3925 	if (adev->gmc.xgmi.connected_to_cpu) {
3926 		if (adev->mmhub.funcs &&
3927 		    adev->mmhub.funcs->get_xgmi_info) {
3928 			r = adev->mmhub.funcs->get_xgmi_info(adev);
3929 			if (r)
3930 				return r;
3931 		}
3932 	}
3933 
3934 	/* enable PCIE atomic ops */
3935 	if (amdgpu_sriov_vf(adev)) {
3936 		if (adev->virt.fw_reserve.p_pf2vf)
3937 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3938 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3939 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3940 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3941 	 * internal path natively support atomics, set have_atomics_support to true.
3942 	 */
3943 	} else if ((adev->flags & AMD_IS_APU &&
3944 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3945 		   (adev->gmc.xgmi.connected_to_cpu &&
3946 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3947 		adev->have_atomics_support = true;
3948 	} else {
3949 		adev->have_atomics_support =
3950 			!pci_enable_atomic_ops_to_root(adev->pdev,
3951 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3952 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3953 	}
3954 
3955 	if (!adev->have_atomics_support)
3956 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3957 
3958 	/* doorbell bar mapping and doorbell index init*/
3959 	amdgpu_doorbell_init(adev);
3960 
3961 	if (amdgpu_emu_mode == 1) {
3962 		/* post the asic on emulation mode */
3963 		emu_soc_asic_init(adev);
3964 		goto fence_driver_init;
3965 	}
3966 
3967 	amdgpu_reset_init(adev);
3968 
3969 	/* detect if we are with an SRIOV vbios */
3970 	if (adev->bios)
3971 		amdgpu_device_detect_sriov_bios(adev);
3972 
3973 	/* check if we need to reset the asic
3974 	 *  E.g., driver was not cleanly unloaded previously, etc.
3975 	 */
3976 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3977 		if (adev->gmc.xgmi.num_physical_nodes) {
3978 			dev_info(adev->dev, "Pending hive reset.\n");
3979 			amdgpu_set_init_level(adev,
3980 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3981 		} else {
3982 				tmp = amdgpu_reset_method;
3983 				/* It should do a default reset when loading or reloading the driver,
3984 				 * regardless of the module parameter reset_method.
3985 				 */
3986 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3987 				r = amdgpu_asic_reset(adev);
3988 				amdgpu_reset_method = tmp;
3989 		}
3990 
3991 		if (r) {
3992 		  dev_err(adev->dev, "asic reset on init failed\n");
3993 		  goto failed;
3994 		}
3995 	}
3996 
3997 	/* Post card if necessary */
3998 	if (amdgpu_device_need_post(adev)) {
3999 		if (!adev->bios) {
4000 			dev_err(adev->dev, "no vBIOS found\n");
4001 			r = -EINVAL;
4002 			goto failed;
4003 		}
4004 		dev_info(adev->dev, "GPU posting now...\n");
4005 		r = amdgpu_device_asic_init(adev);
4006 		if (r) {
4007 			dev_err(adev->dev, "gpu post error!\n");
4008 			goto failed;
4009 		}
4010 	}
4011 
4012 	if (adev->bios) {
4013 		if (adev->is_atom_fw) {
4014 			/* Initialize clocks */
4015 			r = amdgpu_atomfirmware_get_clock_info(adev);
4016 			if (r) {
4017 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4018 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4019 				goto failed;
4020 			}
4021 		} else {
4022 			/* Initialize clocks */
4023 			r = amdgpu_atombios_get_clock_info(adev);
4024 			if (r) {
4025 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4026 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4027 				goto failed;
4028 			}
4029 			/* init i2c buses */
4030 			amdgpu_i2c_init(adev);
4031 		}
4032 	}
4033 
4034 fence_driver_init:
4035 	/* Fence driver */
4036 	r = amdgpu_fence_driver_sw_init(adev);
4037 	if (r) {
4038 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4039 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4040 		goto failed;
4041 	}
4042 
4043 	/* init the mode config */
4044 	drm_mode_config_init(adev_to_drm(adev));
4045 
4046 	r = amdgpu_device_ip_init(adev);
4047 	if (r) {
4048 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4049 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4050 		goto release_ras_con;
4051 	}
4052 
4053 	amdgpu_fence_driver_hw_init(adev);
4054 
4055 	dev_info(adev->dev,
4056 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4057 			adev->gfx.config.max_shader_engines,
4058 			adev->gfx.config.max_sh_per_se,
4059 			adev->gfx.config.max_cu_per_sh,
4060 			adev->gfx.cu_info.number);
4061 
4062 	adev->accel_working = true;
4063 
4064 	amdgpu_vm_check_compute_bug(adev);
4065 
4066 	/* Initialize the buffer migration limit. */
4067 	if (amdgpu_moverate >= 0)
4068 		max_MBps = amdgpu_moverate;
4069 	else
4070 		max_MBps = 8; /* Allow 8 MB/s. */
4071 	/* Get a log2 for easy divisions. */
4072 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4073 
4074 	/*
4075 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4076 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4077 	 * gpu instance is counted less.
4078 	 */
4079 	amdgpu_register_gpu_instance(adev);
4080 
4081 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4082 	 * explicit gating rather than handling it automatically.
4083 	 */
4084 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4085 		r = amdgpu_device_ip_late_init(adev);
4086 		if (r) {
4087 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4088 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4089 			goto release_ras_con;
4090 		}
4091 		/* must succeed. */
4092 		amdgpu_ras_resume(adev);
4093 		queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4094 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4095 	}
4096 
4097 	if (amdgpu_sriov_vf(adev)) {
4098 		amdgpu_virt_release_full_gpu(adev, true);
4099 		flush_delayed_work(&adev->delayed_init_work);
4100 	}
4101 
4102 	/* Don't init kfd if whole hive need to be reset during init */
4103 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4104 		kgd2kfd_init_zone_device(adev);
4105 		kfd_update_svm_support_properties(adev);
4106 	}
4107 
4108 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4109 		amdgpu_xgmi_reset_on_init(adev);
4110 
4111 	/*
4112 	 * Place those sysfs registering after `late_init`. As some of those
4113 	 * operations performed in `late_init` might affect the sysfs
4114 	 * interfaces creating.
4115 	 */
4116 	r = amdgpu_device_sys_interface_init(adev);
4117 
4118 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4119 		r = amdgpu_pmu_init(adev);
4120 	if (r)
4121 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4122 
4123 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4124 	if (amdgpu_device_cache_pci_state(adev->pdev))
4125 		pci_restore_state(pdev);
4126 
4127 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4128 	/* this will fail for cards that aren't VGA class devices, just
4129 	 * ignore it
4130 	 */
4131 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4132 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4133 
4134 	px = amdgpu_device_supports_px(adev);
4135 
4136 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4137 				apple_gmux_detect(NULL, NULL)))
4138 		vga_switcheroo_register_client(adev->pdev,
4139 					       &amdgpu_switcheroo_ops, px);
4140 
4141 	if (px)
4142 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4143 
4144 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4145 	r = register_pm_notifier(&adev->pm_nb);
4146 	if (r)
4147 		goto failed;
4148 
4149 	return 0;
4150 
4151 release_ras_con:
4152 	if (amdgpu_sriov_vf(adev))
4153 		amdgpu_virt_release_full_gpu(adev, true);
4154 
4155 	/* failed in exclusive mode due to timeout */
4156 	if (amdgpu_sriov_vf(adev) &&
4157 		!amdgpu_sriov_runtime(adev) &&
4158 		amdgpu_virt_mmio_blocked(adev) &&
4159 		!amdgpu_virt_wait_reset(adev)) {
4160 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4161 		/* Don't send request since VF is inactive. */
4162 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4163 		adev->virt.ops = NULL;
4164 		r = -EAGAIN;
4165 	}
4166 	amdgpu_release_ras_context(adev);
4167 
4168 failed:
4169 	amdgpu_vf_error_trans_all(adev);
4170 
4171 	return r;
4172 }
4173 
4174 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4175 {
4176 
4177 	/* Clear all CPU mappings pointing to this device */
4178 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4179 
4180 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4181 	amdgpu_doorbell_fini(adev);
4182 
4183 	iounmap(adev->rmmio);
4184 	adev->rmmio = NULL;
4185 	if (adev->mman.aper_base_kaddr)
4186 		iounmap(adev->mman.aper_base_kaddr);
4187 	adev->mman.aper_base_kaddr = NULL;
4188 
4189 	/* Memory manager related */
4190 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4191 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4192 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4193 	}
4194 }
4195 
/**
 * amdgpu_device_fini_hw - tear down the hardware side of the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the driver info (all asics).
 * Called at driver shutdown.
 *
 * Flushes outstanding deferred work, marks the device as shutting down,
 * ungates power/clock gating, disables interrupts and displays, and runs
 * the early IP teardown. If the PCI device is already disconnected, the
 * KFD software state is torn down first and all MMIO mappings are removed
 * at the end.
 */
void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
	dev_info(adev->dev, "finishing device.\n");
	/* let the delayed init work run to completion before tearing down */
	flush_delayed_work(&adev->delayed_init_work);

	/* the TTM workqueue only exists once the memory manager is set up */
	if (adev->mman.initialized)
		drain_workqueue(adev->mman.bdev.wq);
	adev->shutdown = true;

	unregister_pm_notifier(&adev->pm_nb);

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_request_full_gpu(adev, false);
		amdgpu_virt_fini_data_exchange(adev);
	}

	/* ungate power gating first, then clock gating */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	/* shut the displays down through whichever modeset API is in use */
	if (adev->mode_info.mode_config_initialized) {
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
			drm_helper_force_disable_all(adev_to_drm(adev));
		else
			drm_atomic_helper_shutdown(adev_to_drm(adev));
	}
	amdgpu_fence_driver_hw_fini(adev);

	amdgpu_device_sys_interface_fini(adev);

	/* RAS features must be disabled before hw fini */
	amdgpu_ras_pre_fini(adev);

	amdgpu_ttm_disable_buffer_funcs(adev);

	/*
	 * device went through surprise hotplug; we need to destroy topology
	 * before ip_fini_early to prevent kfd locking refcount issues by calling
	 * amdgpu_amdkfd_suspend()
	 */
	if (pci_dev_is_disconnected(adev->pdev))
		amdgpu_amdkfd_device_fini_sw(adev);

	amdgpu_coredump_fini(adev);
	amdgpu_device_ip_fini_early(adev);

	amdgpu_irq_fini_hw(adev);

	if (adev->mman.initialized)
		ttm_device_clear_dma_mappings(&adev->mman.bdev);

	amdgpu_gart_dummy_page_fini(adev);

	/* a disconnected device gets its MMIO mappings removed right away */
	if (pci_dev_is_disconnected(adev->pdev))
		amdgpu_device_unmap_mmio(adev);

}
4265 
/**
 * amdgpu_device_fini_sw - tear down the software side of the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the IP block and fence driver software teardown, drops the fences
 * and sync objects held by the device, releases firmware/BIOS data,
 * unregisters the VGA and vga_switcheroo clients, unmaps the register BAR
 * (when the DRM device can still be entered) and frees the remaining
 * per-device allocations.
 */
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
	int i, idx;
	bool px;

	amdgpu_device_ip_fini(adev);
	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	/* drop the gang submit fence and the per-partition isolation state */
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
	for (i = 0; i < MAX_XCP; ++i) {
		dma_fence_put(adev->isolation[i].spearhead);
		amdgpu_sync_free(&adev->isolation[i].active);
		amdgpu_sync_free(&adev->isolation[i].prev);
	}

	amdgpu_reset_fini(adev);

	/* free i2c buses */
	amdgpu_i2c_fini(adev);

	if (adev->bios) {
		/* atombios state is not torn down in emulation mode */
		if (amdgpu_emu_mode != 1)
			amdgpu_atombios_fini(adev);
		amdgpu_bios_release(adev);
	}

	kfree(adev->fru_info);
	adev->fru_info = NULL;

	kfree(adev->xcp_mgr);
	adev->xcp_mgr = NULL;

	px = amdgpu_device_supports_px(adev);

	/* same condition as the one used when registering the client */
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_unregister_client(adev->pdev);

	if (px)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_unregister(adev->pdev);

	/*
	 * Only unmap the registers while the DRM device can still be entered.
	 * NOTE(review): presumably the unplugged case was already handled by
	 * amdgpu_device_unmap_mmio() from amdgpu_device_fini_hw() - confirm.
	 */
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		iounmap(adev->rmmio);
		adev->rmmio = NULL;
		drm_dev_exit(idx);
	}

	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (adev->discovery.bin)
		amdgpu_discovery_fini(adev);

	amdgpu_reset_put_reset_domain(adev->reset_domain);
	adev->reset_domain = NULL;

	/* cached PCI config space copies */
	kfree(adev->pci_state);
	kfree(adev->pcie_reset_ctx.swds_pcistate);
	kfree(adev->pcie_reset_ctx.swus_pcistate);
}
4330 
4331 /**
4332  * amdgpu_device_evict_resources - evict device resources
4333  * @adev: amdgpu device object
4334  *
4335  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4336  * of the vram memory type. Mainly used for evicting device resources
4337  * at suspend time.
4338  *
4339  */
4340 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4341 {
4342 	int ret;
4343 
4344 	/* No need to evict vram on APUs unless going to S4 */
4345 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4346 		return 0;
4347 
4348 	/* No need to evict when going to S5 through S4 callbacks */
4349 	if (system_state == SYSTEM_POWER_OFF)
4350 		return 0;
4351 
4352 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4353 	if (ret) {
4354 		dev_warn(adev->dev, "evicting device resources failed\n");
4355 		return ret;
4356 	}
4357 
4358 	if (adev->in_s4) {
4359 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4360 		if (ret)
4361 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4362 	}
4363 	return ret;
4364 }
4365 
4366 /*
4367  * Suspend & resume.
4368  */
4369 /**
4370  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4371  * @nb: notifier block
4372  * @mode: suspend mode
4373  * @data: data
4374  *
4375  * This function is called when the system is about to suspend or hibernate.
4376  * It is used to set the appropriate flags so that eviction can be optimized
4377  * in the pm prepare callback.
4378  */
4379 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4380 				     void *data)
4381 {
4382 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4383 
4384 	switch (mode) {
4385 	case PM_HIBERNATION_PREPARE:
4386 		adev->in_s4 = true;
4387 		break;
4388 	case PM_POST_HIBERNATION:
4389 		adev->in_s4 = false;
4390 		break;
4391 	}
4392 
4393 	return NOTIFY_DONE;
4394 }
4395 
4396 /**
4397  * amdgpu_device_prepare - prepare for device suspend
4398  *
4399  * @dev: drm dev pointer
4400  *
4401  * Prepare to put the hw in the suspend state (all asics).
4402  * Returns 0 for success or an error on failure.
4403  * Called at driver suspend.
4404  */
4405 int amdgpu_device_prepare(struct drm_device *dev)
4406 {
4407 	struct amdgpu_device *adev = drm_to_adev(dev);
4408 	int i, r;
4409 
4410 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4411 		return 0;
4412 
4413 	/* Evict the majority of BOs before starting suspend sequence */
4414 	r = amdgpu_device_evict_resources(adev);
4415 	if (r)
4416 		return r;
4417 
4418 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4419 
4420 	for (i = 0; i < adev->num_ip_blocks; i++) {
4421 		if (!adev->ip_blocks[i].status.valid)
4422 			continue;
4423 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4424 			continue;
4425 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4426 		if (r)
4427 			return r;
4428 	}
4429 
4430 	return 0;
4431 }
4432 
4433 /**
4434  * amdgpu_device_complete - complete power state transition
4435  *
4436  * @dev: drm dev pointer
4437  *
4438  * Undo the changes from amdgpu_device_prepare. This will be
4439  * called on all resume transitions, including those that failed.
4440  */
4441 void amdgpu_device_complete(struct drm_device *dev)
4442 {
4443 	struct amdgpu_device *adev = drm_to_adev(dev);
4444 	int i;
4445 
4446 	for (i = 0; i < adev->num_ip_blocks; i++) {
4447 		if (!adev->ip_blocks[i].status.valid)
4448 			continue;
4449 		if (!adev->ip_blocks[i].version->funcs->complete)
4450 			continue;
4451 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4452 	}
4453 }
4454 
/**
 * amdgpu_device_suspend - initiate device suspend
 *
 * @dev: drm dev pointer
 * @notify_clients: notify in-kernel DRM clients
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 *
 * On failure the steps already performed are undone in reverse order via
 * the unwind_* labels. The original error code is returned in every case;
 * if one of the recovery steps itself fails, a warning is logged and the
 * remaining recovery steps are skipped.
 */
int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	/* r: result of the suspend steps, rec: result of the recovery steps */
	int r, rec;

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	adev->in_suspend = true;

	if (amdgpu_sriov_vf(adev)) {
		if (!adev->in_runpm)
			amdgpu_amdkfd_suspend_process(adev);
		amdgpu_virt_fini_data_exchange(adev);
		r = amdgpu_virt_request_full_gpu(adev, false);
		/*
		 * NOTE(review): this early return leaves adev->in_suspend set
		 * and does not go through the unwind path - confirm intended.
		 */
		if (r)
			return r;
	}

	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
	if (r)
		goto unwind_sriov;

	if (notify_clients)
		drm_client_dev_suspend(adev_to_drm(adev));

	cancel_delayed_work_sync(&adev->delayed_init_work);

	amdgpu_ras_suspend(adev);

	r = amdgpu_device_ip_suspend_phase1(adev);
	if (r)
		goto unwind_smartshift;

	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
	r = amdgpu_userq_suspend(adev);
	/*
	 * NOTE(review): on this failure path amdgpu_amdkfd_suspend() has
	 * already run, but unwind_ip_phase1 sits below the
	 * amdgpu_amdkfd_resume() call - confirm KFD does not need resuming.
	 */
	if (r)
		goto unwind_ip_phase1;

	r = amdgpu_device_evict_resources(adev);
	if (r)
		goto unwind_userq;

	amdgpu_ttm_disable_buffer_funcs(adev);

	amdgpu_fence_driver_hw_fini(adev);

	r = amdgpu_device_ip_suspend_phase2(adev);
	if (r)
		goto unwind_evict;

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	return 0;

	/* undo the buffer funcs / fence driver hw teardown */
unwind_evict:
	amdgpu_ttm_enable_buffer_funcs(adev);
	amdgpu_fence_driver_hw_init(adev);

	/* bring user queues and KFD back */
unwind_userq:
	rec = amdgpu_userq_resume(adev);
	if (rec) {
		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
		return r;
	}
	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
	if (rec) {
		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
		return r;
	}

unwind_ip_phase1:
	/* suspend phase 1 = resume phase 3 */
	rec = amdgpu_device_ip_resume_phase3(adev);
	if (rec) {
		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
		return r;
	}

	/* back to D0 for smart shift, then wake the DRM clients and RAS */
unwind_smartshift:
	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
	if (rec) {
		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
		return r;
	}

	if (notify_clients)
		drm_client_dev_resume(adev_to_drm(adev));

	amdgpu_ras_resume(adev);

unwind_sriov:
	if (amdgpu_sriov_vf(adev)) {
		rec = amdgpu_virt_request_full_gpu(adev, true);
		if (rec) {
			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
			return r;
		}
	}

	/* recovery completed: the device is no longer considered suspended */
	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;

	return r;
}
4570 
4571 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4572 {
4573 	int r;
4574 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
4575 
4576 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4577 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4578 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4579 	 * so that QEMU reprograms MSIX table.
4580 	 */
4581 	amdgpu_restore_msix(adev);
4582 
4583 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4584 	if (r)
4585 		return r;
4586 
4587 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4588 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4589 
4590 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4591 	adev->vm_manager.vram_base_offset +=
4592 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4593 
4594 	return 0;
4595 }
4596 
/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm dev pointer
 * @notify_clients: notify in-kernel DRM clients
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 *
 * For an SR-IOV VF, full GPU access is requested up front and released
 * again at the exit label, whether or not the resume steps succeeded.
 */
int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
		r = amdgpu_virt_resume(adev);
		if (r)
			goto exit;
	}

	/*
	 * NOTE(review): this return bypasses the exit label, so a VF that
	 * requested full GPU access above would not release it here -
	 * confirm the two conditions cannot both hold.
	 */
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		r = amdgpu_device_asic_init(adev);
		/*
		 * The failure is only logged; r is overwritten by the
		 * amdgpu_device_ip_resume() result right below.
		 */
		if (r)
			dev_err(adev->dev, "amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);

	if (r) {
		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
		goto exit;
	}

	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
	if (r)
		goto exit;

	r = amdgpu_userq_resume(adev);
	if (r)
		goto exit;

	r = amdgpu_device_ip_late_init(adev);
	if (r)
		goto exit;

	queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));
exit:
	/* reached on success and on failure: hand full GPU access back */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_init_data_exchange(adev);
		amdgpu_virt_release_full_gpu(adev, true);

		if (!r && !adev->in_runpm)
			r = amdgpu_amdkfd_resume_process(adev);
	}

	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	if (notify_clients)
		drm_client_dev_resume(adev_to_drm(adev));

	amdgpu_ras_resume(adev);

	if (adev->mode_info.num_crtc) {
		/*
		 * Most of the connector probing functions try to acquire runtime pm
		 * refs to ensure that the GPU is powered on when connector polling is
		 * performed. Since we're calling this from a runtime PM callback,
		 * trying to acquire rpm refs will cause us to deadlock.
		 *
		 * Since we're guaranteed to be holding the rpm lock, it's safe to
		 * temporarily disable the rpm helpers so this doesn't deadlock us.
		 */
#ifdef CONFIG_PM
		dev->dev->power.disable_depth++;
#endif
		/* the hotplug notification differs with and without DC */
		if (!adev->dc_enabled)
			drm_helper_hpd_irq_event(dev);
		else
			drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
		dev->dev->power.disable_depth--;
#endif
	}

	amdgpu_vram_mgr_clear_reset_blocks(adev);
	adev->in_suspend = false;

	/* a smart shift failure is not fatal for the resume */
	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
		dev_warn(adev->dev, "smart shift update failed\n");

	return 0;
}
4708 
4709 /**
4710  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4711  *
4712  * @adev: amdgpu_device pointer
4713  *
4714  * The list of all the hardware IPs that make up the asic is walked and
4715  * the check_soft_reset callbacks are run.  check_soft_reset determines
4716  * if the asic is still hung or not.
4717  * Returns true if any of the IPs are still in a hung state, false if not.
4718  */
4719 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4720 {
4721 	int i;
4722 	bool asic_hang = false;
4723 
4724 	if (amdgpu_sriov_vf(adev))
4725 		return true;
4726 
4727 	if (amdgpu_asic_need_full_reset(adev))
4728 		return true;
4729 
4730 	for (i = 0; i < adev->num_ip_blocks; i++) {
4731 		if (!adev->ip_blocks[i].status.valid)
4732 			continue;
4733 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4734 			adev->ip_blocks[i].status.hang =
4735 				adev->ip_blocks[i].version->funcs->check_soft_reset(
4736 					&adev->ip_blocks[i]);
4737 		if (adev->ip_blocks[i].status.hang) {
4738 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4739 			asic_hang = true;
4740 		}
4741 	}
4742 	return asic_hang;
4743 }
4744 
4745 /**
4746  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4747  *
4748  * @adev: amdgpu_device pointer
4749  *
4750  * The list of all the hardware IPs that make up the asic is walked and the
4751  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4752  * handles any IP specific hardware or software state changes that are
4753  * necessary for a soft reset to succeed.
4754  * Returns 0 on success, negative error code on failure.
4755  */
4756 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4757 {
4758 	int i, r = 0;
4759 
4760 	for (i = 0; i < adev->num_ip_blocks; i++) {
4761 		if (!adev->ip_blocks[i].status.valid)
4762 			continue;
4763 		if (adev->ip_blocks[i].status.hang &&
4764 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4765 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4766 			if (r)
4767 				return r;
4768 		}
4769 	}
4770 
4771 	return 0;
4772 }
4773 
4774 /**
4775  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4776  *
4777  * @adev: amdgpu_device pointer
4778  *
4779  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4780  * reset is necessary to recover.
4781  * Returns true if a full asic reset is required, false if not.
4782  */
4783 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4784 {
4785 	int i;
4786 
4787 	if (amdgpu_asic_need_full_reset(adev))
4788 		return true;
4789 
4790 	for (i = 0; i < adev->num_ip_blocks; i++) {
4791 		if (!adev->ip_blocks[i].status.valid)
4792 			continue;
4793 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4794 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4795 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4796 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4797 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4798 			if (adev->ip_blocks[i].status.hang) {
4799 				dev_info(adev->dev, "Some block need full reset!\n");
4800 				return true;
4801 			}
4802 		}
4803 	}
4804 	return false;
4805 }
4806 
4807 /**
4808  * amdgpu_device_ip_soft_reset - do a soft reset
4809  *
4810  * @adev: amdgpu_device pointer
4811  *
4812  * The list of all the hardware IPs that make up the asic is walked and the
4813  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4814  * IP specific hardware or software state changes that are necessary to soft
4815  * reset the IP.
4816  * Returns 0 on success, negative error code on failure.
4817  */
4818 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4819 {
4820 	int i, r = 0;
4821 
4822 	for (i = 0; i < adev->num_ip_blocks; i++) {
4823 		if (!adev->ip_blocks[i].status.valid)
4824 			continue;
4825 		if (adev->ip_blocks[i].status.hang &&
4826 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4827 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
4828 			if (r)
4829 				return r;
4830 		}
4831 	}
4832 
4833 	return 0;
4834 }
4835 
4836 /**
4837  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4838  *
4839  * @adev: amdgpu_device pointer
4840  *
4841  * The list of all the hardware IPs that make up the asic is walked and the
4842  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4843  * handles any IP specific hardware or software state changes that are
4844  * necessary after the IP has been soft reset.
4845  * Returns 0 on success, negative error code on failure.
4846  */
4847 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4848 {
4849 	int i, r = 0;
4850 
4851 	for (i = 0; i < adev->num_ip_blocks; i++) {
4852 		if (!adev->ip_blocks[i].status.valid)
4853 			continue;
4854 		if (adev->ip_blocks[i].status.hang &&
4855 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4856 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
4857 		if (r)
4858 			return r;
4859 	}
4860 
4861 	return 0;
4862 }
4863 
/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * do VF FLR and reinitialize Asic
 * return 0 means succeeded otherwise failed
 *
 * When AMDGPU_HOST_FLR is set in @reset_context the function waits for the
 * reset and clears the flag; otherwise it requests the reset itself. After
 * that the IP blocks are re-initialized in two stages around the firmware
 * loading, the XGMI topology is refreshed when the device is part of a
 * hive, and the IB/ring tests are run.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     struct amdgpu_reset_context *reset_context)
{
	int r;
	struct amdgpu_hive_info *hive = NULL;

	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
		/* only signal readiness when the RAS fed status is not set */
		if (!amdgpu_ras_get_fed_status(adev))
			amdgpu_virt_ready_to_reset(adev);
		amdgpu_virt_wait_reset(adev);
		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
		r = amdgpu_virt_request_full_gpu(adev, true);
	} else {
		r = amdgpu_virt_reset_gpu(adev);
	}
	if (r)
		return r;

	amdgpu_ras_clear_err_state(adev);
	amdgpu_irq_gpu_reset_resume_helper(adev);

	/* some sw clean up VF needs to do before recover */
	amdgpu_virt_post_reset(adev);

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		return r;

	amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		return r;

	hive = amdgpu_get_xgmi_hive(adev);
	/* Update PSP FW topology after reset */
	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
		r = amdgpu_xgmi_update_topology(hive, adev);
	/* drop the hive reference before acting on the result */
	if (hive)
		amdgpu_put_xgmi_hive(hive);
	if (r)
		return r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		return r;

	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
		amdgpu_inc_vram_lost(adev);

	/* need to be called during full access so we can't do it later like
	 * bare-metal does.
	 */
	amdgpu_amdkfd_post_reset(adev);
	amdgpu_virt_release_full_gpu(adev, true);

	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
		amdgpu_ras_resume(adev);

	amdgpu_virt_ras_telemetry_post_reset(adev);

	return 0;
}
4947 
4948 /**
4949  * amdgpu_device_has_job_running - check if there is any unfinished job
4950  *
4951  * @adev: amdgpu_device pointer
4952  *
4953  * check if there is any job running on the device when guest driver receives
4954  * FLR notification from host driver. If there are still jobs running, then
4955  * the guest driver will not respond the FLR reset. Instead, let the job hit
4956  * the timeout and guest driver then issue the reset request.
4957  */
4958 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4959 {
4960 	int i;
4961 
4962 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4963 		struct amdgpu_ring *ring = adev->rings[i];
4964 
4965 		if (!amdgpu_ring_sched_ready(ring))
4966 			continue;
4967 
4968 		if (amdgpu_fence_count_emitted(ring))
4969 			return true;
4970 	}
4971 	return false;
4972 }
4973 
4974 /**
4975  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4976  *
4977  * @adev: amdgpu_device pointer
4978  *
4979  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4980  * a hung GPU.
4981  */
4982 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4983 {
4984 
4985 	if (amdgpu_gpu_recovery == 0)
4986 		goto disabled;
4987 
4988 	/* Skip soft reset check in fatal error mode */
4989 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4990 		return true;
4991 
4992 	if (amdgpu_sriov_vf(adev))
4993 		return true;
4994 
4995 	if (amdgpu_gpu_recovery == -1) {
4996 		switch (adev->asic_type) {
4997 #ifdef CONFIG_DRM_AMDGPU_SI
4998 		case CHIP_VERDE:
4999 		case CHIP_TAHITI:
5000 		case CHIP_PITCAIRN:
5001 		case CHIP_OLAND:
5002 		case CHIP_HAINAN:
5003 #endif
5004 #ifdef CONFIG_DRM_AMDGPU_CIK
5005 		case CHIP_KAVERI:
5006 		case CHIP_KABINI:
5007 		case CHIP_MULLINS:
5008 #endif
5009 		case CHIP_CARRIZO:
5010 		case CHIP_STONEY:
5011 		case CHIP_CYAN_SKILLFISH:
5012 			goto disabled;
5013 		default:
5014 			break;
5015 		}
5016 	}
5017 
5018 	return true;
5019 
5020 disabled:
5021 		dev_info(adev->dev, "GPU recovery disabled.\n");
5022 		return false;
5023 }
5024 
5025 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5026 {
5027 	u32 i;
5028 	int ret = 0;
5029 
5030 	if (adev->bios)
5031 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5032 
5033 	dev_info(adev->dev, "GPU mode1 reset\n");
5034 
5035 	/* Cache the state before bus master disable. The saved config space
5036 	 * values are used in other cases like restore after mode-2 reset.
5037 	 */
5038 	amdgpu_device_cache_pci_state(adev->pdev);
5039 
5040 	/* disable BM */
5041 	pci_clear_master(adev->pdev);
5042 
5043 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5044 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5045 		ret = amdgpu_dpm_mode1_reset(adev);
5046 	} else {
5047 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5048 		ret = psp_gpu_reset(adev);
5049 	}
5050 
5051 	if (ret)
5052 		goto mode1_reset_failed;
5053 
5054 	/* enable mmio access after mode 1 reset completed */
5055 	adev->no_hw_access = false;
5056 
5057 	/* ensure no_hw_access is updated before we access hw */
5058 	smp_mb();
5059 
5060 	amdgpu_device_load_pci_state(adev->pdev);
5061 	ret = amdgpu_psp_wait_for_bootloader(adev);
5062 	if (ret)
5063 		goto mode1_reset_failed;
5064 
5065 	/* wait for asic to come out of reset */
5066 	for (i = 0; i < adev->usec_timeout; i++) {
5067 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5068 
5069 		if (memsize != 0xffffffff)
5070 			break;
5071 		udelay(1);
5072 	}
5073 
5074 	if (i >= adev->usec_timeout) {
5075 		ret = -ETIMEDOUT;
5076 		goto mode1_reset_failed;
5077 	}
5078 
5079 	if (adev->bios)
5080 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5081 
5082 	return 0;
5083 
5084 mode1_reset_failed:
5085 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5086 	return ret;
5087 }
5088 
5089 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5090 {
5091 	int ret = 0;
5092 
5093 	dev_info(adev->dev, "GPU link reset\n");
5094 
5095 	if (!amdgpu_reset_in_dpc(adev))
5096 		ret = amdgpu_dpm_link_reset(adev);
5097 
5098 	if (ret)
5099 		goto link_reset_failed;
5100 
5101 	ret = amdgpu_psp_wait_for_bootloader(adev);
5102 	if (ret)
5103 		goto link_reset_failed;
5104 
5105 	return 0;
5106 
5107 link_reset_failed:
5108 	dev_err(adev->dev, "GPU link reset failed\n");
5109 	return ret;
5110 }
5111 
5112 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5113 				 struct amdgpu_reset_context *reset_context)
5114 {
5115 	struct amdgpu_job *job = NULL;
5116 	struct dma_fence *fence = NULL;
5117 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5118 	bool need_full_reset =
5119 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5120 	int i, r;
5121 
5122 	if (reset_context->reset_req_dev == adev)
5123 		job = reset_context->job;
5124 
5125 	if (amdgpu_sriov_vf(adev))
5126 		amdgpu_virt_pre_reset(adev);
5127 
5128 	amdgpu_fence_driver_isr_toggle(adev, true);
5129 
5130 	if (job)
5131 		fence = &job->hw_fence->base;
5132 
5133 	/* block all schedulers and reset given job's ring */
5134 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5135 		struct amdgpu_ring *ring = adev->rings[i];
5136 
5137 		if (!amdgpu_ring_sched_ready(ring))
5138 			continue;
5139 
5140 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5141 		amdgpu_fence_driver_force_completion(ring, fence);
5142 	}
5143 
5144 	amdgpu_fence_driver_isr_toggle(adev, false);
5145 
5146 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5147 	/* If reset handler not implemented, continue; otherwise return */
5148 	if (r == -EOPNOTSUPP)
5149 		r = 0;
5150 	else
5151 		return r;
5152 
5153 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5154 	if (!amdgpu_sriov_vf(adev)) {
5155 
5156 		if (!need_full_reset)
5157 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5158 
5159 		if (!need_full_reset && amdgpu_gpu_recovery &&
5160 		    amdgpu_device_ip_check_soft_reset(adev)) {
5161 			amdgpu_device_ip_pre_soft_reset(adev);
5162 			r = amdgpu_device_ip_soft_reset(adev);
5163 			amdgpu_device_ip_post_soft_reset(adev);
5164 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5165 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5166 				need_full_reset = true;
5167 			}
5168 		}
5169 
5170 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5171 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5172 			/* Trigger ip dump before we reset the asic */
5173 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5174 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5175 					tmp_adev->ip_blocks[i].version->funcs
5176 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5177 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5178 		}
5179 
5180 		if (need_full_reset)
5181 			r = amdgpu_device_ip_suspend(adev);
5182 		if (need_full_reset)
5183 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5184 		else
5185 			clear_bit(AMDGPU_NEED_FULL_RESET,
5186 				  &reset_context->flags);
5187 	}
5188 
5189 	return r;
5190 }
5191 
/**
 * amdgpu_device_reinit_after_reset - re-initialize devices after an ASIC reset
 *
 * @reset_context: amdgpu reset context pointer
 *
 * Iterates over reset_context->reset_device_list. When
 * AMDGPU_NEED_FULL_RESET is set, each device is re-posted and its IP blocks
 * are resumed phase by phase; in all cases the init level is restored to
 * default and the IB ring tests are run once the device came back without
 * error. A per-device failure is stored in that device's asic_reset_res.
 *
 * Returns -EINVAL if no device list is attached to @reset_context, -EAGAIN
 * as soon as an IB ring test fails, otherwise the result of the last device
 * that was processed.
 */
int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
{
	struct list_head *device_list_handle;
	bool full_reset, vram_lost = false;
	struct amdgpu_device *tmp_adev;
	int r, init_level;

	device_list_handle = reset_context->reset_device_list;

	if (!device_list_handle)
		return -EINVAL;

	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/*
	 * If it's reset on init, it's default init level, otherwise keep level
	 * as recovery level.
	 */
	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
	else
			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;

	r = 0;
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		amdgpu_set_init_level(tmp_adev, init_level);
		if (full_reset) {
			/* post card */
			amdgpu_reset_set_dpc_status(tmp_adev, false);
			amdgpu_ras_clear_err_state(tmp_adev);
			r = amdgpu_device_asic_init(tmp_adev);
			if (r) {
				dev_warn(tmp_adev->dev, "asic atom init failed!");
			} else {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");

				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);

				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);

				if (vram_lost) {
					dev_info(
						tmp_adev->dev,
						"VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				/*
				 * NOTE(review): unlike every other failure in
				 * this branch, this one returns immediately
				 * instead of jumping to "out", so
				 * asic_reset_res is not recorded for this
				 * device and the remaining devices in the list
				 * are not visited. Confirm this is intended.
				 */
				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_xcp_restore_partition_mode(
					tmp_adev->xcp_mgr);
				if (r)
					goto out;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				amdgpu_ttm_enable_buffer_funcs(tmp_adev);

				r = amdgpu_device_ip_resume_phase3(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked as reset was already
				 * complete successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				if (!reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					amdgpu_xgmi_add_device(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
				if (r)
					goto out;

				drm_client_dev_resume(adev_to_drm(tmp_adev));

				/*
				 * The GPU enters bad state once faulty pages
				 * by ECC has reached the threshold, and ras
				 * recovery is scheduled next. So add one check
				 * here to break recovery if it indeed exceeds
				 * bad page threshold, and remind user to
				 * retire this GPU or setting one bigger
				 * bad_page_threshold value to fix this once
				 * probing driver again.
				 */
				if (!amdgpu_ras_is_rma(tmp_adev)) {
					/* must succeed. */
					amdgpu_ras_resume(tmp_adev);
				} else {
					r = -EINVAL;
					goto out;
				}

				/* Update PSP FW topology after reset */
				if (reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(
						reset_context->hive, tmp_adev);
			}
		}

		/*
		 * Per-device tail, still inside the loop: reached both by
		 * falling through and by the "goto out" failure paths above.
		 */
out:
		if (!r) {
			/* IP init is complete now, set level as default */
			amdgpu_set_init_level(tmp_adev,
					      AMDGPU_INIT_LEVEL_DEFAULT);
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				/*
				 * A failed IB test is reported as -EAGAIN and
				 * stops the walk over the device list right
				 * away, without storing asic_reset_res.
				 */
				r = -EAGAIN;
				goto end;
			}
		}

		/* remember the failure on the device and move on to the next one */
		if (r)
			tmp_adev->asic_reset_res = r;
	}

end:
	return r;
}
5333 
5334 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5335 			 struct amdgpu_reset_context *reset_context)
5336 {
5337 	struct amdgpu_device *tmp_adev = NULL;
5338 	bool need_full_reset, skip_hw_reset;
5339 	int r = 0;
5340 
5341 	/* Try reset handler method first */
5342 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5343 				    reset_list);
5344 
5345 	reset_context->reset_device_list = device_list_handle;
5346 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5347 	/* If reset handler not implemented, continue; otherwise return */
5348 	if (r == -EOPNOTSUPP)
5349 		r = 0;
5350 	else
5351 		return r;
5352 
5353 	/* Reset handler not implemented, use the default method */
5354 	need_full_reset =
5355 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5356 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5357 
5358 	/*
5359 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5360 	 * to allow proper links negotiation in FW (within 1 sec)
5361 	 */
5362 	if (!skip_hw_reset && need_full_reset) {
5363 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5364 			/* For XGMI run all resets in parallel to speed up the process */
5365 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5366 				if (!queue_work(system_dfl_wq,
5367 						&tmp_adev->xgmi_reset_work))
5368 					r = -EALREADY;
5369 			} else
5370 				r = amdgpu_asic_reset(tmp_adev);
5371 
5372 			if (r) {
5373 				dev_err(tmp_adev->dev,
5374 					"ASIC reset failed with error, %d for drm dev, %s",
5375 					r, adev_to_drm(tmp_adev)->unique);
5376 				goto out;
5377 			}
5378 		}
5379 
5380 		/* For XGMI wait for all resets to complete before proceed */
5381 		if (!r) {
5382 			list_for_each_entry(tmp_adev, device_list_handle,
5383 					    reset_list) {
5384 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5385 					flush_work(&tmp_adev->xgmi_reset_work);
5386 					r = tmp_adev->asic_reset_res;
5387 					if (r)
5388 						break;
5389 				}
5390 			}
5391 		}
5392 	}
5393 
5394 	if (!r && amdgpu_ras_intr_triggered()) {
5395 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5396 			amdgpu_ras_reset_error_count(tmp_adev,
5397 						     AMDGPU_RAS_BLOCK__MMHUB);
5398 		}
5399 
5400 		amdgpu_ras_intr_cleared();
5401 	}
5402 
5403 	r = amdgpu_device_reinit_after_reset(reset_context);
5404 	if (r == -EAGAIN)
5405 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5406 	else
5407 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5408 
5409 out:
5410 	return r;
5411 }
5412 
5413 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5414 {
5415 
5416 	switch (amdgpu_asic_reset_method(adev)) {
5417 	case AMD_RESET_METHOD_MODE1:
5418 	case AMD_RESET_METHOD_LINK:
5419 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5420 		break;
5421 	case AMD_RESET_METHOD_MODE2:
5422 		adev->mp1_state = PP_MP1_STATE_RESET;
5423 		break;
5424 	default:
5425 		adev->mp1_state = PP_MP1_STATE_NONE;
5426 		break;
5427 	}
5428 }
5429 
/*
 * Counterpart of amdgpu_device_set_mp1_state(): calls
 * amdgpu_vf_error_trans_all() for the device and then puts
 * adev->mp1_state back to PP_MP1_STATE_NONE.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
5435 
5436 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5437 {
5438 	struct pci_dev *p = NULL;
5439 
5440 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5441 			adev->pdev->bus->number, 1);
5442 	if (p) {
5443 		pm_runtime_enable(&(p->dev));
5444 		pm_runtime_resume(&(p->dev));
5445 	}
5446 
5447 	pci_dev_put(p);
5448 }
5449 
5450 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5451 {
5452 	enum amd_reset_method reset_method;
5453 	struct pci_dev *p = NULL;
5454 	u64 expires;
5455 
5456 	/*
5457 	 * For now, only BACO and mode1 reset are confirmed
5458 	 * to suffer the audio issue without proper suspended.
5459 	 */
5460 	reset_method = amdgpu_asic_reset_method(adev);
5461 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5462 	     (reset_method != AMD_RESET_METHOD_MODE1))
5463 		return -EINVAL;
5464 
5465 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5466 			adev->pdev->bus->number, 1);
5467 	if (!p)
5468 		return -ENODEV;
5469 
5470 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5471 	if (!expires)
5472 		/*
5473 		 * If we cannot get the audio device autosuspend delay,
5474 		 * a fixed 4S interval will be used. Considering 3S is
5475 		 * the audio controller default autosuspend delay setting.
5476 		 * 4S used here is guaranteed to cover that.
5477 		 */
5478 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5479 
5480 	while (!pm_runtime_status_suspended(&(p->dev))) {
5481 		if (!pm_runtime_suspend(&(p->dev)))
5482 			break;
5483 
5484 		if (expires < ktime_get_mono_fast_ns()) {
5485 			dev_warn(adev->dev, "failed to suspend display audio\n");
5486 			pci_dev_put(p);
5487 			/* TODO: abort the succeeding gpu reset? */
5488 			return -ETIMEDOUT;
5489 		}
5490 	}
5491 
5492 	pm_runtime_disable(&(p->dev));
5493 
5494 	pci_dev_put(p);
5495 	return 0;
5496 }
5497 
5498 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5499 {
5500 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5501 
5502 #if defined(CONFIG_DEBUG_FS)
5503 	if (!amdgpu_sriov_vf(adev))
5504 		cancel_work(&adev->reset_work);
5505 #endif
5506 	amdgpu_userq_mgr_cancel_reset_work(adev);
5507 
5508 	if (adev->kfd.dev)
5509 		cancel_work(&adev->kfd.reset_work);
5510 
5511 	if (amdgpu_sriov_vf(adev))
5512 		cancel_work(&adev->virt.flr_work);
5513 
5514 	if (con && adev->ras_enabled)
5515 		cancel_work(&con->recovery_work);
5516 
5517 }
5518 
5519 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5520 {
5521 	struct amdgpu_device *tmp_adev;
5522 	int ret = 0;
5523 
5524 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5525 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5526 	}
5527 
5528 	return ret;
5529 }
5530 
5531 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5532 					  struct list_head *device_list,
5533 					  struct amdgpu_hive_info *hive)
5534 {
5535 	struct amdgpu_device *tmp_adev = NULL;
5536 
5537 	/*
5538 	 * Build list of devices to reset.
5539 	 * In case we are in XGMI hive mode, resort the device list
5540 	 * to put adev in the 1st position.
5541 	 */
5542 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5543 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5544 			list_add_tail(&tmp_adev->reset_list, device_list);
5545 			if (adev->shutdown)
5546 				tmp_adev->shutdown = true;
5547 		}
5548 		if (!list_is_first(&adev->reset_list, device_list))
5549 			list_rotate_to_front(&adev->reset_list, device_list);
5550 	} else {
5551 		list_add_tail(&adev->reset_list, device_list);
5552 	}
5553 }
5554 
5555 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5556 						  struct list_head *device_list)
5557 {
5558 	struct amdgpu_device *tmp_adev = NULL;
5559 
5560 	if (list_empty(device_list))
5561 		return;
5562 	tmp_adev =
5563 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5564 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5565 }
5566 
5567 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5568 						  struct list_head *device_list)
5569 {
5570 	struct amdgpu_device *tmp_adev = NULL;
5571 
5572 	if (list_empty(device_list))
5573 		return;
5574 	tmp_adev =
5575 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5576 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5577 }
5578 
5579 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5580 					  struct amdgpu_job *job,
5581 					  struct amdgpu_reset_context *reset_context,
5582 					  struct list_head *device_list,
5583 					  struct amdgpu_hive_info *hive,
5584 					  bool need_emergency_restart)
5585 {
5586 	struct amdgpu_device *tmp_adev = NULL;
5587 	int i;
5588 
5589 	/* block all schedulers and reset given job's ring */
5590 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5591 		amdgpu_device_set_mp1_state(tmp_adev);
5592 
5593 		/*
5594 		 * Try to put the audio codec into suspend state
5595 		 * before gpu reset started.
5596 		 *
5597 		 * Due to the power domain of the graphics device
5598 		 * is shared with AZ power domain. Without this,
5599 		 * we may change the audio hardware from behind
5600 		 * the audio driver's back. That will trigger
5601 		 * some audio codec errors.
5602 		 */
5603 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5604 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5605 
5606 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5607 
5608 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5609 
5610 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5611 
5612 		/*
5613 		 * Mark these ASICs to be reset as untracked first
5614 		 * And add them back after reset completed
5615 		 */
5616 		amdgpu_unregister_gpu_instance(tmp_adev);
5617 
5618 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5619 
5620 		/* disable ras on ALL IPs */
5621 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5622 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5623 			amdgpu_ras_suspend(tmp_adev);
5624 
5625 		amdgpu_userq_pre_reset(tmp_adev);
5626 
5627 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5628 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5629 
5630 			if (!amdgpu_ring_sched_ready(ring))
5631 				continue;
5632 
5633 			drm_sched_wqueue_stop(&ring->sched);
5634 
5635 			if (need_emergency_restart)
5636 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5637 		}
5638 		atomic_inc(&tmp_adev->gpu_reset_counter);
5639 	}
5640 }
5641 
5642 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5643 			      struct list_head *device_list,
5644 			      struct amdgpu_reset_context *reset_context)
5645 {
5646 	struct amdgpu_device *tmp_adev = NULL;
5647 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5648 	int r = 0;
5649 
5650 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5651 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5652 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5653 		/*TODO Should we stop ?*/
5654 		if (r) {
5655 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5656 				  r, adev_to_drm(tmp_adev)->unique);
5657 			tmp_adev->asic_reset_res = r;
5658 		}
5659 	}
5660 
5661 	/* Actual ASIC resets if needed.*/
5662 	/* Host driver will handle XGMI hive reset for SRIOV */
5663 	if (amdgpu_sriov_vf(adev)) {
5664 
5665 		/* Bail out of reset early */
5666 		if (amdgpu_ras_is_rma(adev))
5667 			return -ENODEV;
5668 
5669 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5670 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5671 			amdgpu_ras_set_fed(adev, true);
5672 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5673 		}
5674 
5675 		r = amdgpu_device_reset_sriov(adev, reset_context);
5676 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5677 			amdgpu_virt_release_full_gpu(adev, true);
5678 			goto retry;
5679 		}
5680 		if (r)
5681 			adev->asic_reset_res = r;
5682 	} else {
5683 		r = amdgpu_do_asic_reset(device_list, reset_context);
5684 		if (r && r == -EAGAIN)
5685 			goto retry;
5686 	}
5687 
5688 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5689 		/*
5690 		 * Drop any pending non scheduler resets queued before reset is done.
5691 		 * Any reset scheduled after this point would be valid. Scheduler resets
5692 		 * were already dropped during drm_sched_stop and no new ones can come
5693 		 * in before drm_sched_start.
5694 		 */
5695 		amdgpu_device_stop_pending_resets(tmp_adev);
5696 	}
5697 
5698 	return r;
5699 }
5700 
5701 static int amdgpu_device_sched_resume(struct list_head *device_list,
5702 			      struct amdgpu_reset_context *reset_context,
5703 			      bool   job_signaled)
5704 {
5705 	struct amdgpu_device *tmp_adev = NULL;
5706 	int i, r = 0;
5707 
5708 	/* Post ASIC reset for all devs .*/
5709 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5710 
5711 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5712 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5713 
5714 			if (!amdgpu_ring_sched_ready(ring))
5715 				continue;
5716 
5717 			drm_sched_wqueue_start(&ring->sched);
5718 		}
5719 
5720 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5721 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5722 
5723 		if (tmp_adev->asic_reset_res) {
5724 			/* bad news, how to tell it to userspace ?
5725 			 * for ras error, we should report GPU bad status instead of
5726 			 * reset failure
5727 			 */
5728 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5729 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5730 				dev_info(
5731 					tmp_adev->dev,
5732 					"GPU reset(%d) failed with error %d\n",
5733 					atomic_read(
5734 						&tmp_adev->gpu_reset_counter),
5735 					tmp_adev->asic_reset_res);
5736 			amdgpu_vf_error_put(tmp_adev,
5737 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5738 					    tmp_adev->asic_reset_res);
5739 			if (!r)
5740 				r = tmp_adev->asic_reset_res;
5741 			tmp_adev->asic_reset_res = 0;
5742 		} else {
5743 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5744 				 atomic_read(&tmp_adev->gpu_reset_counter));
5745 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5746 							   AMDGPU_SS_DEV_D0))
5747 				dev_warn(tmp_adev->dev,
5748 					 "smart shift update failed\n");
5749 		}
5750 	}
5751 
5752 	return r;
5753 }
5754 
5755 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5756 			      struct list_head *device_list,
5757 			      bool   need_emergency_restart)
5758 {
5759 	struct amdgpu_device *tmp_adev = NULL;
5760 
5761 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5762 		/* unlock kfd: SRIOV would do it separately */
5763 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5764 			amdgpu_amdkfd_post_reset(tmp_adev);
5765 
5766 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5767 		 * need to bring up kfd here if it's not be initialized before
5768 		 */
5769 		if (!adev->kfd.init_complete)
5770 			amdgpu_amdkfd_device_init(adev);
5771 
5772 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5773 			amdgpu_device_resume_display_audio(tmp_adev);
5774 
5775 		amdgpu_device_unset_mp1_state(tmp_adev);
5776 
5777 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5778 
5779 	}
5780 }
5781 
5782 
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, may be NULL
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 *
 * The result is also stored in adev->reset_domain->reset_res, and on success
 * a DRM wedged event with DRM_WEDGE_RECOVERY_NONE is sent.
 *
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	int r = 0;
	bool need_emergency_restart = false;
	/* save the pasid here as the job may be freed before the end of the reset */
	int pasid = job ? job->pasid : -EINVAL;

	/*
	 * If it reaches here because of hang/timeout and a RAS error is
	 * detected at the same time, let RAS recovery take care of it.
	 */
	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
	    !amdgpu_sriov_vf(adev) &&
	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
		dev_dbg(adev->dev,
			"Gpu recovery from source: %d yielding to RAS error recovery handling",
			reset_context->src);
		return 0;
	}

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
		amdgpu_ras_get_context(adev)->reboot) {
		dev_warn(adev->dev, "Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
		 need_emergency_restart ? "jobs stop" : "reset",
		 reset_context->src);

	/* the hive is only looked up on bare metal; VFs keep hive == NULL */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	INIT_LIST_HEAD(&device_list);

	amdgpu_device_recovery_prepare(adev, &device_list, hive);

	/* a failed bus status check aborts before any lock or halt step */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(&device_list);
		if (r)
			goto end_reset;
	}

	/* Cannot be called after locking reset domain */
	amdgpu_ras_pre_reset(adev, &device_list);

	/* We need to lock reset domain only once both for XGMI and single device */
	amdgpu_device_recovery_get_reset_lock(adev, &device_list);

	/* unmap all the mappings of doorbell and framebuffer to prevent user space from
	 * accessing them
	 */
	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
	amdgpu_amdkfd_clear_kfd_mapping(adev);

	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
				      hive, need_emergency_restart);
	/* emergency restart: skip both the HW reset and the scheduler restart */
	if (need_emergency_restart)
		goto skip_sched_resume;
	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	/*
	 * The labels below fall through into each other; a failure jumps
	 * past the steps that must not run and only releases what was taken.
	 */
	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
	if (r)
		goto reset_unlock;
skip_hw_reset:
	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
	if (r)
		goto reset_unlock;
skip_sched_resume:
	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
reset_unlock:
	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	amdgpu_ras_post_reset(adev, &device_list);
end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	/* publish the outcome on the reset domain */
	atomic_set(&adev->reset_domain->reset_res, r);

	if (!r) {
		struct amdgpu_task_info *ti = NULL;

		/*
		 * The job may already be freed at this point via the sched tdr workqueue so
		 * use the cached pasid.
		 */
		if (pasid >= 0)
			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);

		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
				     ti ? &ti->task : NULL);

		amdgpu_vm_put_task_info(ti);
	}

	return r;
}
5927 
5928 /**
5929  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5930  *
5931  * @adev: amdgpu_device pointer
5932  * @speed: pointer to the speed of the link
5933  * @width: pointer to the width of the link
5934  *
5935  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5936  * first physical partner to an AMD dGPU.
5937  * This will exclude any virtual switches and links.
5938  */
5939 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5940 					    enum pci_bus_speed *speed,
5941 					    enum pcie_link_width *width)
5942 {
5943 	if (!speed || !width)
5944 		return;
5945 
5946 	*speed = PCI_SPEED_UNKNOWN;
5947 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5948 
5949 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5950 		struct pci_dev *parent = amdgpu_device_find_parent(adev);
5951 
5952 		if (parent) {
5953 			*speed = pcie_get_speed_cap(parent);
5954 			*width = pcie_get_width_cap(parent);
5955 		}
5956 	} else {
5957 		/* use the current speeds rather than max if switching is not supported */
5958 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5959 	}
5960 }
5961 
5962 /**
5963  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5964  *
5965  * @adev: amdgpu_device pointer
5966  * @speed: pointer to the speed of the link
5967  * @width: pointer to the width of the link
5968  *
5969  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5970  * AMD dGPU which may be a virtual upstream bridge.
5971  */
5972 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5973 					enum pci_bus_speed *speed,
5974 					enum pcie_link_width *width)
5975 {
5976 	struct pci_dev *parent = adev->pdev;
5977 
5978 	if (!speed || !width)
5979 		return;
5980 
5981 	/* use the device itself */
5982 	*speed = pcie_get_speed_cap(adev->pdev);
5983 	*width = pcie_get_width_cap(adev->pdev);
5984 
5985 	/* use the link outside the device */
5986 	parent = amdgpu_device_find_parent(adev);
5987 	if (parent) {
5988 		*speed = pcie_get_speed_cap(parent);
5989 		*width = pcie_get_width_cap(parent);
5990 	}
5991 }
5992 
5993 /**
5994  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5995  *
5996  * @adev: amdgpu_device pointer
5997  *
5998  * Fetches and stores in the driver the PCIE capabilities (gen speed
5999  * and lanes) of the slot the device is in. Handles APUs and
6000  * virtualized environments where PCIE config space may not be available.
6001  */
6002 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6003 {
6004 	enum pci_bus_speed speed_cap, platform_speed_cap;
6005 	enum pcie_link_width platform_link_width, link_width;
6006 
6007 	if (amdgpu_pcie_gen_cap)
6008 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6009 
6010 	if (amdgpu_pcie_lane_cap)
6011 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6012 
6013 	/* covers APUs as well */
6014 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6015 		if (adev->pm.pcie_gen_mask == 0)
6016 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6017 		if (adev->pm.pcie_mlw_mask == 0)
6018 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6019 		return;
6020 	}
6021 
6022 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6023 		return;
6024 
6025 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6026 					&platform_link_width);
6027 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6028 
6029 	if (adev->pm.pcie_gen_mask == 0) {
6030 		/* asic caps */
6031 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6032 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6033 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6034 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6035 		} else {
6036 			if (speed_cap == PCIE_SPEED_32_0GT)
6037 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6038 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6039 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6040 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6041 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6042 			else if (speed_cap == PCIE_SPEED_16_0GT)
6043 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6044 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6045 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6046 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6047 			else if (speed_cap == PCIE_SPEED_8_0GT)
6048 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6049 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6050 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6051 			else if (speed_cap == PCIE_SPEED_5_0GT)
6052 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6053 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6054 			else
6055 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6056 		}
6057 		/* platform caps */
6058 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6059 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6060 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6061 		} else {
6062 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6063 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6064 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6065 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6066 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6067 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6068 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6069 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6070 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6071 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6072 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6073 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6074 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6075 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6076 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6077 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6078 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6079 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6080 			else
6081 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6082 
6083 		}
6084 	}
6085 	if (adev->pm.pcie_mlw_mask == 0) {
6086 		/* asic caps */
6087 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6088 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6089 		} else {
6090 			switch (link_width) {
6091 			case PCIE_LNK_X32:
6092 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6093 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6094 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6095 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6096 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6097 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6098 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6099 				break;
6100 			case PCIE_LNK_X16:
6101 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6102 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6103 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6104 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6105 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6106 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6107 				break;
6108 			case PCIE_LNK_X12:
6109 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6110 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6111 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6112 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6113 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6114 				break;
6115 			case PCIE_LNK_X8:
6116 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6117 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6118 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6119 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6120 				break;
6121 			case PCIE_LNK_X4:
6122 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6123 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6124 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6125 				break;
6126 			case PCIE_LNK_X2:
6127 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6128 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6129 				break;
6130 			case PCIE_LNK_X1:
6131 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6132 				break;
6133 			default:
6134 				break;
6135 			}
6136 		}
6137 		/* platform caps */
6138 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6139 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6140 		} else {
6141 			switch (platform_link_width) {
6142 			case PCIE_LNK_X32:
6143 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6144 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6145 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6146 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6147 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6148 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6149 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6150 				break;
6151 			case PCIE_LNK_X16:
6152 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6153 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6154 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6155 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6156 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6157 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6158 				break;
6159 			case PCIE_LNK_X12:
6160 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6161 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6162 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6163 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6164 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6165 				break;
6166 			case PCIE_LNK_X8:
6167 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6168 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6169 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6170 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6171 				break;
6172 			case PCIE_LNK_X4:
6173 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6174 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6175 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6176 				break;
6177 			case PCIE_LNK_X2:
6178 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6179 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6180 				break;
6181 			case PCIE_LNK_X1:
6182 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6183 				break;
6184 			default:
6185 				break;
6186 			}
6187 		}
6188 	}
6189 }
6190 
6191 /**
6192  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6193  *
6194  * @adev: amdgpu_device pointer
6195  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6196  *
6197  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6198  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6199  * @peer_adev.
6200  */
6201 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6202 				      struct amdgpu_device *peer_adev)
6203 {
6204 #ifdef CONFIG_HSA_AMD_P2P
6205 	bool p2p_access =
6206 		!adev->gmc.xgmi.connected_to_cpu &&
6207 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6208 	if (!p2p_access)
6209 		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6210 			pci_name(peer_adev->pdev));
6211 
6212 	bool is_large_bar = adev->gmc.visible_vram_size &&
6213 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6214 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6215 
6216 	if (!p2p_addressable) {
6217 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6218 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6219 		resource_size_t aper_limit =
6220 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6221 
6222 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6223 				     aper_limit & address_mask);
6224 	}
6225 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6226 #else
6227 	return false;
6228 #endif
6229 }
6230 
6231 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6232 {
6233 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6234 
6235 	if (!amdgpu_device_supports_baco(adev))
6236 		return -ENOTSUPP;
6237 
6238 	if (ras && adev->ras_enabled &&
6239 	    adev->nbio.funcs->enable_doorbell_interrupt)
6240 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6241 
6242 	return amdgpu_dpm_baco_enter(adev);
6243 }
6244 
6245 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6246 {
6247 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6248 	int ret = 0;
6249 
6250 	if (!amdgpu_device_supports_baco(adev))
6251 		return -ENOTSUPP;
6252 
6253 	ret = amdgpu_dpm_baco_exit(adev);
6254 	if (ret)
6255 		return ret;
6256 
6257 	if (ras && adev->ras_enabled &&
6258 	    adev->nbio.funcs->enable_doorbell_interrupt)
6259 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6260 
6261 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6262 	    adev->nbio.funcs->clear_doorbell_interrupt)
6263 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6264 
6265 	return 0;
6266 }
6267 
6268 /**
6269  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6270  * @pdev: PCI device struct
6271  * @state: PCI channel state
6272  *
6273  * Description: Called when a PCI error is detected.
6274  *
6275  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6276  */
6277 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6278 {
6279 	struct drm_device *dev = pci_get_drvdata(pdev);
6280 	struct amdgpu_device *adev = drm_to_adev(dev);
6281 	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
6282 		amdgpu_get_xgmi_hive(adev);
6283 	struct amdgpu_reset_context reset_context;
6284 	struct list_head device_list;
6285 
6286 	dev_info(adev->dev, "PCI error: detected callback!!\n");
6287 
6288 	adev->pci_channel_state = state;
6289 
6290 	switch (state) {
6291 	case pci_channel_io_normal:
6292 		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6293 		return PCI_ERS_RESULT_CAN_RECOVER;
6294 	case pci_channel_io_frozen:
6295 		/* Fatal error, prepare for slot reset */
6296 		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6297 		if (hive) {
6298 			/* Hive devices should be able to support FW based
6299 			 * link reset on other devices, if not return.
6300 			 */
6301 			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6302 				dev_warn(adev->dev,
6303 					 "No support for XGMI hive yet...\n");
6304 				return PCI_ERS_RESULT_DISCONNECT;
6305 			}
6306 			/* Set dpc status only if device is part of hive
6307 			 * Non-hive devices should be able to recover after
6308 			 * link reset.
6309 			 */
6310 			amdgpu_reset_set_dpc_status(adev, true);
6311 
6312 			mutex_lock(&hive->hive_lock);
6313 		} else {
6314 			if (amdgpu_device_bus_status_check(adev))
6315 				amdgpu_reset_set_dpc_status(adev, true);
6316 		}
6317 		memset(&reset_context, 0, sizeof(reset_context));
6318 		INIT_LIST_HEAD(&device_list);
6319 
6320 		amdgpu_device_recovery_prepare(adev, &device_list, hive);
6321 		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6322 		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6323 					      hive, false);
6324 		if (hive)
6325 			mutex_unlock(&hive->hive_lock);
6326 		return PCI_ERS_RESULT_NEED_RESET;
6327 	case pci_channel_io_perm_failure:
6328 		/* Permanent error, prepare for device removal */
6329 		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6330 		return PCI_ERS_RESULT_DISCONNECT;
6331 	}
6332 
6333 	return PCI_ERS_RESULT_NEED_RESET;
6334 }
6335 
6336 /**
6337  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6338  * @pdev: pointer to PCI device
6339  */
6340 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6341 {
6342 	struct drm_device *dev = pci_get_drvdata(pdev);
6343 	struct amdgpu_device *adev = drm_to_adev(dev);
6344 
6345 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6346 
6347 	/* TODO - dump whatever for debugging purposes */
6348 
6349 	/* This called only if amdgpu_pci_error_detected returns
6350 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6351 	 * works, no need to reset slot.
6352 	 */
6353 
6354 	return PCI_ERS_RESULT_RECOVERED;
6355 }
6356 
6357 /**
6358  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6359  * @pdev: PCI device struct
6360  *
6361  * Description: This routine is called by the pci error recovery
6362  * code after the PCI slot has been reset, just before we
6363  * should resume normal operations.
6364  */
6365 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6366 {
6367 	struct drm_device *dev = pci_get_drvdata(pdev);
6368 	struct amdgpu_device *adev = drm_to_adev(dev);
6369 	struct amdgpu_reset_context reset_context;
6370 	struct amdgpu_device *tmp_adev;
6371 	struct amdgpu_hive_info *hive;
6372 	struct list_head device_list;
6373 	struct pci_dev *link_dev;
6374 	int r = 0, i, timeout;
6375 	u32 memsize;
6376 	u16 status;
6377 
6378 	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6379 
6380 	memset(&reset_context, 0, sizeof(reset_context));
6381 	INIT_LIST_HEAD(&device_list);
6382 	hive = amdgpu_get_xgmi_hive(adev);
6383 	if (hive) {
6384 		mutex_lock(&hive->hive_lock);
6385 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6386 			list_add_tail(&tmp_adev->reset_list, &device_list);
6387 	} else {
6388 		list_add_tail(&adev->reset_list, &device_list);
6389 	}
6390 
6391 	if (adev->pcie_reset_ctx.swus)
6392 		link_dev = adev->pcie_reset_ctx.swus;
6393 	else
6394 		link_dev = adev->pdev;
6395 	/* wait for asic to come out of reset, timeout = 10s */
6396 	timeout = 10000;
6397 	do {
6398 		usleep_range(10000, 10500);
6399 		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
6400 		timeout -= 10;
6401 	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
6402 		 (status != PCI_VENDOR_ID_AMD));
6403 
6404 	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
6405 		r = -ETIME;
6406 		goto out;
6407 	}
6408 
6409 	amdgpu_device_load_switch_state(adev);
6410 	/* Restore PCI confspace */
6411 	amdgpu_device_load_pci_state(pdev);
6412 
6413 	/* confirm  ASIC came out of reset */
6414 	for (i = 0; i < adev->usec_timeout; i++) {
6415 		memsize = amdgpu_asic_get_config_memsize(adev);
6416 
6417 		if (memsize != 0xffffffff)
6418 			break;
6419 		udelay(1);
6420 	}
6421 	if (memsize == 0xffffffff) {
6422 		r = -ETIME;
6423 		goto out;
6424 	}
6425 
6426 	reset_context.method = AMD_RESET_METHOD_NONE;
6427 	reset_context.reset_req_dev = adev;
6428 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6429 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
6430 
6431 	if (hive) {
6432 		reset_context.hive = hive;
6433 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6434 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
6435 	} else {
6436 		adev->pcie_reset_ctx.in_link_reset = true;
6437 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6438 	}
6439 
6440 	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
6441 out:
6442 	if (!r) {
6443 		if (amdgpu_device_cache_pci_state(adev->pdev))
6444 			pci_restore_state(adev->pdev);
6445 		dev_info(adev->dev, "PCIe error recovery succeeded\n");
6446 	} else {
6447 		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6448 		if (hive) {
6449 			list_for_each_entry(tmp_adev, &device_list, reset_list)
6450 				amdgpu_device_unset_mp1_state(tmp_adev);
6451 		}
6452 		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6453 	}
6454 
6455 	if (hive) {
6456 		mutex_unlock(&hive->hive_lock);
6457 		amdgpu_put_xgmi_hive(hive);
6458 	}
6459 
6460 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6461 }
6462 
6463 /**
6464  * amdgpu_pci_resume() - resume normal ops after PCI reset
6465  * @pdev: pointer to PCI device
6466  *
6467  * Called when the error recovery driver tells us that its
6468  * OK to resume normal operation.
6469  */
6470 void amdgpu_pci_resume(struct pci_dev *pdev)
6471 {
6472 	struct drm_device *dev = pci_get_drvdata(pdev);
6473 	struct amdgpu_device *adev = drm_to_adev(dev);
6474 	struct list_head device_list;
6475 	struct amdgpu_hive_info *hive = NULL;
6476 	struct amdgpu_device *tmp_adev = NULL;
6477 
6478 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6479 
6480 	/* Only continue execution for the case of pci_channel_io_frozen */
6481 	if (adev->pci_channel_state != pci_channel_io_frozen)
6482 		return;
6483 
6484 	INIT_LIST_HEAD(&device_list);
6485 
6486 	hive = amdgpu_get_xgmi_hive(adev);
6487 	if (hive) {
6488 		mutex_lock(&hive->hive_lock);
6489 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6490 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6491 			list_add_tail(&tmp_adev->reset_list, &device_list);
6492 		}
6493 	} else {
6494 		adev->pcie_reset_ctx.in_link_reset = false;
6495 		list_add_tail(&adev->reset_list, &device_list);
6496 	}
6497 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
6498 	amdgpu_device_gpu_resume(adev, &device_list, false);
6499 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6500 
6501 	if (hive) {
6502 		mutex_unlock(&hive->hive_lock);
6503 		amdgpu_put_xgmi_hive(hive);
6504 	}
6505 }
6506 
6507 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6508 {
6509 	struct pci_dev *swus, *swds;
6510 	int r;
6511 
6512 	swds = pci_upstream_bridge(adev->pdev);
6513 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6514 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6515 		return;
6516 	swus = pci_upstream_bridge(swds);
6517 	if (!swus ||
6518 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6519 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6520 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6521 		return;
6522 
6523 	/* If already saved, return */
6524 	if (adev->pcie_reset_ctx.swus)
6525 		return;
6526 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6527 	r = pci_save_state(swds);
6528 	if (r)
6529 		return;
6530 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6531 
6532 	r = pci_save_state(swus);
6533 	if (r)
6534 		return;
6535 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6536 
6537 	adev->pcie_reset_ctx.swus = swus;
6538 }
6539 
6540 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6541 {
6542 	struct pci_dev *pdev;
6543 	int r;
6544 
6545 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6546 	    !adev->pcie_reset_ctx.swus_pcistate)
6547 		return;
6548 
6549 	pdev = adev->pcie_reset_ctx.swus;
6550 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6551 	if (!r) {
6552 		pci_restore_state(pdev);
6553 	} else {
6554 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6555 		return;
6556 	}
6557 
6558 	pdev = pci_upstream_bridge(adev->pdev);
6559 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6560 	if (!r)
6561 		pci_restore_state(pdev);
6562 	else
6563 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6564 }
6565 
6566 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6567 {
6568 	struct drm_device *dev = pci_get_drvdata(pdev);
6569 	struct amdgpu_device *adev = drm_to_adev(dev);
6570 	int r;
6571 
6572 	if (amdgpu_sriov_vf(adev))
6573 		return false;
6574 
6575 	r = pci_save_state(pdev);
6576 	if (!r) {
6577 		kfree(adev->pci_state);
6578 
6579 		adev->pci_state = pci_store_saved_state(pdev);
6580 
6581 		if (!adev->pci_state) {
6582 			dev_err(adev->dev, "Failed to store PCI saved state");
6583 			return false;
6584 		}
6585 	} else {
6586 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6587 		return false;
6588 	}
6589 
6590 	amdgpu_device_cache_switch_state(adev);
6591 
6592 	return true;
6593 }
6594 
6595 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6596 {
6597 	struct drm_device *dev = pci_get_drvdata(pdev);
6598 	struct amdgpu_device *adev = drm_to_adev(dev);
6599 	int r;
6600 
6601 	if (!adev->pci_state)
6602 		return false;
6603 
6604 	r = pci_load_saved_state(pdev, adev->pci_state);
6605 
6606 	if (!r) {
6607 		pci_restore_state(pdev);
6608 	} else {
6609 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6610 		return false;
6611 	}
6612 
6613 	return true;
6614 }
6615 
6616 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6617 		struct amdgpu_ring *ring)
6618 {
6619 #ifdef CONFIG_X86_64
6620 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6621 		return;
6622 #endif
6623 	if (adev->gmc.xgmi.connected_to_cpu)
6624 		return;
6625 
6626 	if (ring && ring->funcs->emit_hdp_flush) {
6627 		amdgpu_ring_emit_hdp_flush(ring);
6628 		return;
6629 	}
6630 
6631 	if (!ring && amdgpu_sriov_runtime(adev)) {
6632 		if (!amdgpu_kiq_hdp_flush(adev))
6633 			return;
6634 	}
6635 
6636 	amdgpu_hdp_flush(adev, ring);
6637 }
6638 
6639 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6640 		struct amdgpu_ring *ring)
6641 {
6642 #ifdef CONFIG_X86_64
6643 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6644 		return;
6645 #endif
6646 	if (adev->gmc.xgmi.connected_to_cpu)
6647 		return;
6648 
6649 	amdgpu_hdp_invalidate(adev, ring);
6650 }
6651 
6652 int amdgpu_in_reset(struct amdgpu_device *adev)
6653 {
6654 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6655 }
6656 
6657 /**
6658  * amdgpu_device_halt() - bring hardware to some kind of halt state
6659  *
6660  * @adev: amdgpu_device pointer
6661  *
6662  * Bring hardware to some kind of halt state so that no one can touch it
6663  * any more. It will help to maintain error context when error occurred.
6664  * Compare to a simple hang, the system will keep stable at least for SSH
6665  * access. Then it should be trivial to inspect the hardware state and
6666  * see what's going on. Implemented as following:
6667  *
6668  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6669  *    clears all CPU mappings to device, disallows remappings through page faults
6670  * 2. amdgpu_irq_disable_all() disables all interrupts
6671  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6672  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6673  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6674  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6675  *    flush any in flight DMA operations
6676  */
6677 void amdgpu_device_halt(struct amdgpu_device *adev)
6678 {
6679 	struct pci_dev *pdev = adev->pdev;
6680 	struct drm_device *ddev = adev_to_drm(adev);
6681 
6682 	amdgpu_xcp_dev_unplug(adev);
6683 	drm_dev_unplug(ddev);
6684 
6685 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
6686 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
6687 
6688 	amdgpu_irq_disable_all(adev);
6689 
6690 	amdgpu_fence_driver_hw_fini(adev);
6691 
6692 	adev->no_hw_access = true;
6693 
6694 	amdgpu_device_unmap_mmio(adev);
6695 
6696 	pci_disable_device(pdev);
6697 	pci_wait_for_pending_transaction(pdev);
6698 }
6699 
6700 /**
6701  * amdgpu_device_get_gang - return a reference to the current gang
6702  * @adev: amdgpu_device pointer
6703  *
6704  * Returns: A new reference to the current gang leader.
6705  */
6706 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6707 {
6708 	struct dma_fence *fence;
6709 
6710 	rcu_read_lock();
6711 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6712 	rcu_read_unlock();
6713 	return fence;
6714 }
6715 
6716 /**
6717  * amdgpu_device_switch_gang - switch to a new gang
6718  * @adev: amdgpu_device pointer
6719  * @gang: the gang to switch to
6720  *
6721  * Try to switch to a new gang.
6722  * Returns: NULL if we switched to the new gang or a reference to the current
6723  * gang leader.
6724  */
6725 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6726 					    struct dma_fence *gang)
6727 {
6728 	struct dma_fence *old = NULL;
6729 
6730 	dma_fence_get(gang);
6731 	do {
6732 		dma_fence_put(old);
6733 		old = amdgpu_device_get_gang(adev);
6734 		if (old == gang)
6735 			break;
6736 
6737 		if (!dma_fence_is_signaled(old)) {
6738 			dma_fence_put(gang);
6739 			return old;
6740 		}
6741 
6742 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6743 			 old, gang) != old);
6744 
6745 	/*
6746 	 * Drop it once for the exchanged reference in adev and once for the
6747 	 * thread local reference acquired in amdgpu_device_get_gang().
6748 	 */
6749 	dma_fence_put(old);
6750 	dma_fence_put(old);
6751 	return NULL;
6752 }
6753 
6754 /**
6755  * amdgpu_device_enforce_isolation - enforce HW isolation
6756  * @adev: the amdgpu device pointer
6757  * @ring: the HW ring the job is supposed to run on
6758  * @job: the job which is about to be pushed to the HW ring
6759  *
6760  * Makes sure that only one client at a time can use the GFX block.
6761  * Returns: The dependency to wait on before the job can be pushed to the HW.
6762  * The function is called multiple times until NULL is returned.
6763  */
6764 struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
6765 						  struct amdgpu_ring *ring,
6766 						  struct amdgpu_job *job)
6767 {
6768 	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
6769 	struct drm_sched_fence *f = job->base.s_fence;
6770 	struct dma_fence *dep;
6771 	void *owner;
6772 	int r;
6773 
6774 	/*
6775 	 * For now enforce isolation only for the GFX block since we only need
6776 	 * the cleaner shader on those rings.
6777 	 */
6778 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
6779 	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6780 		return NULL;
6781 
6782 	/*
6783 	 * All submissions where enforce isolation is false are handled as if
6784 	 * they come from a single client. Use ~0l as the owner to distinct it
6785 	 * from kernel submissions where the owner is NULL.
6786 	 */
6787 	owner = job->enforce_isolation ? f->owner : (void *)~0l;
6788 
6789 	mutex_lock(&adev->enforce_isolation_mutex);
6790 
6791 	/*
6792 	 * The "spearhead" submission is the first one which changes the
6793 	 * ownership to its client. We always need to wait for it to be
6794 	 * pushed to the HW before proceeding with anything.
6795 	 */
6796 	if (&f->scheduled != isolation->spearhead &&
6797 	    !dma_fence_is_signaled(isolation->spearhead)) {
6798 		dep = isolation->spearhead;
6799 		goto out_grab_ref;
6800 	}
6801 
6802 	if (isolation->owner != owner) {
6803 
6804 		/*
6805 		 * Wait for any gang to be assembled before switching to a
6806 		 * different owner or otherwise we could deadlock the
6807 		 * submissions.
6808 		 */
6809 		if (!job->gang_submit) {
6810 			dep = amdgpu_device_get_gang(adev);
6811 			if (!dma_fence_is_signaled(dep))
6812 				goto out_return_dep;
6813 			dma_fence_put(dep);
6814 		}
6815 
6816 		dma_fence_put(isolation->spearhead);
6817 		isolation->spearhead = dma_fence_get(&f->scheduled);
6818 		amdgpu_sync_move(&isolation->active, &isolation->prev);
6819 		trace_amdgpu_isolation(isolation->owner, owner);
6820 		isolation->owner = owner;
6821 	}
6822 
6823 	/*
6824 	 * Specifying the ring here helps to pipeline submissions even when
6825 	 * isolation is enabled. If that is not desired for testing NULL can be
6826 	 * used instead of the ring to enforce a CPU round trip while switching
6827 	 * between clients.
6828 	 */
6829 	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
6830 	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
6831 	if (r)
6832 		dev_warn(adev->dev, "OOM tracking isolation\n");
6833 
6834 out_grab_ref:
6835 	dma_fence_get(dep);
6836 out_return_dep:
6837 	mutex_unlock(&adev->enforce_isolation_mutex);
6838 	return dep;
6839 }
6840 
6841 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6842 {
6843 	switch (adev->asic_type) {
6844 #ifdef CONFIG_DRM_AMDGPU_SI
6845 	case CHIP_HAINAN:
6846 #endif
6847 	case CHIP_TOPAZ:
6848 		/* chips with no display hardware */
6849 		return false;
6850 #ifdef CONFIG_DRM_AMDGPU_SI
6851 	case CHIP_TAHITI:
6852 	case CHIP_PITCAIRN:
6853 	case CHIP_VERDE:
6854 	case CHIP_OLAND:
6855 #endif
6856 #ifdef CONFIG_DRM_AMDGPU_CIK
6857 	case CHIP_BONAIRE:
6858 	case CHIP_HAWAII:
6859 	case CHIP_KAVERI:
6860 	case CHIP_KABINI:
6861 	case CHIP_MULLINS:
6862 #endif
6863 	case CHIP_TONGA:
6864 	case CHIP_FIJI:
6865 	case CHIP_POLARIS10:
6866 	case CHIP_POLARIS11:
6867 	case CHIP_POLARIS12:
6868 	case CHIP_VEGAM:
6869 	case CHIP_CARRIZO:
6870 	case CHIP_STONEY:
6871 		/* chips with display hardware */
6872 		return true;
6873 	default:
6874 		/* IP discovery */
6875 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6876 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6877 			return false;
6878 		return true;
6879 	}
6880 }
6881 
6882 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6883 {
6884 	ssize_t size = 0;
6885 
6886 	if (!ring || !ring->adev)
6887 		return size;
6888 
6889 	if (amdgpu_device_should_recover_gpu(ring->adev))
6890 		size |= AMDGPU_RESET_TYPE_FULL;
6891 
6892 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6893 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6894 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6895 
6896 	return size;
6897 }
6898 
6899 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6900 {
6901 	ssize_t size = 0;
6902 
6903 	if (supported_reset == 0) {
6904 		size += sysfs_emit_at(buf, size, "unsupported");
6905 		size += sysfs_emit_at(buf, size, "\n");
6906 		return size;
6907 
6908 	}
6909 
6910 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6911 		size += sysfs_emit_at(buf, size, "soft ");
6912 
6913 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6914 		size += sysfs_emit_at(buf, size, "queue ");
6915 
6916 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6917 		size += sysfs_emit_at(buf, size, "pipe ");
6918 
6919 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6920 		size += sysfs_emit_at(buf, size, "full ");
6921 
6922 	size += sysfs_emit_at(buf, size, "\n");
6923 	return size;
6924 }
6925 
6926 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6927 			   enum amdgpu_uid_type type, uint8_t inst,
6928 			   uint64_t uid)
6929 {
6930 	if (!uid_info)
6931 		return;
6932 
6933 	if (type >= AMDGPU_UID_TYPE_MAX) {
6934 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6935 			     type);
6936 		return;
6937 	}
6938 
6939 	if (inst >= AMDGPU_UID_INST_MAX) {
6940 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6941 			     inst);
6942 		return;
6943 	}
6944 
6945 	if (uid_info->uid[type][inst] != 0) {
6946 		dev_warn_once(
6947 			uid_info->adev->dev,
6948 			"Overwriting existing UID %llu for type %d instance %d\n",
6949 			uid_info->uid[type][inst], type, inst);
6950 	}
6951 
6952 	uid_info->uid[type][inst] = uid;
6953 }
6954 
6955 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6956 			  enum amdgpu_uid_type type, uint8_t inst)
6957 {
6958 	if (!uid_info)
6959 		return 0;
6960 
6961 	if (type >= AMDGPU_UID_TYPE_MAX) {
6962 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6963 			     type);
6964 		return 0;
6965 	}
6966 
6967 	if (inst >= AMDGPU_UID_INST_MAX) {
6968 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6969 			     inst);
6970 		return 0;
6971 	}
6972 
6973 	return uid_info->uid[type][inst];
6974 }
6975