xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (revision 82502db448aa9eca2c8d8ee8b424f37aa9ce3ac0)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/aperture.h>
30 #include <linux/power_supply.h>
31 #include <linux/kthread.h>
32 #include <linux/module.h>
33 #include <linux/console.h>
34 #include <linux/slab.h>
35 #include <linux/iommu.h>
36 #include <linux/pci.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39 #include <linux/nospec.h>
40 
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_client_event.h>
43 #include <drm/drm_crtc_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/device.h>
47 #include <linux/vgaarb.h>
48 #include <linux/vga_switcheroo.h>
49 #include <linux/efi.h>
50 #include "amdgpu.h"
51 #include "amdgpu_trace.h"
52 #include "amdgpu_i2c.h"
53 #include "atom.h"
54 #include "amdgpu_atombios.h"
55 #include "amdgpu_atomfirmware.h"
56 #include "amd_pcie.h"
57 #ifdef CONFIG_DRM_AMDGPU_SI
58 #include "si.h"
59 #endif
60 #ifdef CONFIG_DRM_AMDGPU_CIK
61 #include "cik.h"
62 #endif
63 #include "vi.h"
64 #include "soc15.h"
65 #include "nv.h"
66 #include "bif/bif_4_1_d.h"
67 #include <linux/firmware.h>
68 #include "amdgpu_vf_error.h"
69 
70 #include "amdgpu_amdkfd.h"
71 #include "amdgpu_pm.h"
72 
73 #include "amdgpu_xgmi.h"
74 #include "amdgpu_ras.h"
75 #include "amdgpu_ras_mgr.h"
76 #include "amdgpu_pmu.h"
77 #include "amdgpu_fru_eeprom.h"
78 #include "amdgpu_reset.h"
79 #include "amdgpu_virt.h"
80 #include "amdgpu_dev_coredump.h"
81 
82 #include <linux/suspend.h>
83 #include <drm/task_barrier.h>
84 #include <linux/pm_runtime.h>
85 
86 #include <drm/drm_drv.h>
87 
88 #if IS_ENABLED(CONFIG_X86)
89 #include <asm/intel-family.h>
90 #include <asm/cpu_device_id.h>
91 #endif
92 
93 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
98 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
99 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
100 MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
101 
/* NOTE(review): presumably a resume timeout in milliseconds - confirm at the use site. */
#define AMDGPU_RESUME_MS		2000
/* NOTE(review): presumably the maximum number of reset retries - confirm at the use site. */
#define AMDGPU_MAX_RETRY_LIMIT		2
/* True when error code @r is one of -EBUSY, -ETIMEDOUT or -EINVAL. */
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Fallback PCIE index/data register offsets, each written as a byte offset
 * shifted right by two (NOTE(review): i.e. presumably a dword index - confirm).
 */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

/* Bit flags returned by amdgpu_device_get_vbios_flags(). */
#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
111 
112 static const struct drm_driver amdgpu_kms_driver;
113 
/*
 * Printable ASIC names.
 *
 * NOTE(review): presumably indexed by the ASIC type enumeration, in which case
 * the order here must stay in sync with that enum - confirm against its
 * definition before adding or reordering entries.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
154 
/* Mask with bits 0 .. AMD_IP_BLOCK_TYPE_NUM - 1 set, i.e. one bit per IP block type. */
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM  - 1, 0)
/*
 * Default init level where all blocks are expected to be initialized. This is
 * the level of initialization expected by default and also after a full reset
 * of the device.
 *
 * amdgpu_set_init_level() selects it for AMDGPU_INIT_LEVEL_DEFAULT and for any
 * level id it does not recognise.
 */
struct amdgpu_init_level amdgpu_init_default = {
	.level = AMDGPU_INIT_LEVEL_DEFAULT,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};
165 
/*
 * Init level selected by amdgpu_set_init_level() for
 * AMDGPU_INIT_LEVEL_RESET_RECOVERY. Like the default level, its hardware-init
 * mask covers every IP block type.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};
170 
/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
 * be reset before first use.
 *
 * Only the GMC, SMC, COMMON, IH and PSP block types are set in the
 * hardware-init mask.
 */
struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	.hwini_ip_block_mask =
		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
		BIT(AMD_IP_BLOCK_TYPE_PSP)
};
183 
184 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
185 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
186 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
187 
188 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
189 
190 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
191 					     enum amd_ip_block_type block)
192 {
193 	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
194 }
195 
196 void amdgpu_set_init_level(struct amdgpu_device *adev,
197 			   enum amdgpu_init_lvl_id lvl)
198 {
199 	switch (lvl) {
200 	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
201 		adev->init_lvl = &amdgpu_init_minimal_xgmi;
202 		break;
203 	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
204 		adev->init_lvl = &amdgpu_init_recovery;
205 		break;
206 	case AMDGPU_INIT_LEVEL_DEFAULT:
207 		fallthrough;
208 	default:
209 		adev->init_lvl = &amdgpu_init_default;
210 		break;
211 	}
212 }
213 
214 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
215 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
216 				     void *data);
217 
218 /**
219  * DOC: pcie_replay_count
220  *
221  * The amdgpu driver provides a sysfs API for reporting the total number
222  * of PCIe replays (NAKs).
223  * The file pcie_replay_count is used for this and returns the total
224  * number of replays as a sum of the NAKs generated and NAKs received.
225  */
226 
227 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
228 		struct device_attribute *attr, char *buf)
229 {
230 	struct drm_device *ddev = dev_get_drvdata(dev);
231 	struct amdgpu_device *adev = drm_to_adev(ddev);
232 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
233 
234 	return sysfs_emit(buf, "%llu\n", cnt);
235 }
236 
237 static DEVICE_ATTR(pcie_replay_count, 0444,
238 		amdgpu_device_get_pcie_replay_count, NULL);
239 
240 static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
241 {
242 	int ret = 0;
243 
244 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
245 		ret = sysfs_create_file(&adev->dev->kobj,
246 					&dev_attr_pcie_replay_count.attr);
247 
248 	return ret;
249 }
250 
251 static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
252 {
253 	if (amdgpu_nbio_is_replay_cnt_supported(adev))
254 		sysfs_remove_file(&adev->dev->kobj,
255 				  &dev_attr_pcie_replay_count.attr);
256 }
257 
258 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
259 					  const struct bin_attribute *attr, char *buf,
260 					  loff_t ppos, size_t count)
261 {
262 	struct device *dev = kobj_to_dev(kobj);
263 	struct drm_device *ddev = dev_get_drvdata(dev);
264 	struct amdgpu_device *adev = drm_to_adev(ddev);
265 	ssize_t bytes_read;
266 
267 	switch (ppos) {
268 	case AMDGPU_SYS_REG_STATE_XGMI:
269 		bytes_read = amdgpu_asic_get_reg_state(
270 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
271 		break;
272 	case AMDGPU_SYS_REG_STATE_WAFL:
273 		bytes_read = amdgpu_asic_get_reg_state(
274 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
275 		break;
276 	case AMDGPU_SYS_REG_STATE_PCIE:
277 		bytes_read = amdgpu_asic_get_reg_state(
278 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
279 		break;
280 	case AMDGPU_SYS_REG_STATE_USR:
281 		bytes_read = amdgpu_asic_get_reg_state(
282 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
283 		break;
284 	case AMDGPU_SYS_REG_STATE_USR_1:
285 		bytes_read = amdgpu_asic_get_reg_state(
286 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
287 		break;
288 	default:
289 		return -EINVAL;
290 	}
291 
292 	return bytes_read;
293 }
294 
295 static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
296 		      AMDGPU_SYS_REG_STATE_END);
297 
298 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
299 {
300 	int ret;
301 
302 	if (!amdgpu_asic_get_reg_state_supported(adev))
303 		return 0;
304 
305 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
306 
307 	return ret;
308 }
309 
310 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
311 {
312 	if (!amdgpu_asic_get_reg_state_supported(adev))
313 		return;
314 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
315 }
316 
317 /**
318  * DOC: board_info
319  *
320  * The amdgpu driver provides a sysfs API for giving board related information.
321  * It provides the form factor information in the format
322  *
323  *   type : form factor
324  *
325  * Possible form factor values
326  *
327  * - "cem"		- PCIE CEM card
328  * - "oam"		- Open Compute Accelerator Module
329  * - "unknown"	- Not known
330  *
331  */
332 
333 static ssize_t amdgpu_device_get_board_info(struct device *dev,
334 					    struct device_attribute *attr,
335 					    char *buf)
336 {
337 	struct drm_device *ddev = dev_get_drvdata(dev);
338 	struct amdgpu_device *adev = drm_to_adev(ddev);
339 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
340 	const char *pkg;
341 
342 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
343 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
344 
345 	switch (pkg_type) {
346 	case AMDGPU_PKG_TYPE_CEM:
347 		pkg = "cem";
348 		break;
349 	case AMDGPU_PKG_TYPE_OAM:
350 		pkg = "oam";
351 		break;
352 	default:
353 		pkg = "unknown";
354 		break;
355 	}
356 
357 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
358 }
359 
360 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
361 
/* NULL-terminated attribute list for the board attribute group (board_info only). */
static struct attribute *amdgpu_board_attrs[] = {
	&dev_attr_board_info.attr,
	NULL,
};
366 
367 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
368 					     struct attribute *attr, int n)
369 {
370 	struct device *dev = kobj_to_dev(kobj);
371 	struct drm_device *ddev = dev_get_drvdata(dev);
372 	struct amdgpu_device *adev = drm_to_adev(ddev);
373 
374 	if (adev->flags & AMD_IS_APU)
375 		return 0;
376 
377 	return attr->mode;
378 }
379 
/*
 * Board attribute group; amdgpu_board_attrs_is_visible() decides per device
 * whether its attributes are shown.
 */
static const struct attribute_group amdgpu_board_attrs_group = {
	.attrs = amdgpu_board_attrs,
	.is_visible = amdgpu_board_attrs_is_visible
};
384 
385 /**
386  * DOC: uma/carveout_options
387  *
388  * This is a read-only file that lists all available UMA allocation
389  * options and their corresponding indices. Example output::
390  *
391  *     $ cat uma/carveout_options
392  *     0: Minimum (512 MB)
393  *     1:  (1 GB)
394  *     2:  (2 GB)
395  *     3:  (4 GB)
396  *     4:  (6 GB)
397  *     5:  (8 GB)
398  *     6:  (12 GB)
399  *     7: Medium (16 GB)
400  *     8:  (24 GB)
401  *     9: High (32 GB)
402  */
403 static ssize_t carveout_options_show(struct device *dev,
404 				     struct device_attribute *attr,
405 				     char *buf)
406 {
407 	struct drm_device *ddev = dev_get_drvdata(dev);
408 	struct amdgpu_device *adev = drm_to_adev(ddev);
409 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
410 	uint32_t memory_carved;
411 	ssize_t size = 0;
412 
413 	if (!uma_info || !uma_info->num_entries)
414 		return -ENODEV;
415 
416 	for (int i = 0; i < uma_info->num_entries; i++) {
417 		memory_carved = uma_info->entries[i].memory_carved_mb;
418 		if (memory_carved >= SZ_1G/SZ_1M) {
419 			size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
420 					      i,
421 					      uma_info->entries[i].name,
422 					      memory_carved >> 10);
423 		} else {
424 			size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
425 					      i,
426 					      uma_info->entries[i].name,
427 					      memory_carved);
428 		}
429 	}
430 
431 	return size;
432 }
433 static DEVICE_ATTR_RO(carveout_options);
434 
435 /**
436  * DOC: uma/carveout
437  *
438  * This file is both readable and writable. When read, it shows the
439  * index of the current setting. Writing a valid index to this file
440  * allows users to change the UMA carveout size to the selected option
441  * on the next boot.
442  *
443  * The available options and their corresponding indices can be read
444  * from the uma/carveout_options file.
445  */
446 static ssize_t carveout_show(struct device *dev,
447 			     struct device_attribute *attr,
448 			     char *buf)
449 {
450 	struct drm_device *ddev = dev_get_drvdata(dev);
451 	struct amdgpu_device *adev = drm_to_adev(ddev);
452 
453 	return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
454 }
455 
456 static ssize_t carveout_store(struct device *dev,
457 			      struct device_attribute *attr,
458 			      const char *buf, size_t count)
459 {
460 	struct drm_device *ddev = dev_get_drvdata(dev);
461 	struct amdgpu_device *adev = drm_to_adev(ddev);
462 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
463 	struct amdgpu_uma_carveout_option *opt;
464 	unsigned long val;
465 	uint8_t flags;
466 	int r;
467 
468 	r = kstrtoul(buf, 10, &val);
469 	if (r)
470 		return r;
471 
472 	if (val >= uma_info->num_entries)
473 		return -EINVAL;
474 
475 	val = array_index_nospec(val, uma_info->num_entries);
476 	opt = &uma_info->entries[val];
477 
478 	if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
479 	    !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
480 		drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
481 		return -EINVAL;
482 	}
483 
484 	flags = opt->flags;
485 	flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
486 
487 	guard(mutex)(&uma_info->update_lock);
488 
489 	r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
490 	if (r)
491 		return r;
492 
493 	uma_info->uma_option_index = val;
494 
495 	return count;
496 }
497 static DEVICE_ATTR_RW(carveout);
498 
/* NULL-terminated attribute list for the "uma" group: carveout and carveout_options. */
static struct attribute *amdgpu_uma_attrs[] = {
	&dev_attr_carveout.attr,
	&dev_attr_carveout_options.attr,
	NULL
};

/*
 * Named attribute group; the .name makes the files appear under a "uma"
 * subdirectory of the device's sysfs directory.
 */
const struct attribute_group amdgpu_uma_attr_group = {
	.name = "uma",
	.attrs = amdgpu_uma_attrs
};
509 
510 static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
511 {
512 	int rc;
513 
514 	if (!(adev->flags & AMD_IS_APU))
515 		return;
516 
517 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
518 		return;
519 
520 	rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
521 	if (rc) {
522 		drm_dbg(adev_to_drm(adev),
523 			"Failed to parse UMA carveout info from VBIOS: %d\n", rc);
524 		goto out_info;
525 	}
526 
527 	mutex_init(&adev->uma_info.update_lock);
528 
529 	rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
530 	if (rc) {
531 		drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
532 		goto out_attr;
533 	}
534 
535 	return;
536 
537 out_attr:
538 	mutex_destroy(&adev->uma_info.update_lock);
539 out_info:
540 	return;
541 }
542 
543 static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
544 {
545 	struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
546 
547 	if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
548 		return;
549 
550 	mutex_destroy(&uma_info->update_lock);
551 	uma_info->num_entries = 0;
552 }
553 
554 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
555 
556 /**
557  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
558  *
559  * @adev: amdgpu device pointer
560  *
561  * Returns true if the device is a dGPU with ATPX power control,
562  * otherwise return false.
563  */
564 bool amdgpu_device_supports_px(struct amdgpu_device *adev)
565 {
566 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
567 		return true;
568 	return false;
569 }
570 
571 /**
572  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
573  *
574  * @adev: amdgpu device pointer
575  *
576  * Returns true if the device is a dGPU with ACPI power control,
577  * otherwise return false.
578  */
579 bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
580 {
581 	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
582 		return false;
583 
584 	if (adev->has_pr3 ||
585 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
586 		return true;
587 	return false;
588 }
589 
/**
 * amdgpu_device_supports_baco - query BACO/MACO support
 *
 * @adev: amdgpu device pointer
 *
 * Thin wrapper around the ASIC callback amdgpu_asic_supports_baco().
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (which requires BACO support);
 * 0 otherwise.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	const int support = amdgpu_asic_supports_baco(adev);

	return support;
}
604 
605 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
606 {
607 	int bamaco_support;
608 
609 	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
610 	bamaco_support = amdgpu_device_supports_baco(adev);
611 
612 	switch (amdgpu_runtime_pm) {
613 	case 2:
614 		if (bamaco_support & MACO_SUPPORT) {
615 			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
616 			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
617 		} else if (bamaco_support == BACO_SUPPORT) {
618 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
619 			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
620 		}
621 		break;
622 	case 1:
623 		if (bamaco_support & BACO_SUPPORT) {
624 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
625 			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
626 		}
627 		break;
628 	case -1:
629 	case -2:
630 		if (amdgpu_device_supports_px(adev)) {
631 			/* enable PX as runtime mode */
632 			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
633 			dev_info(adev->dev, "Using ATPX for runtime pm\n");
634 		} else if (amdgpu_device_supports_boco(adev)) {
635 			/* enable boco as runtime mode */
636 			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
637 			dev_info(adev->dev, "Using BOCO for runtime pm\n");
638 		} else {
639 			if (!bamaco_support)
640 				goto no_runtime_pm;
641 
642 			switch (adev->asic_type) {
643 			case CHIP_VEGA20:
644 			case CHIP_ARCTURUS:
645 				/* BACO are not supported on vega20 and arctrus */
646 				break;
647 			case CHIP_VEGA10:
648 				/* enable BACO as runpm mode if noretry=0 */
649 				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
650 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
651 				break;
652 			default:
653 				/* enable BACO as runpm mode on CI+ */
654 				if (!amdgpu_passthrough(adev))
655 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
656 				break;
657 			}
658 
659 			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
660 				if (bamaco_support & MACO_SUPPORT) {
661 					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
662 					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
663 				} else {
664 					dev_info(adev->dev, "Using BACO for runtime pm\n");
665 				}
666 			}
667 		}
668 		break;
669 	case 0:
670 		dev_info(adev->dev, "runtime pm is manually disabled\n");
671 		break;
672 	default:
673 		break;
674 	}
675 
676 no_runtime_pm:
677 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
678 		dev_info(adev->dev, "Runtime PM not available\n");
679 }
680 /**
681  * amdgpu_device_supports_smart_shift - Is the device dGPU with
682  * smart shift support
683  *
684  * @adev: amdgpu device pointer
685  *
686  * Returns true if the device is a dGPU with Smart Shift support,
687  * otherwise returns false.
688  */
689 bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
690 {
691 	return (amdgpu_device_supports_boco(adev) &&
692 		amdgpu_acpi_is_power_shift_control_supported());
693 }
694 
695 /*
696  * VRAM access helper functions
697  */
698 
699 /**
700  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
701  *
702  * @adev: amdgpu_device pointer
703  * @pos: offset of the buffer in vram
704  * @buf: virtual address of the buffer in system memory
705  * @size: read/write size, sizeof(@buf) must > @size
706  * @write: true - write to vram, otherwise - read from vram
707  */
708 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
709 			     void *buf, size_t size, bool write)
710 {
711 	unsigned long flags;
712 	uint32_t hi = ~0, tmp = 0;
713 	uint32_t *data = buf;
714 	uint64_t last;
715 	int idx;
716 
717 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
718 		return;
719 
720 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
721 
722 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
723 	for (last = pos + size; pos < last; pos += 4) {
724 		tmp = pos >> 31;
725 
726 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
727 		if (tmp != hi) {
728 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
729 			hi = tmp;
730 		}
731 		if (write)
732 			WREG32_NO_KIQ(mmMM_DATA, *data++);
733 		else
734 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
735 	}
736 
737 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
738 	drm_dev_exit(idx);
739 }
740 
741 /**
742  * amdgpu_device_aper_access - access vram by vram aperture
743  *
744  * @adev: amdgpu_device pointer
745  * @pos: offset of the buffer in vram
746  * @buf: virtual address of the buffer in system memory
747  * @size: read/write size, sizeof(@buf) must > @size
748  * @write: true - write to vram, otherwise - read from vram
749  *
750  * The return value means how many bytes have been transferred.
751  */
752 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
753 				 void *buf, size_t size, bool write)
754 {
755 #ifdef CONFIG_64BIT
756 	void __iomem *addr;
757 	size_t count = 0;
758 	uint64_t last;
759 
760 	if (!adev->mman.aper_base_kaddr)
761 		return 0;
762 
763 	last = min(pos + size, adev->gmc.visible_vram_size);
764 	if (last > pos) {
765 		addr = adev->mman.aper_base_kaddr + pos;
766 		count = last - pos;
767 
768 		if (write) {
769 			memcpy_toio(addr, buf, count);
770 			/* Make sure HDP write cache flush happens without any reordering
771 			 * after the system memory contents are sent over PCIe device
772 			 */
773 			mb();
774 			amdgpu_device_flush_hdp(adev, NULL);
775 		} else {
776 			amdgpu_device_invalidate_hdp(adev, NULL);
777 			/* Make sure HDP read cache is invalidated before issuing a read
778 			 * to the PCIe device
779 			 */
780 			mb();
781 			memcpy_fromio(buf, addr, count);
782 		}
783 
784 	}
785 
786 	return count;
787 #else
788 	return 0;
789 #endif
790 }
791 
792 /**
793  * amdgpu_device_vram_access - read/write a buffer in vram
794  *
795  * @adev: amdgpu_device pointer
796  * @pos: offset of the buffer in vram
797  * @buf: virtual address of the buffer in system memory
798  * @size: read/write size, sizeof(@buf) must > @size
799  * @write: true - write to vram, otherwise - read from vram
800  */
801 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
802 			       void *buf, size_t size, bool write)
803 {
804 	size_t count;
805 
806 	/* try to using vram apreature to access vram first */
807 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
808 	size -= count;
809 	if (size) {
810 		/* using MM to access rest vram */
811 		pos += count;
812 		buf += count;
813 		amdgpu_device_mm_access(adev, pos, buf, size, write);
814 	}
815 }
816 
817 /*
818  * register access helper functions.
819  */
820 
821 /* Check if hw access should be skipped because of hotplug or device error */
822 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
823 {
824 	if (adev->no_hw_access)
825 		return true;
826 
827 #ifdef CONFIG_LOCKDEP
828 	/*
829 	 * This is a bit complicated to understand, so worth a comment. What we assert
830 	 * here is that the GPU reset is not running on another thread in parallel.
831 	 *
832 	 * For this we trylock the read side of the reset semaphore, if that succeeds
833 	 * we know that the reset is not running in parallel.
834 	 *
835 	 * If the trylock fails we assert that we are either already holding the read
836 	 * side of the lock or are the reset thread itself and hold the write side of
837 	 * the lock.
838 	 */
839 	if (in_task()) {
840 		if (down_read_trylock(&adev->reset_domain->sem))
841 			up_read(&adev->reset_domain->sem);
842 		else
843 			lockdep_assert_held(&adev->reset_domain->sem);
844 	}
845 #endif
846 	return false;
847 }
848 
849 /**
850  * amdgpu_device_get_rev_id - query device rev_id
851  *
852  * @adev: amdgpu_device pointer
853  *
854  * Return device rev_id
855  */
856 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
857 {
858 	return adev->nbio.funcs->get_rev_id(adev);
859 }
860 
861 static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
862 {
863 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
864 		return AMDGPU_VBIOS_SKIP;
865 
866 	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
867 		return AMDGPU_VBIOS_OPTIONAL;
868 
869 	return 0;
870 }
871 
872 /**
873  * amdgpu_device_asic_init - Wrapper for atom asic_init
874  *
875  * @adev: amdgpu_device pointer
876  *
877  * Does any asic specific work and then calls atom asic init.
878  */
879 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
880 {
881 	uint32_t flags;
882 	bool optional;
883 	int ret;
884 
885 	amdgpu_asic_pre_asic_init(adev);
886 	flags = amdgpu_device_get_vbios_flags(adev);
887 	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
888 
889 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
890 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
891 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
892 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
893 		amdgpu_psp_wait_for_bootloader(adev);
894 		if (optional && !adev->bios)
895 			return 0;
896 
897 		ret = amdgpu_atomfirmware_asic_init(adev, true);
898 		return ret;
899 	} else {
900 		if (optional && !adev->bios)
901 			return 0;
902 
903 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
904 	}
905 
906 	return 0;
907 }
908 
909 /**
910  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
911  *
912  * @adev: amdgpu_device pointer
913  *
914  * Allocates a scratch page of VRAM for use by various things in the
915  * driver.
916  */
917 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
918 {
919 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
920 				       AMDGPU_GEM_DOMAIN_VRAM |
921 				       AMDGPU_GEM_DOMAIN_GTT,
922 				       &adev->mem_scratch.robj,
923 				       &adev->mem_scratch.gpu_addr,
924 				       (void **)&adev->mem_scratch.ptr);
925 }
926 
/**
 * amdgpu_device_mem_scratch_fini - Free the scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the buffer object allocated by amdgpu_device_mem_scratch_init().
 * NULL is passed for the GPU address and CPU pointer arguments, so only
 * adev->mem_scratch.robj is handed to amdgpu_bo_free_kernel().
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}
938 
939 /**
940  * amdgpu_device_program_register_sequence - program an array of registers.
941  *
942  * @adev: amdgpu_device pointer
943  * @registers: pointer to the register array
944  * @array_size: size of the register array
945  *
946  * Programs an array or registers with and or masks.
947  * This is a helper for setting golden registers.
948  */
949 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
950 					     const u32 *registers,
951 					     const u32 array_size)
952 {
953 	u32 tmp, reg, and_mask, or_mask;
954 	int i;
955 
956 	if (array_size % 3)
957 		return;
958 
959 	for (i = 0; i < array_size; i += 3) {
960 		reg = registers[i + 0];
961 		and_mask = registers[i + 1];
962 		or_mask = registers[i + 2];
963 
964 		if (and_mask == 0xffffffff) {
965 			tmp = or_mask;
966 		} else {
967 			tmp = RREG32(reg);
968 			tmp &= ~and_mask;
969 			if (adev->family >= AMDGPU_FAMILY_AI)
970 				tmp |= (or_mask & and_mask);
971 			else
972 				tmp |= or_mask;
973 		}
974 		WREG32(reg, tmp);
975 	}
976 }
977 
978 /**
979  * amdgpu_device_pci_config_reset - reset the GPU
980  *
981  * @adev: amdgpu_device pointer
982  *
983  * Resets the GPU using the pci config reset sequence.
984  * Only applicable to asics prior to vega10.
985  */
986 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
987 {
988 	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
989 }
990 
991 /**
992  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
993  *
994  * @adev: amdgpu_device pointer
995  *
996  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
997  */
998 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
999 {
1000 	return pci_reset_function(adev->pdev);
1001 }
1002 
1003 /*
1004  * amdgpu_device_wb_*()
1005  * Writeback is the method by which the GPU updates special pages in memory
1006  * with the status of certain GPU events (fences, ring pointers,etc.).
1007  */
1008 
1009 /**
1010  * amdgpu_device_wb_fini - Disable Writeback and free memory
1011  *
1012  * @adev: amdgpu_device pointer
1013  *
1014  * Disables Writeback and frees the Writeback memory (all asics).
1015  * Used at driver shutdown.
1016  */
1017 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1018 {
1019 	if (adev->wb.wb_obj) {
1020 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1021 				      &adev->wb.gpu_addr,
1022 				      (void **)&adev->wb.wb);
1023 		adev->wb.wb_obj = NULL;
1024 	}
1025 }
1026 
1027 /**
1028  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1029  *
1030  * @adev: amdgpu_device pointer
1031  *
1032  * Initializes writeback and allocates writeback memory (all asics).
1033  * Used at driver startup.
1034  * Returns 0 on success or an -error on failure.
1035  */
1036 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1037 {
1038 	int r;
1039 
1040 	if (adev->wb.wb_obj == NULL) {
1041 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1042 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1043 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1044 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1045 					    (void **)&adev->wb.wb);
1046 		if (r) {
1047 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1048 			return r;
1049 		}
1050 
1051 		adev->wb.num_wb = AMDGPU_MAX_WB;
1052 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1053 
1054 		/* clear wb memory */
1055 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 /**
1062  * amdgpu_device_wb_get - Allocate a wb entry
1063  *
1064  * @adev: amdgpu_device pointer
1065  * @wb: wb index
1066  *
1067  * Allocate a wb slot for use by the driver (all asics).
1068  * Returns 0 on success or -EINVAL on failure.
1069  */
1070 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1071 {
1072 	unsigned long flags, offset;
1073 
1074 	spin_lock_irqsave(&adev->wb.lock, flags);
1075 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1076 	if (offset < adev->wb.num_wb) {
1077 		__set_bit(offset, adev->wb.used);
1078 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1079 		*wb = offset << 3; /* convert to dw offset */
1080 		return 0;
1081 	} else {
1082 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1083 		return -EINVAL;
1084 	}
1085 }
1086 
1087 /**
1088  * amdgpu_device_wb_free - Free a wb entry
1089  *
1090  * @adev: amdgpu_device pointer
1091  * @wb: wb index
1092  *
1093  * Free a wb slot allocated for use by the driver (all asics)
1094  */
1095 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1096 {
1097 	unsigned long flags;
1098 
1099 	wb >>= 3;
1100 	spin_lock_irqsave(&adev->wb.lock, flags);
1101 	if (wb < adev->wb.num_wb)
1102 		__clear_bit(wb, adev->wb.used);
1103 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1104 }
1105 
1106 /**
1107  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1112  * to fail, but if any of the BARs is not accessible after the size we abort
1113  * driver loading by returning -ENODEV.
1114  */
1115 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1116 {
1117 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1118 	struct pci_bus *root;
1119 	struct resource *res;
1120 	int max_size, r;
1121 	unsigned int i;
1122 	u16 cmd;
1123 
1124 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1125 		return 0;
1126 
1127 	/* Bypass for VF */
1128 	if (amdgpu_sriov_vf(adev))
1129 		return 0;
1130 
1131 	if (!amdgpu_rebar)
1132 		return 0;
1133 
1134 	/* resizing on Dell G5 SE platforms causes problems with runtime pm */
1135 	if ((amdgpu_runtime_pm != 0) &&
1136 	    adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1137 	    adev->pdev->device == 0x731f &&
1138 	    adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1139 		return 0;
1140 
1141 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1142 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1143 		dev_warn(
1144 			adev->dev,
1145 			"System can't access extended configuration space, please check!!\n");
1146 
1147 	/* skip if the bios has already enabled large BAR */
1148 	if (adev->gmc.real_vram_size &&
1149 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1150 		return 0;
1151 
1152 	/* Check if the root BUS has 64bit memory resources */
1153 	root = adev->pdev->bus;
1154 	while (root->parent)
1155 		root = root->parent;
1156 
1157 	pci_bus_for_each_resource(root, res, i) {
1158 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1159 		    res->start > 0x100000000ull)
1160 			break;
1161 	}
1162 
1163 	/* Trying to resize is pointless without a root hub window above 4GB */
1164 	if (!res)
1165 		return 0;
1166 
1167 	/* Limit the BAR size to what is available */
1168 	max_size = pci_rebar_get_max_size(adev->pdev, 0);
1169 	if (max_size < 0)
1170 		return 0;
1171 	rbar_size = min(max_size, rbar_size);
1172 
1173 	/* Disable memory decoding while we change the BAR addresses and size */
1174 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1175 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1176 			      cmd & ~PCI_COMMAND_MEMORY);
1177 
1178 	/* Tear down doorbell as resizing will release BARs */
1179 	amdgpu_doorbell_fini(adev);
1180 
1181 	r = pci_resize_resource(adev->pdev, 0, rbar_size,
1182 				(adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
1183 								  : 1 << 2);
1184 	if (r == -ENOSPC)
1185 		dev_info(adev->dev,
1186 			 "Not enough PCI address space for a large BAR.");
1187 	else if (r && r != -ENOTSUPP)
1188 		dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1189 
1190 	/* When the doorbell or fb BAR isn't available we have no chance of
1191 	 * using the device.
1192 	 */
1193 	r = amdgpu_doorbell_init(adev);
1194 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1195 		return -ENODEV;
1196 
1197 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1198 
1199 	return 0;
1200 }
1201 
/*
 * GPU helper functions.
 */
1205 /**
1206  * amdgpu_device_need_post - check if the hw need post or not
1207  *
1208  * @adev: amdgpu_device pointer
1209  *
1210  * Check if the asic has been initialized (all asics) at driver startup
1211  * or post is needed if  hw reset is performed.
1212  * Returns true if need or false if not.
1213  */
1214 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1215 {
1216 	uint32_t reg, flags;
1217 
1218 	if (amdgpu_sriov_vf(adev))
1219 		return false;
1220 
1221 	flags = amdgpu_device_get_vbios_flags(adev);
1222 	if (flags & AMDGPU_VBIOS_SKIP)
1223 		return false;
1224 	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1225 		return false;
1226 
1227 	if (amdgpu_passthrough(adev)) {
1228 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1229 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1230 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1231 		 * vpost executed for smc version below 22.15
1232 		 */
1233 		if (adev->asic_type == CHIP_FIJI) {
1234 			int err;
1235 			uint32_t fw_ver;
1236 
1237 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1238 			/* force vPost if error occurred */
1239 			if (err)
1240 				return true;
1241 
1242 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1243 			release_firmware(adev->pm.fw);
1244 			if (fw_ver < 0x00160e00)
1245 				return true;
1246 		}
1247 	}
1248 
1249 	/* Don't post if we need to reset whole hive on init */
1250 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1251 		return false;
1252 
1253 	if (adev->has_hw_reset) {
1254 		adev->has_hw_reset = false;
1255 		return true;
1256 	}
1257 
1258 	/* bios scratch used on CIK+ */
1259 	if (adev->asic_type >= CHIP_BONAIRE)
1260 		return amdgpu_atombios_scratch_need_asic_init(adev);
1261 
1262 	/* check MEM_SIZE for older asics */
1263 	reg = amdgpu_asic_get_config_memsize(adev);
1264 
1265 	if ((reg != 0) && (reg != 0xffffffff))
1266 		return false;
1267 
1268 	return true;
1269 }
1270 
1271 /*
1272  * Check whether seamless boot is supported.
1273  *
1274  * So far we only support seamless boot on DCE 3.0 or later.
1275  * If users report that it works on older ASICS as well, we may
1276  * loosen this.
1277  */
1278 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1279 {
1280 	switch (amdgpu_seamless) {
1281 	case -1:
1282 		break;
1283 	case 1:
1284 		return true;
1285 	case 0:
1286 		return false;
1287 	default:
1288 		dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1289 			amdgpu_seamless);
1290 		return false;
1291 	}
1292 
1293 	if (!(adev->flags & AMD_IS_APU))
1294 		return false;
1295 
1296 	if (adev->mman.keep_stolen_vga_memory)
1297 		return false;
1298 
1299 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1300 }
1301 
1302 /*
1303  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1304  * don't support dynamic speed switching. Until we have confirmation from Intel
1305  * that a specific host supports it, it's safer that we keep it disabled for all.
1306  *
1307  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1308  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1309  */
1310 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1311 {
1312 #if IS_ENABLED(CONFIG_X86)
1313 	struct cpuinfo_x86 *c = &cpu_data(0);
1314 
1315 	/* eGPU change speeds based on USB4 fabric conditions */
1316 	if (dev_is_removable(adev->dev))
1317 		return true;
1318 
1319 	if (c->x86_vendor == X86_VENDOR_INTEL)
1320 		return false;
1321 #endif
1322 	return true;
1323 }
1324 
1325 static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1326 {
1327 	/* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
1328 	 * It's unclear if this is a platform-specific or GPU-specific issue.
1329 	 * Disable ASPM on SI for the time being.
1330 	 */
1331 	if (adev->family == AMDGPU_FAMILY_SI)
1332 		return true;
1333 
1334 #if IS_ENABLED(CONFIG_X86)
1335 	struct cpuinfo_x86 *c = &cpu_data(0);
1336 
1337 	if (c->x86_vendor == X86_VENDOR_INTEL) {
1338 		switch (c->x86_model) {
1339 		case VFM_MODEL(INTEL_ALDERLAKE):
1340 		case VFM_MODEL(INTEL_ALDERLAKE_L):
1341 		case VFM_MODEL(INTEL_RAPTORLAKE):
1342 		case VFM_MODEL(INTEL_RAPTORLAKE_P):
1343 		case VFM_MODEL(INTEL_RAPTORLAKE_S):
1344 		case VFM_MODEL(INTEL_TIGERLAKE):
1345 		case VFM_MODEL(INTEL_TIGERLAKE_L):
1346 			return true;
1347 		default:
1348 			return false;
1349 		}
1350 	} else {
1351 		return false;
1352 	}
1353 #else
1354 	return false;
1355 #endif
1356 }
1357 
1358 /**
1359  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1360  *
1361  * @adev: amdgpu_device pointer
1362  *
1363  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1364  * be set for this device.
1365  *
1366  * Returns true if it should be used or false if not.
1367  */
1368 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1369 {
1370 	switch (amdgpu_aspm) {
1371 	case -1:
1372 		break;
1373 	case 0:
1374 		return false;
1375 	case 1:
1376 		return true;
1377 	default:
1378 		return false;
1379 	}
1380 	if (adev->flags & AMD_IS_APU)
1381 		return false;
1382 	if (amdgpu_device_aspm_support_quirk(adev))
1383 		return false;
1384 	return pcie_aspm_enabled(adev->pdev);
1385 }
1386 
1387 /* if we get transitioned to only one device, take VGA back */
1388 /**
1389  * amdgpu_device_vga_set_decode - enable/disable vga decode
1390  *
1391  * @pdev: PCI device pointer
1392  * @state: enable/disable vga decode
1393  *
1394  * Enable/disable vga decode (all asics).
1395  * Returns VGA resource flags.
1396  */
1397 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1398 		bool state)
1399 {
1400 	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1401 
1402 	amdgpu_asic_set_vga_state(adev, state);
1403 	if (state)
1404 		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1405 		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1406 	else
1407 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1408 }
1409 
1410 /**
1411  * amdgpu_device_check_block_size - validate the vm block size
1412  *
1413  * @adev: amdgpu_device pointer
1414  *
1415  * Validates the vm block size specified via module parameter.
1416  * The vm block size defines number of bits in page table versus page directory,
1417  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1418  * page table and the remaining bits are in the page directory.
1419  */
1420 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1421 {
1422 	/* defines number of bits in page table versus page directory,
1423 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1424 	 * page table and the remaining bits are in the page directory
1425 	 */
1426 	if (amdgpu_vm_block_size == -1)
1427 		return;
1428 
1429 	if (amdgpu_vm_block_size < 9) {
1430 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1431 			 amdgpu_vm_block_size);
1432 		amdgpu_vm_block_size = -1;
1433 	}
1434 }
1435 
1436 /**
1437  * amdgpu_device_check_vm_size - validate the vm size
1438  *
1439  * @adev: amdgpu_device pointer
1440  *
1441  * Validates the vm size in GB specified via module parameter.
1442  * The VM size is the size of the GPU virtual memory space in GB.
1443  */
1444 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1445 {
1446 	/* no need to check the default value */
1447 	if (amdgpu_vm_size == -1)
1448 		return;
1449 
1450 	if (amdgpu_vm_size < 1) {
1451 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1452 			 amdgpu_vm_size);
1453 		amdgpu_vm_size = -1;
1454 	}
1455 }
1456 
1457 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1458 {
1459 	struct sysinfo si;
1460 	bool is_os_64 = (sizeof(void *) == 8);
1461 	uint64_t total_memory;
1462 	uint64_t dram_size_seven_GB = 0x1B8000000;
1463 	uint64_t dram_size_three_GB = 0xB8000000;
1464 
1465 	if (amdgpu_smu_memory_pool_size == 0)
1466 		return;
1467 
1468 	if (!is_os_64) {
1469 		dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
1470 		goto def_value;
1471 	}
1472 	si_meminfo(&si);
1473 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1474 
1475 	if ((amdgpu_smu_memory_pool_size == 1) ||
1476 		(amdgpu_smu_memory_pool_size == 2)) {
1477 		if (total_memory < dram_size_three_GB)
1478 			goto def_value1;
1479 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1480 		(amdgpu_smu_memory_pool_size == 8)) {
1481 		if (total_memory < dram_size_seven_GB)
1482 			goto def_value1;
1483 	} else {
1484 		dev_warn(adev->dev, "Smu memory pool size not supported\n");
1485 		goto def_value;
1486 	}
1487 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1488 
1489 	return;
1490 
1491 def_value1:
1492 	dev_warn(adev->dev, "No enough system memory\n");
1493 def_value:
1494 	adev->pm.smu_prv_buffer_size = 0;
1495 }
1496 
1497 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1498 {
1499 	if (!(adev->flags & AMD_IS_APU) ||
1500 	    adev->asic_type < CHIP_RAVEN)
1501 		return 0;
1502 
1503 	switch (adev->asic_type) {
1504 	case CHIP_RAVEN:
1505 		if (adev->pdev->device == 0x15dd)
1506 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1507 		if (adev->pdev->device == 0x15d8)
1508 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1509 		break;
1510 	case CHIP_RENOIR:
1511 		if ((adev->pdev->device == 0x1636) ||
1512 		    (adev->pdev->device == 0x164c))
1513 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1514 		else
1515 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1516 		break;
1517 	case CHIP_VANGOGH:
1518 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1519 		break;
1520 	case CHIP_YELLOW_CARP:
1521 		break;
1522 	case CHIP_CYAN_SKILLFISH:
1523 		if ((adev->pdev->device == 0x13FE) ||
1524 		    (adev->pdev->device == 0x143F))
1525 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1526 		break;
1527 	default:
1528 		break;
1529 	}
1530 
1531 	return 0;
1532 }
1533 
1534 /**
1535  * amdgpu_device_check_arguments - validate module params
1536  *
1537  * @adev: amdgpu_device pointer
1538  *
1539  * Validates certain module parameters and updates
1540  * the associated values used by the driver (all asics).
1541  */
1542 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1543 {
1544 	int i;
1545 
1546 	if (amdgpu_sched_jobs < 4) {
1547 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548 			 amdgpu_sched_jobs);
1549 		amdgpu_sched_jobs = 4;
1550 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1551 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552 			 amdgpu_sched_jobs);
1553 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554 	}
1555 
1556 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1557 		/* gart size must be greater or equal to 32M */
1558 		dev_warn(adev->dev, "gart size (%d) too small\n",
1559 			 amdgpu_gart_size);
1560 		amdgpu_gart_size = -1;
1561 	}
1562 
1563 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1564 		/* gtt size must be greater or equal to 32M */
1565 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1566 				 amdgpu_gtt_size);
1567 		amdgpu_gtt_size = -1;
1568 	}
1569 
1570 	/* valid range is between 4 and 9 inclusive */
1571 	if (amdgpu_vm_fragment_size != -1 &&
1572 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574 		amdgpu_vm_fragment_size = -1;
1575 	}
1576 
1577 	if (amdgpu_sched_hw_submission < 2) {
1578 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579 			 amdgpu_sched_hw_submission);
1580 		amdgpu_sched_hw_submission = 2;
1581 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583 			 amdgpu_sched_hw_submission);
1584 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585 	}
1586 
1587 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589 		amdgpu_reset_method = -1;
1590 	}
1591 
1592 	amdgpu_device_check_smu_prv_buffer_size(adev);
1593 
1594 	amdgpu_device_check_vm_size(adev);
1595 
1596 	amdgpu_device_check_block_size(adev);
1597 
1598 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1599 
1600 	for (i = 0; i < MAX_XCP; i++) {
1601 		switch (amdgpu_enforce_isolation) {
1602 		case -1:
1603 		case 0:
1604 		default:
1605 			/* disable */
1606 			adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
1607 			break;
1608 		case 1:
1609 			/* enable */
1610 			adev->enforce_isolation[i] =
1611 				AMDGPU_ENFORCE_ISOLATION_ENABLE;
1612 			break;
1613 		case 2:
1614 			/* enable legacy mode */
1615 			adev->enforce_isolation[i] =
1616 				AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
1617 			break;
1618 		case 3:
1619 			/* enable only process isolation without submitting cleaner shader */
1620 			adev->enforce_isolation[i] =
1621 				AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
1622 			break;
1623 		}
1624 	}
1625 
1626 	return 0;
1627 }
1628 
1629 /**
1630  * amdgpu_switcheroo_set_state - set switcheroo state
1631  *
1632  * @pdev: pci dev pointer
1633  * @state: vga_switcheroo state
1634  *
1635  * Callback for the switcheroo driver.  Suspends or resumes
1636  * the asics before or after it is powered up using ACPI methods.
1637  */
1638 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1639 					enum vga_switcheroo_state state)
1640 {
1641 	struct drm_device *dev = pci_get_drvdata(pdev);
1642 	int r;
1643 
1644 	if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
1645 	    state == VGA_SWITCHEROO_OFF)
1646 		return;
1647 
1648 	if (state == VGA_SWITCHEROO_ON) {
1649 		pr_info("switched on\n");
1650 		/* don't suspend or resume card normally */
1651 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1652 
1653 		pci_set_power_state(pdev, PCI_D0);
1654 		amdgpu_device_load_pci_state(pdev);
1655 		r = pci_enable_device(pdev);
1656 		if (r)
1657 			dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
1658 				 r);
1659 		amdgpu_device_resume(dev, true);
1660 
1661 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1662 	} else {
1663 		dev_info(&pdev->dev, "switched off\n");
1664 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1665 		amdgpu_device_prepare(dev);
1666 		amdgpu_device_suspend(dev, true);
1667 		amdgpu_device_cache_pci_state(pdev);
1668 		/* Shut down the device */
1669 		pci_disable_device(pdev);
1670 		pci_set_power_state(pdev, PCI_D3cold);
1671 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1672 	}
1673 }
1674 
1675 /**
1676  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1677  *
1678  * @pdev: pci dev pointer
1679  *
1680  * Callback for the switcheroo driver.  Check of the switcheroo
1681  * state can be changed.
1682  * Returns true if the state can be changed, false if not.
1683  */
1684 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1685 {
1686 	struct drm_device *dev = pci_get_drvdata(pdev);
1687 
1688        /*
1689 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1690 	* locking inversion with the driver load path. And the access here is
1691 	* completely racy anyway. So don't bother with locking for now.
1692 	*/
1693 	return atomic_read(&dev->open_count) == 0;
1694 }
1695 
1696 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1697 	.set_gpu_state = amdgpu_switcheroo_set_state,
1698 	.reprobe = NULL,
1699 	.can_switch = amdgpu_switcheroo_can_switch,
1700 };
1701 
1702 /**
1703  * amdgpu_device_enable_virtual_display - enable virtual display feature
1704  *
1705  * @adev: amdgpu_device pointer
1706  *
1707  * Enabled the virtual display feature if the user has enabled it via
1708  * the module parameter virtual_display.  This feature provides a virtual
1709  * display hardware on headless boards or in virtualized environments.
1710  * This function parses and validates the configuration string specified by
1711  * the user and configures the virtual display configuration (number of
1712  * virtual connectors, crtcs, etc.) specified.
1713  */
1714 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1715 {
1716 	adev->enable_virtual_display = false;
1717 
1718 	if (amdgpu_virtual_display) {
1719 		const char *pci_address_name = pci_name(adev->pdev);
1720 		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1721 
1722 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1723 		pciaddstr_tmp = pciaddstr;
1724 		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1725 			pciaddname = strsep(&pciaddname_tmp, ",");
1726 			if (!strcmp("all", pciaddname)
1727 			    || !strcmp(pci_address_name, pciaddname)) {
1728 				long num_crtc;
1729 				int res = -1;
1730 
1731 				adev->enable_virtual_display = true;
1732 
1733 				if (pciaddname_tmp)
1734 					res = kstrtol(pciaddname_tmp, 10,
1735 						      &num_crtc);
1736 
1737 				if (!res) {
1738 					if (num_crtc < 1)
1739 						num_crtc = 1;
1740 					if (num_crtc > 6)
1741 						num_crtc = 6;
1742 					adev->mode_info.num_crtc = num_crtc;
1743 				} else {
1744 					adev->mode_info.num_crtc = 1;
1745 				}
1746 				break;
1747 			}
1748 		}
1749 
1750 		dev_info(
1751 			adev->dev,
1752 			"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1753 			amdgpu_virtual_display, pci_address_name,
1754 			adev->enable_virtual_display, adev->mode_info.num_crtc);
1755 
1756 		kfree(pciaddstr);
1757 	}
1758 }
1759 
1760 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1761 {
1762 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1763 		adev->mode_info.num_crtc = 1;
1764 		adev->enable_virtual_display = true;
1765 		dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
1766 			 adev->enable_virtual_display,
1767 			 adev->mode_info.num_crtc);
1768 	}
1769 }
1770 
1771 /**
1772  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1773  *
1774  * @adev: amdgpu_device pointer
1775  *
1776  * Parses the asic configuration parameters specified in the gpu info
1777  * firmware and makes them available to the driver for use in configuring
1778  * the asic.
1779  * Returns 0 on success, -EINVAL on failure.
1780  */
1781 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1782 {
1783 	const char *chip_name;
1784 	int err;
1785 	const struct gpu_info_firmware_header_v1_0 *hdr;
1786 
1787 	adev->firmware.gpu_info_fw = NULL;
1788 
1789 	switch (adev->asic_type) {
1790 	default:
1791 		return 0;
1792 	case CHIP_VEGA10:
1793 		chip_name = "vega10";
1794 		break;
1795 	case CHIP_VEGA12:
1796 		chip_name = "vega12";
1797 		break;
1798 	case CHIP_RAVEN:
1799 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1800 			chip_name = "raven2";
1801 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1802 			chip_name = "picasso";
1803 		else
1804 			chip_name = "raven";
1805 		break;
1806 	case CHIP_ARCTURUS:
1807 		chip_name = "arcturus";
1808 		break;
1809 	case CHIP_NAVI12:
1810 		if (adev->discovery.bin)
1811 			return 0;
1812 		chip_name = "navi12";
1813 		break;
1814 	case CHIP_CYAN_SKILLFISH:
1815 		if (adev->discovery.bin)
1816 			return 0;
1817 		chip_name = "cyan_skillfish";
1818 		break;
1819 	}
1820 
1821 	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
1822 				   AMDGPU_UCODE_OPTIONAL,
1823 				   "amdgpu/%s_gpu_info.bin", chip_name);
1824 	if (err) {
1825 		dev_err(adev->dev,
1826 			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
1827 			chip_name);
1828 		goto out;
1829 	}
1830 
1831 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1832 	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1833 
1834 	switch (hdr->version_major) {
1835 	case 1:
1836 	{
1837 		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1838 			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1839 								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1840 
1841 		/*
1842 		 * Should be dropped when DAL no longer needs it.
1843 		 */
1844 		if (adev->asic_type == CHIP_NAVI12)
1845 			goto parse_soc_bounding_box;
1846 
1847 		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1848 		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1849 		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1850 		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1851 		adev->gfx.config.max_texture_channel_caches =
1852 			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1853 		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1854 		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1855 		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1856 		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1857 		adev->gfx.config.double_offchip_lds_buf =
1858 			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1859 		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1860 		adev->gfx.cu_info.max_waves_per_simd =
1861 			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1862 		adev->gfx.cu_info.max_scratch_slots_per_cu =
1863 			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1864 		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1865 		if (hdr->version_minor >= 1) {
1866 			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1867 				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1868 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1869 			adev->gfx.config.num_sc_per_sh =
1870 				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1871 			adev->gfx.config.num_packer_per_sc =
1872 				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1873 		}
1874 
1875 parse_soc_bounding_box:
1876 		/*
1877 		 * soc bounding box info is not integrated in disocovery table,
1878 		 * we always need to parse it from gpu info firmware if needed.
1879 		 */
1880 		if (hdr->version_minor == 2) {
1881 			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1882 				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1883 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1885 		}
1886 		break;
1887 	}
1888 	default:
1889 		dev_err(adev->dev,
1890 			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1891 		err = -EINVAL;
1892 		goto out;
1893 	}
1894 out:
1895 	return err;
1896 }
1897 
1898 static void amdgpu_uid_init(struct amdgpu_device *adev)
1899 {
1900 	/* Initialize the UID for the device */
1901 	adev->uid_info = kzalloc_obj(struct amdgpu_uid);
1902 	if (!adev->uid_info) {
1903 		dev_warn(adev->dev, "Failed to allocate memory for UID\n");
1904 		return;
1905 	}
1906 	adev->uid_info->adev = adev;
1907 }
1908 
1909 static void amdgpu_uid_fini(struct amdgpu_device *adev)
1910 {
1911 	/* Free the UID memory */
1912 	kfree(adev->uid_info);
1913 	adev->uid_info = NULL;
1914 }
1915 
1916 static struct pci_dev *amdgpu_device_find_parent(struct amdgpu_device *adev)
1917 {
1918 	struct pci_dev *parent = adev->pdev;
1919 
1920 	/* skip upstream/downstream switches internal to dGPU */
1921 	while ((parent = pci_upstream_bridge(parent))) {
1922 		if (parent->vendor == PCI_VENDOR_ID_ATI)
1923 			continue;
1924 	}
1925 
1926 	return parent;
1927 }
1928 
1929 /**
1930  * amdgpu_device_ip_early_init - run early init for hardware IPs
1931  *
1932  * @adev: amdgpu_device pointer
1933  *
1934  * Early initialization pass for hardware IPs.  The hardware IPs that make
1935  * up each asic are discovered each IP's early_init callback is run.  This
1936  * is the first stage in initializing the asic.
1937  * Returns 0 on success, negative error code on failure.
1938  */
1939 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1940 {
1941 	struct amdgpu_ip_block *ip_block;
1942 	struct pci_dev *parent;
1943 	bool total, skip_bios;
1944 	uint32_t bios_flags;
1945 	int i, r;
1946 
1947 	amdgpu_device_enable_virtual_display(adev);
1948 
1949 	if (amdgpu_sriov_vf(adev)) {
1950 		r = amdgpu_virt_request_full_gpu(adev, true);
1951 		if (r)
1952 			return r;
1953 
1954 		r = amdgpu_virt_init_critical_region(adev);
1955 		if (r)
1956 			return r;
1957 	}
1958 
1959 	switch (adev->asic_type) {
1960 #ifdef CONFIG_DRM_AMDGPU_SI
1961 	case CHIP_VERDE:
1962 	case CHIP_TAHITI:
1963 	case CHIP_PITCAIRN:
1964 	case CHIP_OLAND:
1965 	case CHIP_HAINAN:
1966 		adev->family = AMDGPU_FAMILY_SI;
1967 		r = si_set_ip_blocks(adev);
1968 		if (r)
1969 			return r;
1970 		break;
1971 #endif
1972 #ifdef CONFIG_DRM_AMDGPU_CIK
1973 	case CHIP_BONAIRE:
1974 	case CHIP_HAWAII:
1975 	case CHIP_KAVERI:
1976 	case CHIP_KABINI:
1977 	case CHIP_MULLINS:
1978 		if (adev->flags & AMD_IS_APU)
1979 			adev->family = AMDGPU_FAMILY_KV;
1980 		else
1981 			adev->family = AMDGPU_FAMILY_CI;
1982 
1983 		r = cik_set_ip_blocks(adev);
1984 		if (r)
1985 			return r;
1986 		break;
1987 #endif
1988 	case CHIP_TOPAZ:
1989 	case CHIP_TONGA:
1990 	case CHIP_FIJI:
1991 	case CHIP_POLARIS10:
1992 	case CHIP_POLARIS11:
1993 	case CHIP_POLARIS12:
1994 	case CHIP_VEGAM:
1995 	case CHIP_CARRIZO:
1996 	case CHIP_STONEY:
1997 		if (adev->flags & AMD_IS_APU)
1998 			adev->family = AMDGPU_FAMILY_CZ;
1999 		else
2000 			adev->family = AMDGPU_FAMILY_VI;
2001 
2002 		r = vi_set_ip_blocks(adev);
2003 		if (r)
2004 			return r;
2005 		break;
2006 	default:
2007 		r = amdgpu_discovery_set_ip_blocks(adev);
2008 		if (r) {
2009 			adev->num_ip_blocks = 0;
2010 			return r;
2011 		}
2012 		break;
2013 	}
2014 
2015 	/* Check for IP version 9.4.3 with A0 hardware */
2016 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2017 	    !amdgpu_device_get_rev_id(adev)) {
2018 		dev_err(adev->dev, "Unsupported A0 hardware\n");
2019 		return -ENODEV;	/* device unsupported - no device error */
2020 	}
2021 
2022 	if (amdgpu_has_atpx() &&
2023 	    (amdgpu_is_atpx_hybrid() ||
2024 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2025 	    ((adev->flags & AMD_IS_APU) == 0) &&
2026 	    !dev_is_removable(&adev->pdev->dev))
2027 		adev->flags |= AMD_IS_PX;
2028 
2029 	if (!(adev->flags & AMD_IS_APU)) {
2030 		parent = pcie_find_root_port(adev->pdev);
2031 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2032 	}
2033 
2034 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2035 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2036 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2037 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2038 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2039 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2040 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2041 
2042 	adev->virt.is_xgmi_node_migrate_enabled = false;
2043 	if (amdgpu_sriov_vf(adev)) {
2044 		adev->virt.is_xgmi_node_migrate_enabled =
2045 			amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
2046 	}
2047 
2048 	total = true;
2049 	for (i = 0; i < adev->num_ip_blocks; i++) {
2050 		ip_block = &adev->ip_blocks[i];
2051 
2052 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2053 			dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2054 				 adev->ip_blocks[i].version->funcs->name);
2055 			adev->ip_blocks[i].status.valid = false;
2056 		} else if (ip_block->version->funcs->early_init) {
2057 			r = ip_block->version->funcs->early_init(ip_block);
2058 			if (r == -ENOENT) {
2059 				adev->ip_blocks[i].status.valid = false;
2060 			} else if (r) {
2061 				dev_err(adev->dev,
2062 					"early_init of IP block <%s> failed %d\n",
2063 					adev->ip_blocks[i].version->funcs->name,
2064 					r);
2065 				total = false;
2066 			} else {
2067 				adev->ip_blocks[i].status.valid = true;
2068 			}
2069 		} else {
2070 			adev->ip_blocks[i].status.valid = true;
2071 		}
2072 		/* get the vbios after the asic_funcs are set up */
2073 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2074 			r = amdgpu_device_parse_gpu_info_fw(adev);
2075 			if (r)
2076 				return r;
2077 
2078 			bios_flags = amdgpu_device_get_vbios_flags(adev);
2079 			skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2080 			/* Read BIOS */
2081 			if (!skip_bios) {
2082 				bool optional =
2083 					!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2084 				if (!amdgpu_get_bios(adev) && !optional)
2085 					return -EINVAL;
2086 
2087 				if (optional && !adev->bios)
2088 					dev_info(
2089 						adev->dev,
2090 						"VBIOS image optional, proceeding without VBIOS image");
2091 
2092 				if (adev->bios) {
2093 					r = amdgpu_atombios_init(adev);
2094 					if (r) {
2095 						dev_err(adev->dev,
2096 							"amdgpu_atombios_init failed\n");
2097 						amdgpu_vf_error_put(
2098 							adev,
2099 							AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2100 							0, 0);
2101 						return r;
2102 					}
2103 				}
2104 			}
2105 
2106 			/*get pf2vf msg info at it's earliest time*/
2107 			if (amdgpu_sriov_vf(adev))
2108 				amdgpu_virt_init_data_exchange(adev);
2109 
2110 		}
2111 	}
2112 	if (!total)
2113 		return -ENODEV;
2114 
2115 	if (adev->gmc.xgmi.supported)
2116 		amdgpu_xgmi_early_init(adev);
2117 
2118 	if (amdgpu_is_multi_aid(adev))
2119 		amdgpu_uid_init(adev);
2120 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2121 	if (ip_block->status.valid != false)
2122 		amdgpu_amdkfd_device_probe(adev);
2123 
2124 	adev->cg_flags &= amdgpu_cg_mask;
2125 	adev->pg_flags &= amdgpu_pg_mask;
2126 
2127 	return 0;
2128 }
2129 
2130 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2131 {
2132 	int i, r;
2133 
2134 	for (i = 0; i < adev->num_ip_blocks; i++) {
2135 		if (!adev->ip_blocks[i].status.sw)
2136 			continue;
2137 		if (adev->ip_blocks[i].status.hw)
2138 			continue;
2139 		if (!amdgpu_ip_member_of_hwini(
2140 			    adev, adev->ip_blocks[i].version->type))
2141 			continue;
2142 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2143 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2144 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2145 			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2146 			if (r) {
2147 				dev_err(adev->dev,
2148 					"hw_init of IP block <%s> failed %d\n",
2149 					adev->ip_blocks[i].version->funcs->name,
2150 					r);
2151 				return r;
2152 			}
2153 			adev->ip_blocks[i].status.hw = true;
2154 		}
2155 	}
2156 
2157 	return 0;
2158 }
2159 
2160 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2161 {
2162 	int i, r;
2163 
2164 	for (i = 0; i < adev->num_ip_blocks; i++) {
2165 		if (!adev->ip_blocks[i].status.sw)
2166 			continue;
2167 		if (adev->ip_blocks[i].status.hw)
2168 			continue;
2169 		if (!amdgpu_ip_member_of_hwini(
2170 			    adev, adev->ip_blocks[i].version->type))
2171 			continue;
2172 		r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2173 		if (r) {
2174 			dev_err(adev->dev,
2175 				"hw_init of IP block <%s> failed %d\n",
2176 				adev->ip_blocks[i].version->funcs->name, r);
2177 			return r;
2178 		}
2179 		adev->ip_blocks[i].status.hw = true;
2180 	}
2181 
2182 	return 0;
2183 }
2184 
2185 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2186 {
2187 	int r = 0;
2188 	int i;
2189 	uint32_t smu_version;
2190 
2191 	if (adev->asic_type >= CHIP_VEGA10) {
2192 		for (i = 0; i < adev->num_ip_blocks; i++) {
2193 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2194 				continue;
2195 
2196 			if (!amdgpu_ip_member_of_hwini(adev,
2197 						       AMD_IP_BLOCK_TYPE_PSP))
2198 				break;
2199 
2200 			if (!adev->ip_blocks[i].status.sw)
2201 				continue;
2202 
2203 			/* no need to do the fw loading again if already done*/
2204 			if (adev->ip_blocks[i].status.hw == true)
2205 				break;
2206 
2207 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2208 				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2209 				if (r)
2210 					return r;
2211 			} else {
2212 				r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2213 				if (r) {
2214 					dev_err(adev->dev,
2215 						"hw_init of IP block <%s> failed %d\n",
2216 						adev->ip_blocks[i]
2217 							.version->funcs->name,
2218 						r);
2219 					return r;
2220 				}
2221 				adev->ip_blocks[i].status.hw = true;
2222 			}
2223 			break;
2224 		}
2225 	}
2226 
2227 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2228 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2229 
2230 	return r;
2231 }
2232 
2233 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2234 {
2235 	struct drm_sched_init_args args = {
2236 		.ops = &amdgpu_sched_ops,
2237 		.timeout_wq = adev->reset_domain->wq,
2238 		.dev = adev->dev,
2239 	};
2240 	long timeout;
2241 	int r, i;
2242 
2243 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2244 		struct amdgpu_ring *ring = adev->rings[i];
2245 
2246 		/* No need to setup the GPU scheduler for rings that don't need it */
2247 		if (!ring || ring->no_scheduler)
2248 			continue;
2249 
2250 		switch (ring->funcs->type) {
2251 		case AMDGPU_RING_TYPE_GFX:
2252 			timeout = adev->gfx_timeout;
2253 			break;
2254 		case AMDGPU_RING_TYPE_COMPUTE:
2255 			timeout = adev->compute_timeout;
2256 			break;
2257 		case AMDGPU_RING_TYPE_SDMA:
2258 			timeout = adev->sdma_timeout;
2259 			break;
2260 		default:
2261 			timeout = adev->video_timeout;
2262 			break;
2263 		}
2264 
2265 		args.timeout = timeout;
2266 		args.credit_limit = ring->num_hw_submission;
2267 		args.score = ring->sched_score;
2268 		args.name = ring->name;
2269 
2270 		r = drm_sched_init(&ring->sched, &args);
2271 		if (r) {
2272 			dev_err(adev->dev,
2273 				"Failed to create scheduler on ring %s.\n",
2274 				ring->name);
2275 			return r;
2276 		}
2277 		r = amdgpu_uvd_entity_init(adev, ring);
2278 		if (r) {
2279 			dev_err(adev->dev,
2280 				"Failed to create UVD scheduling entity on ring %s.\n",
2281 				ring->name);
2282 			return r;
2283 		}
2284 		r = amdgpu_vce_entity_init(adev, ring);
2285 		if (r) {
2286 			dev_err(adev->dev,
2287 				"Failed to create VCE scheduling entity on ring %s.\n",
2288 				ring->name);
2289 			return r;
2290 		}
2291 	}
2292 
2293 	if (adev->xcp_mgr)
2294 		amdgpu_xcp_update_partition_sched_list(adev);
2295 
2296 	return 0;
2297 }
2298 
2299 
/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  After RAS init, the list of
 * all valid hardware IPs that make up the asic is walked and the sw_init
 * callbacks are run.  During that walk the COMMON and GMC blocks also get
 * their hw_init run right away (GMC additionally sets up scratch memory,
 * writeback, the optional static CSA and seq64).  The remaining blocks are
 * hw-initialized afterwards through hw_init phase 1, firmware loading and
 * hw_init phase 2, followed by RAS recovery init, XGMI hive/reset-domain
 * setup, scheduler creation, KFD device init and CPER init.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	bool init_badpage;
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* sw_init is optional; a block without it still counts as sw-initialized */
		if (adev->ip_blocks[i].version->funcs->sw_init) {
			r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
			if (r) {
				dev_err(adev->dev,
					"sw_init of IP block <%s> failed %d\n",
					adev->ip_blocks[i].version->funcs->name,
					r);
				goto init_failed;
			}
		}
		adev->ip_blocks[i].status.sw = true;

		/* blocks outside the current hw init set only get sw_init here */
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* need to do common hw init early so everything is set up for gmc */
			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
			if (r) {
				dev_err(adev->dev, "hw_init %d failed %d\n", i,
					r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;
		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			/* need to do gmc hw init early so we can allocate gpu mem */
			/* Try to reserve bad pages early */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_exchange_data(adev);

			r = amdgpu_device_mem_scratch_init(adev);
			if (r) {
				dev_err(adev->dev,
					"amdgpu_mem_scratch_init failed %d\n",
					r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
			if (r) {
				dev_err(adev->dev, "hw_init %d failed %d\n", i,
					r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				dev_err(adev->dev,
					"amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			/* the hw flag is only set once writeback init succeeded as well */
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (adev->gfx.mcbp) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM |
							       AMDGPU_GEM_DOMAIN_GTT,
							       AMDGPU_CSA_SIZE);
				if (r) {
					dev_err(adev->dev,
						"allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}

			r = amdgpu_seq64_init(adev);
			if (r) {
				dev_err(adev->dev, "allocate seq64 failed %d\n",
					r);
				goto init_failed;
			}
		}
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
	if (r)
		goto init_failed;

	/* bring up the remaining hardware: phase 1, firmware, then phase 2 */
	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	/*
	 * retired pages will be loaded from eeprom and reserved here,
	 * it should be called after amdgpu_device_ip_hw_init_phase2  since
	 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
	 * for I2C communication which only true at this point.
	 *
	 * amdgpu_ras_recovery_init may fail, but the upper only cares the
	 * failure from bad gpu situation and stop amdgpu init process
	 * accordingly. For other failed cases, it will still release all
	 * the resource and print error message, rather than returning one
	 * negative value to upper level.
	 *
	 * Note: theoretically, this should be called before all vram allocations
	 * to protect retired page from abusing
	 */
	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
	r = amdgpu_ras_recovery_init(adev, init_badpage);
	if (r)
		goto init_failed;

	/*
	 * In case of XGMI grab extra reference for reset domain for this device
	 */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (amdgpu_xgmi_add_device(adev) == 0) {
			if (!amdgpu_sriov_vf(adev)) {
				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

				if (WARN_ON(!hive)) {
					r = -ENOENT;
					goto init_failed;
				}

				/* the hive reference taken above is dropped on both paths below */
				if (!hive->reset_domain ||
				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
					r = -ENOENT;
					amdgpu_put_xgmi_hive(hive);
					goto init_failed;
				}

				/* Drop the early temporary reset domain we created for device */
				amdgpu_reset_put_reset_domain(adev->reset_domain);
				adev->reset_domain = hive->reset_domain;
				amdgpu_put_xgmi_hive(hive);
			}
		}
	}

	r = amdgpu_device_init_schedulers(adev);
	if (r)
		goto init_failed;

	amdgpu_ttm_enable_buffer_funcs(adev);

	/* Don't init kfd if whole hive need to be reset during init */
	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
		amdgpu_amdkfd_device_init(adev);
	}

	amdgpu_fru_get_product_info(adev);

	/* the CPER init result becomes the function's return value */
	r = amdgpu_cper_init(adev);

init_failed:

	return r;
}
2490 
2491 /**
2492  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2493  *
2494  * @adev: amdgpu_device pointer
2495  *
2496  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2497  * this function before a GPU reset.  If the value is retained after a
2498  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2499  */
2500 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2501 {
2502 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2503 }
2504 
2505 /**
2506  * amdgpu_device_check_vram_lost - check if vram is valid
2507  *
2508  * @adev: amdgpu_device pointer
2509  *
2510  * Checks the reset magic value written to the gart pointer in VRAM.
2511  * The driver calls this after a GPU reset to see if the contents of
2512  * VRAM is lost or now.
2513  * returns true if vram is lost, false if not.
2514  */
2515 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2516 {
2517 	if (memcmp(adev->gart.ptr, adev->reset_magic,
2518 			AMDGPU_RESET_MAGIC_NUM))
2519 		return true;
2520 
2521 	if (!amdgpu_in_reset(adev))
2522 		return false;
2523 
2524 	/*
2525 	 * For all ASICs with baco/mode1 reset, the VRAM is
2526 	 * always assumed to be lost.
2527 	 */
2528 	switch (amdgpu_asic_reset_method(adev)) {
2529 	case AMD_RESET_METHOD_LEGACY:
2530 	case AMD_RESET_METHOD_LINK:
2531 	case AMD_RESET_METHOD_BACO:
2532 	case AMD_RESET_METHOD_MODE1:
2533 		return true;
2534 	default:
2535 		return false;
2536 	}
2537 }
2538 
2539 /**
2540  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2541  *
2542  * @adev: amdgpu_device pointer
2543  * @state: clockgating state (gate or ungate)
2544  *
2545  * The list of all the hardware IPs that make up the asic is walked and the
2546  * set_clockgating_state callbacks are run.
2547  * Late initialization pass enabling clockgating for hardware IPs.
2548  * Fini or suspend, pass disabling clockgating for hardware IPs.
2549  * Returns 0 on success, negative error code on failure.
2550  */
2551 
2552 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2553 			       enum amd_clockgating_state state)
2554 {
2555 	int i, j, r;
2556 
2557 	if (amdgpu_emu_mode == 1)
2558 		return 0;
2559 
2560 	for (j = 0; j < adev->num_ip_blocks; j++) {
2561 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2562 		if (!adev->ip_blocks[i].status.late_initialized)
2563 			continue;
2564 		if (!adev->ip_blocks[i].version)
2565 			continue;
2566 		/* skip CG for GFX, SDMA on S0ix */
2567 		if (adev->in_s0ix &&
2568 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2569 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2570 			continue;
2571 		/* skip CG for VCE/UVD, it's handled specially */
2572 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2573 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2574 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2575 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2576 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2577 			/* enable clockgating to save power */
2578 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
2579 										     state);
2580 			if (r) {
2581 				dev_err(adev->dev,
2582 					"set_clockgating_state(gate) of IP block <%s> failed %d\n",
2583 					adev->ip_blocks[i].version->funcs->name,
2584 					r);
2585 				return r;
2586 			}
2587 		}
2588 	}
2589 
2590 	return 0;
2591 }
2592 
2593 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2594 			       enum amd_powergating_state state)
2595 {
2596 	int i, j, r;
2597 
2598 	if (amdgpu_emu_mode == 1)
2599 		return 0;
2600 
2601 	for (j = 0; j < adev->num_ip_blocks; j++) {
2602 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2603 		if (!adev->ip_blocks[i].status.late_initialized)
2604 			continue;
2605 		if (!adev->ip_blocks[i].version)
2606 			continue;
2607 		/* skip PG for GFX, SDMA on S0ix */
2608 		if (adev->in_s0ix &&
2609 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2610 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2611 			continue;
2612 		/* skip CG for VCE/UVD, it's handled specially */
2613 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2614 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2615 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2616 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2617 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
2618 			/* enable powergating to save power */
2619 			r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
2620 											state);
2621 			if (r) {
2622 				dev_err(adev->dev,
2623 					"set_powergating_state(gate) of IP block <%s> failed %d\n",
2624 					adev->ip_blocks[i].version->funcs->name,
2625 					r);
2626 				return r;
2627 			}
2628 		}
2629 	}
2630 	return 0;
2631 }
2632 
2633 static int amdgpu_device_enable_mgpu_fan_boost(void)
2634 {
2635 	struct amdgpu_gpu_instance *gpu_ins;
2636 	struct amdgpu_device *adev;
2637 	int i, ret = 0;
2638 
2639 	mutex_lock(&mgpu_info.mutex);
2640 
2641 	/*
2642 	 * MGPU fan boost feature should be enabled
2643 	 * only when there are two or more dGPUs in
2644 	 * the system
2645 	 */
2646 	if (mgpu_info.num_dgpu < 2)
2647 		goto out;
2648 
2649 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
2650 		gpu_ins = &(mgpu_info.gpu_ins[i]);
2651 		adev = gpu_ins->adev;
2652 		if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
2653 		    !gpu_ins->mgpu_fan_enabled) {
2654 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2655 			if (ret)
2656 				break;
2657 
2658 			gpu_ins->mgpu_fan_enabled = 1;
2659 		}
2660 	}
2661 
2662 out:
2663 	mutex_unlock(&mgpu_info.mutex);
2664 
2665 	return ret;
2666 }
2667 
2668 /**
2669  * amdgpu_device_ip_late_init - run late init for hardware IPs
2670  *
2671  * @adev: amdgpu_device pointer
2672  *
2673  * Late initialization pass for hardware IPs.  The list of all the hardware
2674  * IPs that make up the asic is walked and the late_init callbacks are run.
2675  * late_init covers any special initialization that an IP requires
2676  * after all of the have been initialized or something that needs to happen
2677  * late in the init process.
2678  * Returns 0 on success, negative error code on failure.
2679  */
2680 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2681 {
2682 	struct amdgpu_gpu_instance *gpu_instance;
2683 	int i = 0, r;
2684 
2685 	for (i = 0; i < adev->num_ip_blocks; i++) {
2686 		if (!adev->ip_blocks[i].status.hw)
2687 			continue;
2688 		if (adev->ip_blocks[i].version->funcs->late_init) {
2689 			r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
2690 			if (r) {
2691 				dev_err(adev->dev,
2692 					"late_init of IP block <%s> failed %d\n",
2693 					adev->ip_blocks[i].version->funcs->name,
2694 					r);
2695 				return r;
2696 			}
2697 		}
2698 		adev->ip_blocks[i].status.late_initialized = true;
2699 	}
2700 
2701 	r = amdgpu_ras_late_init(adev);
2702 	if (r) {
2703 		dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
2704 		return r;
2705 	}
2706 
2707 	if (!amdgpu_reset_in_recovery(adev))
2708 		amdgpu_ras_set_error_query_ready(adev, true);
2709 
2710 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2711 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2712 
2713 	amdgpu_device_fill_reset_magic(adev);
2714 
2715 	r = amdgpu_device_enable_mgpu_fan_boost();
2716 	if (r)
2717 		dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
2718 
2719 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2720 	if (amdgpu_passthrough(adev) &&
2721 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2722 	     adev->asic_type == CHIP_ALDEBARAN))
2723 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
2724 
2725 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
2726 		mutex_lock(&mgpu_info.mutex);
2727 
2728 		/*
2729 		 * Reset device p-state to low as this was booted with high.
2730 		 *
2731 		 * This should be performed only after all devices from the same
2732 		 * hive get initialized.
2733 		 *
2734 		 * However, it's unknown how many device in the hive in advance.
2735 		 * As this is counted one by one during devices initializations.
2736 		 *
2737 		 * So, we wait for all XGMI interlinked devices initialized.
2738 		 * This may bring some delays as those devices may come from
2739 		 * different hives. But that should be OK.
2740 		 */
2741 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2742 			for (i = 0; i < mgpu_info.num_gpu; i++) {
2743 				gpu_instance = &(mgpu_info.gpu_ins[i]);
2744 				if (gpu_instance->adev->flags & AMD_IS_APU)
2745 					continue;
2746 
2747 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2748 						AMDGPU_XGMI_PSTATE_MIN);
2749 				if (r) {
2750 					dev_err(adev->dev,
2751 						"pstate setting failed (%d).\n",
2752 						r);
2753 					break;
2754 				}
2755 			}
2756 		}
2757 
2758 		mutex_unlock(&mgpu_info.mutex);
2759 	}
2760 
2761 	return 0;
2762 }
2763 
2764 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
2765 {
2766 	struct amdgpu_device *adev = ip_block->adev;
2767 	int r;
2768 
2769 	if (!ip_block->version->funcs->hw_fini) {
2770 		dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
2771 			ip_block->version->funcs->name);
2772 	} else {
2773 		r = ip_block->version->funcs->hw_fini(ip_block);
2774 		/* XXX handle errors */
2775 		if (r) {
2776 			dev_dbg(adev->dev,
2777 				"hw_fini of IP block <%s> failed %d\n",
2778 				ip_block->version->funcs->name, r);
2779 		}
2780 	}
2781 
2782 	ip_block->status.hw = false;
2783 }
2784 
2785 /**
2786  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2787  *
2788  * @adev: amdgpu_device pointer
2789  *
2790  * For ASICs need to disable SMC first
2791  */
2792 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2793 {
2794 	int i;
2795 
2796 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
2797 		return;
2798 
2799 	for (i = 0; i < adev->num_ip_blocks; i++) {
2800 		if (!adev->ip_blocks[i].status.hw)
2801 			continue;
2802 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2803 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2804 			break;
2805 		}
2806 	}
2807 }
2808 
2809 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2810 {
2811 	int i, r;
2812 
2813 	for (i = 0; i < adev->num_ip_blocks; i++) {
2814 		if (!adev->ip_blocks[i].version)
2815 			continue;
2816 		if (!adev->ip_blocks[i].version->funcs->early_fini)
2817 			continue;
2818 
2819 		r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
2820 		if (r) {
2821 			dev_dbg(adev->dev,
2822 				"early_fini of IP block <%s> failed %d\n",
2823 				adev->ip_blocks[i].version->funcs->name, r);
2824 		}
2825 	}
2826 
2827 	amdgpu_amdkfd_suspend(adev, true);
2828 	amdgpu_amdkfd_teardown_processes(adev);
2829 	amdgpu_userq_suspend(adev);
2830 
2831 	/* Workaround for ASICs need to disable SMC first */
2832 	amdgpu_device_smu_fini_early(adev);
2833 
2834 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2835 		if (!adev->ip_blocks[i].status.hw)
2836 			continue;
2837 
2838 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
2839 	}
2840 
2841 	if (amdgpu_sriov_vf(adev)) {
2842 		if (amdgpu_virt_release_full_gpu(adev, false))
2843 			dev_err(adev->dev,
2844 				"failed to release exclusive mode on fini\n");
2845 	}
2846 
2847 	/*
2848 	 * Driver reload on the APU can fail due to firmware validation because
2849 	 * the PSP is always running, as it is shared across the whole SoC.
2850 	 * This same issue does not occur on dGPU because it has a mechanism
2851 	 * that checks whether the PSP is running. A solution for those issues
2852 	 * in the APU is to trigger a GPU reset, but this should be done during
2853 	 * the unload phase to avoid adding boot latency and screen flicker.
2854 	 * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends
2855 	 * a request to PMFW to unload MP1, PMFW will put GC in reset and power down
2856 	 * the voltage. Hence, skipping reset for APUs with GFX V11 or later.
2857 	 */
2858 	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu &&
2859 		amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) {
2860 		r = amdgpu_asic_reset(adev);
2861 		if (r)
2862 			dev_err(adev->dev, "asic reset on %s failed\n", __func__);
2863 	}
2864 
2865 	return 0;
2866 }
2867 
2868 /**
2869  * amdgpu_device_ip_fini - run fini for hardware IPs
2870  *
2871  * @adev: amdgpu_device pointer
2872  *
2873  * Main teardown pass for hardware IPs.  The list of all the hardware
2874  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2875  * are run.  hw_fini tears down the hardware associated with each IP
2876  * and sw_fini tears down any software state associated with each IP.
2877  * Returns 0 on success, negative error code on failure.
2878  */
2879 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2880 {
2881 	int i, r;
2882 
2883 	amdgpu_cper_fini(adev);
2884 
2885 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2886 		amdgpu_virt_release_ras_err_handler_data(adev);
2887 
2888 	if (adev->gmc.xgmi.num_physical_nodes > 1)
2889 		amdgpu_xgmi_remove_device(adev);
2890 
2891 	amdgpu_amdkfd_device_fini_sw(adev);
2892 
2893 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2894 		if (!adev->ip_blocks[i].status.sw)
2895 			continue;
2896 
2897 		if (!adev->ip_blocks[i].version)
2898 			continue;
2899 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2900 			amdgpu_ucode_free_bo(adev);
2901 			amdgpu_free_static_csa(&adev->virt.csa_obj);
2902 			amdgpu_device_wb_fini(adev);
2903 			amdgpu_device_mem_scratch_fini(adev);
2904 			amdgpu_ib_pool_fini(adev);
2905 			amdgpu_seq64_fini(adev);
2906 			amdgpu_doorbell_fini(adev);
2907 		}
2908 		if (adev->ip_blocks[i].version->funcs->sw_fini) {
2909 			r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
2910 			/* XXX handle errors */
2911 			if (r) {
2912 				dev_dbg(adev->dev,
2913 					"sw_fini of IP block <%s> failed %d\n",
2914 					adev->ip_blocks[i].version->funcs->name,
2915 					r);
2916 			}
2917 		}
2918 		adev->ip_blocks[i].status.sw = false;
2919 		adev->ip_blocks[i].status.valid = false;
2920 	}
2921 
2922 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2923 		if (!adev->ip_blocks[i].status.late_initialized)
2924 			continue;
2925 		if (!adev->ip_blocks[i].version)
2926 			continue;
2927 		if (adev->ip_blocks[i].version->funcs->late_fini)
2928 			adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
2929 		adev->ip_blocks[i].status.late_initialized = false;
2930 	}
2931 
2932 	amdgpu_ras_fini(adev);
2933 	amdgpu_uid_fini(adev);
2934 
2935 	return 0;
2936 }
2937 
2938 /**
2939  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2940  *
2941  * @work: work_struct.
2942  */
2943 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2944 {
2945 	struct amdgpu_device *adev =
2946 		container_of(work, struct amdgpu_device, delayed_init_work.work);
2947 	int r;
2948 
2949 	r = amdgpu_ib_ring_tests(adev);
2950 	if (r)
2951 		dev_err(adev->dev, "ib ring test failed (%d).\n", r);
2952 }
2953 
2954 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2955 {
2956 	struct amdgpu_device *adev =
2957 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2958 
2959 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
2960 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2961 
2962 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
2963 		adev->gfx.gfx_off_state = true;
2964 }
2965 
2966 /**
2967  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2968  *
2969  * @adev: amdgpu_device pointer
2970  *
2971  * Main suspend function for hardware IPs.  The list of all the hardware
2972  * IPs that make up the asic is walked, clockgating is disabled and the
2973  * suspend callbacks are run.  suspend puts the hardware and software state
2974  * in each IP into a state suitable for suspend.
2975  * Returns 0 on success, negative error code on failure.
2976  */
2977 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2978 {
2979 	int i, r, rec;
2980 
2981 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2982 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2983 
2984 	/*
2985 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
2986 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2987 	 * scenario. Add the missing df cstate disablement here.
2988 	 */
2989 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2990 		dev_warn(adev->dev, "Failed to disallow df cstate");
2991 
2992 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2993 		if (!adev->ip_blocks[i].status.valid)
2994 			continue;
2995 
2996 		/* displays are handled separately */
2997 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2998 			continue;
2999 
3000 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3001 		if (r)
3002 			goto unwind;
3003 	}
3004 
3005 	return 0;
3006 unwind:
3007 	rec = amdgpu_device_ip_resume_phase3(adev);
3008 	if (rec)
3009 		dev_err(adev->dev,
3010 			"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3011 			rec);
3012 
3013 	amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
3014 
3015 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3016 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3017 
3018 	return r;
3019 }
3020 
3021 /**
3022  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3023  *
3024  * @adev: amdgpu_device pointer
3025  *
3026  * Main suspend function for hardware IPs.  The list of all the hardware
3027  * IPs that make up the asic is walked, clockgating is disabled and the
3028  * suspend callbacks are run.  suspend puts the hardware and software state
3029  * in each IP into a state suitable for suspend.
3030  * Returns 0 on success, negative error code on failure.
3031  */
3032 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3033 {
3034 	int i, r, rec;
3035 
3036 	if (adev->in_s0ix)
3037 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3038 
3039 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3040 		if (!adev->ip_blocks[i].status.valid)
3041 			continue;
3042 		/* displays are handled in phase1 */
3043 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3044 			continue;
3045 		/* PSP lost connection when err_event_athub occurs */
3046 		if (amdgpu_ras_intr_triggered() &&
3047 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3048 			adev->ip_blocks[i].status.hw = false;
3049 			continue;
3050 		}
3051 
3052 		/* skip unnecessary suspend if we do not initialize them yet */
3053 		if (!amdgpu_ip_member_of_hwini(
3054 			    adev, adev->ip_blocks[i].version->type))
3055 			continue;
3056 
3057 		/* Since we skip suspend for S0i3, we need to cancel the delayed
3058 		 * idle work here as the suspend callback never gets called.
3059 		 */
3060 		if (adev->in_s0ix &&
3061 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3062 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3063 			cancel_delayed_work_sync(&adev->gfx.idle_work);
3064 		/* skip suspend of gfx/mes and psp for S0ix
3065 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3066 		 * like at runtime. PSP is also part of the always on hardware
3067 		 * so no need to suspend it.
3068 		 */
3069 		if (adev->in_s0ix &&
3070 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3071 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3072 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3073 			continue;
3074 
3075 		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3076 		if (adev->in_s0ix &&
3077 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3078 		     IP_VERSION(5, 0, 0)) &&
3079 		    (adev->ip_blocks[i].version->type ==
3080 		     AMD_IP_BLOCK_TYPE_SDMA))
3081 			continue;
3082 
3083 		/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3084 		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3085 		 * from this location and RLC Autoload automatically also gets loaded
3086 		 * from here based on PMFW -> PSP message during re-init sequence.
3087 		 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3088 		 * the TMR and reload FWs again for IMU enabled APU ASICs.
3089 		 */
3090 		if (amdgpu_in_reset(adev) &&
3091 		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3092 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3093 			continue;
3094 
3095 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3096 		if (r)
3097 			goto unwind;
3098 
3099 		/* handle putting the SMC in the appropriate state */
3100 		if (!amdgpu_sriov_vf(adev)) {
3101 			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3102 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3103 				if (r) {
3104 					dev_err(adev->dev,
3105 						"SMC failed to set mp1 state %d, %d\n",
3106 						adev->mp1_state, r);
3107 					goto unwind;
3108 				}
3109 			}
3110 		}
3111 	}
3112 
3113 	return 0;
3114 unwind:
3115 	/* suspend phase 2 = resume phase 1 + resume phase 2 */
3116 	rec = amdgpu_device_ip_resume_phase1(adev);
3117 	if (rec) {
3118 		dev_err(adev->dev,
3119 			"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3120 			rec);
3121 		return r;
3122 	}
3123 
3124 	rec = amdgpu_device_fw_loading(adev);
3125 	if (rec) {
3126 		dev_err(adev->dev,
3127 			"amdgpu_device_fw_loading failed during unwind: %d\n",
3128 			rec);
3129 		return r;
3130 	}
3131 
3132 	rec = amdgpu_device_ip_resume_phase2(adev);
3133 	if (rec) {
3134 		dev_err(adev->dev,
3135 			"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3136 			rec);
3137 		return r;
3138 	}
3139 
3140 	return r;
3141 }
3142 
3143 /**
3144  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3145  *
3146  * @adev: amdgpu_device pointer
3147  *
3148  * Main suspend function for hardware IPs.  The list of all the hardware
3149  * IPs that make up the asic is walked, clockgating is disabled and the
3150  * suspend callbacks are run.  suspend puts the hardware and software state
3151  * in each IP into a state suitable for suspend.
3152  * Returns 0 on success, negative error code on failure.
3153  */
3154 static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3155 {
3156 	int r;
3157 
3158 	if (amdgpu_sriov_vf(adev)) {
3159 		amdgpu_virt_fini_data_exchange(adev);
3160 		amdgpu_virt_request_full_gpu(adev, false);
3161 	}
3162 
3163 	amdgpu_ttm_disable_buffer_funcs(adev);
3164 
3165 	r = amdgpu_device_ip_suspend_phase1(adev);
3166 	if (r)
3167 		return r;
3168 	r = amdgpu_device_ip_suspend_phase2(adev);
3169 
3170 	if (amdgpu_sriov_vf(adev))
3171 		amdgpu_virt_release_full_gpu(adev, false);
3172 
3173 	return r;
3174 }
3175 
3176 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3177 {
3178 	int i, r;
3179 
3180 	static enum amd_ip_block_type ip_order[] = {
3181 		AMD_IP_BLOCK_TYPE_COMMON,
3182 		AMD_IP_BLOCK_TYPE_GMC,
3183 		AMD_IP_BLOCK_TYPE_PSP,
3184 		AMD_IP_BLOCK_TYPE_IH,
3185 	};
3186 
3187 	for (i = 0; i < adev->num_ip_blocks; i++) {
3188 		int j;
3189 		struct amdgpu_ip_block *block;
3190 
3191 		block = &adev->ip_blocks[i];
3192 		block->status.hw = false;
3193 
3194 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3195 
3196 			if (block->version->type != ip_order[j] ||
3197 				!block->status.valid)
3198 				continue;
3199 
3200 			r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3201 			if (r) {
3202 				dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3203 					 block->version->funcs->name);
3204 				return r;
3205 			}
3206 			block->status.hw = true;
3207 		}
3208 	}
3209 
3210 	return 0;
3211 }
3212 
3213 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3214 {
3215 	struct amdgpu_ip_block *block;
3216 	int i, r = 0;
3217 
3218 	static enum amd_ip_block_type ip_order[] = {
3219 		AMD_IP_BLOCK_TYPE_SMC,
3220 		AMD_IP_BLOCK_TYPE_DCE,
3221 		AMD_IP_BLOCK_TYPE_GFX,
3222 		AMD_IP_BLOCK_TYPE_SDMA,
3223 		AMD_IP_BLOCK_TYPE_MES,
3224 		AMD_IP_BLOCK_TYPE_UVD,
3225 		AMD_IP_BLOCK_TYPE_VCE,
3226 		AMD_IP_BLOCK_TYPE_VCN,
3227 		AMD_IP_BLOCK_TYPE_JPEG
3228 	};
3229 
3230 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3231 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
3232 
3233 		if (!block)
3234 			continue;
3235 
3236 		if (block->status.valid && !block->status.hw) {
3237 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3238 				r = amdgpu_ip_block_resume(block);
3239 			} else {
3240 				r = block->version->funcs->hw_init(block);
3241 			}
3242 
3243 			if (r) {
3244 				dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3245 					 block->version->funcs->name);
3246 				break;
3247 			}
3248 			block->status.hw = true;
3249 		}
3250 	}
3251 
3252 	return r;
3253 }
3254 
3255 /**
3256  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3257  *
3258  * @adev: amdgpu_device pointer
3259  *
3260  * First resume function for hardware IPs.  The list of all the hardware
3261  * IPs that make up the asic is walked and the resume callbacks are run for
3262  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3263  * after a suspend and updates the software state as necessary.  This
3264  * function is also used for restoring the GPU after a GPU reset.
3265  * Returns 0 on success, negative error code on failure.
3266  */
3267 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3268 {
3269 	int i, r;
3270 
3271 	for (i = 0; i < adev->num_ip_blocks; i++) {
3272 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3273 			continue;
3274 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3275 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3276 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3277 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3278 
3279 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3280 			if (r)
3281 				return r;
3282 		}
3283 	}
3284 
3285 	return 0;
3286 }
3287 
3288 /**
3289  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3290  *
3291  * @adev: amdgpu_device pointer
3292  *
3293  * Second resume function for hardware IPs.  The list of all the hardware
3294  * IPs that make up the asic is walked and the resume callbacks are run for
3295  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3296  * functional state after a suspend and updates the software state as
3297  * necessary.  This function is also used for restoring the GPU after a GPU
3298  * reset.
3299  * Returns 0 on success, negative error code on failure.
3300  */
3301 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3302 {
3303 	int i, r;
3304 
3305 	for (i = 0; i < adev->num_ip_blocks; i++) {
3306 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3307 			continue;
3308 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3309 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3310 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3311 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3312 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3313 			continue;
3314 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3315 		if (r)
3316 			return r;
3317 	}
3318 
3319 	return 0;
3320 }
3321 
3322 /**
3323  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3324  *
3325  * @adev: amdgpu_device pointer
3326  *
3327  * Third resume function for hardware IPs.  The list of all the hardware
3328  * IPs that make up the asic is walked and the resume callbacks are run for
3329  * all DCE.  resume puts the hardware into a functional state after a suspend
3330  * and updates the software state as necessary.  This function is also used
3331  * for restoring the GPU after a GPU reset.
3332  *
3333  * Returns 0 on success, negative error code on failure.
3334  */
3335 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3336 {
3337 	int i, r;
3338 
3339 	for (i = 0; i < adev->num_ip_blocks; i++) {
3340 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3341 			continue;
3342 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3343 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3344 			if (r)
3345 				return r;
3346 		}
3347 	}
3348 
3349 	return 0;
3350 }
3351 
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  Resume is split into separate
 * phase functions; here they are simply run one after the other: resume
 * phase 1, firmware loading, resume phase 2, fence driver hw init and
 * finally resume phase 3.  Buffer functions are re-enabled right after
 * phase 2 has run, whether or not it succeeded.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret;

	ret = amdgpu_device_ip_resume_phase1(adev);
	if (!ret)
		ret = amdgpu_device_fw_loading(adev);
	if (ret)
		return ret;

	ret = amdgpu_device_ip_resume_phase2(adev);

	/* done before looking at the phase 2 result */
	amdgpu_ttm_enable_buffer_funcs(adev);

	if (ret)
		return ret;

	amdgpu_fence_driver_hw_init(adev);

	return amdgpu_device_ip_resume_phase3(adev);
}
3389 
3390 /**
3391  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3392  *
3393  * @adev: amdgpu_device pointer
3394  *
3395  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3396  */
3397 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3398 {
3399 	if (amdgpu_sriov_vf(adev)) {
3400 		if (adev->is_atom_fw) {
3401 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3402 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3403 		} else {
3404 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3405 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3406 		}
3407 
3408 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3409 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3410 	}
3411 }
3412 
3413 /**
3414  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3415  *
3416  * @pdev : pci device context
3417  * @asic_type: AMD asic type
3418  *
3419  * Check if there is DC (new modesetting infrastructre) support for an asic.
3420  * returns true if DC has support, false if not.
3421  */
3422 bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
3423 				       enum amd_asic_type asic_type)
3424 {
3425 	switch (asic_type) {
3426 #ifdef CONFIG_DRM_AMDGPU_SI
3427 	case CHIP_HAINAN:
3428 #endif
3429 	case CHIP_TOPAZ:
3430 		/* chips with no display hardware */
3431 		return false;
3432 #if defined(CONFIG_DRM_AMD_DC)
3433 	case CHIP_TAHITI:
3434 	case CHIP_PITCAIRN:
3435 	case CHIP_VERDE:
3436 	case CHIP_OLAND:
3437 		return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
3438 	default:
3439 		return amdgpu_dc != 0;
3440 #else
3441 	default:
3442 		if (amdgpu_dc > 0)
3443 			dev_info_once(
3444 				&pdev->dev,
3445 				"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3446 		return false;
3447 #endif
3448 	}
3449 }
3450 
3451 /**
3452  * amdgpu_device_has_dc_support - check if dc is supported
3453  *
3454  * @adev: amdgpu_device pointer
3455  *
3456  * Returns true for supported, false for not supported
3457  */
3458 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3459 {
3460 	if (adev->enable_virtual_display ||
3461 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3462 		return false;
3463 
3464 	return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
3465 }
3466 
3467 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3468 {
3469 	struct amdgpu_device *adev =
3470 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3471 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3472 
3473 	/* It's a bug to not have a hive within this function */
3474 	if (WARN_ON(!hive))
3475 		return;
3476 
3477 	/*
3478 	 * Use task barrier to synchronize all xgmi reset works across the
3479 	 * hive. task_barrier_enter and task_barrier_exit will block
3480 	 * until all the threads running the xgmi reset works reach
3481 	 * those points. task_barrier_full will do both blocks.
3482 	 */
3483 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3484 
3485 		task_barrier_enter(&hive->tb);
3486 		adev->asic_reset_res = amdgpu_device_baco_enter(adev);
3487 
3488 		if (adev->asic_reset_res)
3489 			goto fail;
3490 
3491 		task_barrier_exit(&hive->tb);
3492 		adev->asic_reset_res = amdgpu_device_baco_exit(adev);
3493 
3494 		if (adev->asic_reset_res)
3495 			goto fail;
3496 
3497 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3498 	} else {
3499 
3500 		task_barrier_full(&hive->tb);
3501 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3502 	}
3503 
3504 fail:
3505 	if (adev->asic_reset_res)
3506 		dev_warn(adev->dev,
3507 			 "ASIC reset failed with error, %d for drm dev, %s",
3508 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3509 	amdgpu_put_xgmi_hive(hive);
3510 }
3511 
3512 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3513 {
3514 	char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
3515 	char *input = buf;
3516 	char *timeout_setting = NULL;
3517 	int index = 0;
3518 	long timeout;
3519 	int ret = 0;
3520 
3521 	/* By default timeout for all queues is 2 sec */
3522 	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3523 		adev->video_timeout = msecs_to_jiffies(2000);
3524 
3525 	if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
3526 		return 0;
3527 
3528 	/*
3529 	 * strsep() destructively modifies its input by replacing delimiters
3530 	 * with '\0'. Use a stack copy so the global module parameter buffer
3531 	 * remains intact for multi-GPU systems where this function is called
3532 	 * once per device.
3533 	 */
3534 	strscpy(buf, amdgpu_lockup_timeout, sizeof(buf));
3535 
3536 	while ((timeout_setting = strsep(&input, ",")) &&
3537 	       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3538 		ret = kstrtol(timeout_setting, 0, &timeout);
3539 		if (ret)
3540 			return ret;
3541 
3542 		if (timeout == 0) {
3543 			index++;
3544 			continue;
3545 		} else if (timeout < 0) {
3546 			timeout = MAX_SCHEDULE_TIMEOUT;
3547 			dev_warn(adev->dev, "lockup timeout disabled");
3548 			add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3549 		} else {
3550 			timeout = msecs_to_jiffies(timeout);
3551 		}
3552 
3553 		switch (index++) {
3554 		case 0:
3555 			adev->gfx_timeout = timeout;
3556 			break;
3557 		case 1:
3558 			adev->compute_timeout = timeout;
3559 			break;
3560 		case 2:
3561 			adev->sdma_timeout = timeout;
3562 			break;
3563 		case 3:
3564 			adev->video_timeout = timeout;
3565 			break;
3566 		default:
3567 			break;
3568 		}
3569 	}
3570 
3571 	/* When only one value specified apply it to all queues. */
3572 	if (index == 1)
3573 		adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
3574 			adev->video_timeout = timeout;
3575 
3576 	return ret;
3577 }
3578 
3579 /**
3580  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3581  *
3582  * @adev: amdgpu_device pointer
3583  *
3584  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3585  */
3586 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3587 {
3588 	struct iommu_domain *domain;
3589 
3590 	domain = iommu_get_domain_for_dev(adev->dev);
3591 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3592 		adev->ram_is_direct_mapped = true;
3593 }
3594 
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true if the device has an IOMMU domain of type DMA or DMA_FQ,
 * false otherwise (including when the device has no IOMMU domain).
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(adev->dev);

	if (!dom)
		return false;

	return dom->type == IOMMU_DOMAIN_DMA ||
	       dom->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
3615 
3616 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3617 {
3618 	if (amdgpu_mcbp == 1)
3619 		adev->gfx.mcbp = true;
3620 	else if (amdgpu_mcbp == 0)
3621 		adev->gfx.mcbp = false;
3622 
3623 	if (amdgpu_sriov_vf(adev))
3624 		adev->gfx.mcbp = true;
3625 
3626 	if (adev->gfx.mcbp)
3627 		dev_info(adev->dev, "MCBP is enabled\n");
3628 }
3629 
3630 static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
3631 {
3632 	int r;
3633 
3634 	r = amdgpu_atombios_sysfs_init(adev);
3635 	if (r)
3636 		drm_err(&adev->ddev,
3637 			"registering atombios sysfs failed (%d).\n", r);
3638 
3639 	r = amdgpu_pm_sysfs_init(adev);
3640 	if (r)
3641 		dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
3642 
3643 	r = amdgpu_ucode_sysfs_init(adev);
3644 	if (r) {
3645 		adev->ucode_sysfs_en = false;
3646 		dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
3647 	} else
3648 		adev->ucode_sysfs_en = true;
3649 
3650 	r = amdgpu_device_attr_sysfs_init(adev);
3651 	if (r)
3652 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
3653 
3654 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
3655 	if (r)
3656 		dev_err(adev->dev,
3657 			"Could not create amdgpu board attributes\n");
3658 
3659 	amdgpu_fru_sysfs_init(adev);
3660 	amdgpu_reg_state_sysfs_init(adev);
3661 	amdgpu_xcp_sysfs_init(adev);
3662 	amdgpu_uma_sysfs_init(adev);
3663 	amdgpu_ptl_sysfs_init(adev);
3664 
3665 	return r;
3666 }
3667 
3668 static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
3669 {
3670 	if (adev->pm.sysfs_initialized)
3671 		amdgpu_pm_sysfs_fini(adev);
3672 	if (adev->ucode_sysfs_en)
3673 		amdgpu_ucode_sysfs_fini(adev);
3674 	amdgpu_device_attr_sysfs_fini(adev);
3675 	amdgpu_fru_sysfs_fini(adev);
3676 
3677 	amdgpu_reg_state_sysfs_fini(adev);
3678 	amdgpu_xcp_sysfs_fini(adev);
3679 	amdgpu_uma_sysfs_fini(adev);
3680 	amdgpu_ptl_sysfs_fini(adev);
3681 }
3682 
3683 /**
3684  * amdgpu_device_init - initialize the driver
3685  *
3686  * @adev: amdgpu_device pointer
3687  * @flags: driver flags
3688  *
3689  * Initializes the driver info and hw (all asics).
3690  * Returns 0 for success or an error on failure.
3691  * Called at driver startup.
3692  */
3693 int amdgpu_device_init(struct amdgpu_device *adev,
3694 		       uint32_t flags)
3695 {
3696 	struct pci_dev *pdev = adev->pdev;
3697 	int r, i;
3698 	bool px = false;
3699 	u32 max_MBps;
3700 	int tmp;
3701 
3702 	adev->shutdown = false;
3703 	adev->flags = flags;
3704 
3705 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3706 		adev->asic_type = amdgpu_force_asic_type;
3707 	else
3708 		adev->asic_type = flags & AMD_ASIC_MASK;
3709 
3710 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3711 	if (amdgpu_emu_mode == 1)
3712 		adev->usec_timeout *= 10;
3713 	adev->gmc.gart_size = 512 * 1024 * 1024;
3714 	adev->accel_working = false;
3715 	adev->num_rings = 0;
3716 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3717 	adev->mman.buffer_funcs = NULL;
3718 	adev->mman.num_buffer_funcs_scheds = 0;
3719 	adev->vm_manager.vm_pte_funcs = NULL;
3720 	adev->vm_manager.vm_pte_num_scheds = 0;
3721 	adev->gmc.gmc_funcs = NULL;
3722 	adev->harvest_ip_mask = 0x0;
3723 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3724 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3725 
3726 	amdgpu_reg_access_init(adev);
3727 
3728 	dev_info(
3729 		adev->dev,
3730 		"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3731 		amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3732 		pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3733 
3734 	/* mutex initialization are all done here so we
3735 	 * can recall function without having locking issues
3736 	 */
3737 	mutex_init(&adev->firmware.mutex);
3738 	mutex_init(&adev->pm.mutex);
3739 	mutex_init(&adev->gfx.gpu_clock_mutex);
3740 	mutex_init(&adev->srbm_mutex);
3741 	mutex_init(&adev->gfx.pipe_reserve_mutex);
3742 	mutex_init(&adev->gfx.gfx_off_mutex);
3743 	mutex_init(&adev->gfx.partition_mutex);
3744 	mutex_init(&adev->grbm_idx_mutex);
3745 	mutex_init(&adev->mn_lock);
3746 	mutex_init(&adev->virt.vf_errors.lock);
3747 	hash_init(adev->mn_hash);
3748 	mutex_init(&adev->psp.mutex);
3749 	mutex_init(&adev->psp.ptl.mutex);
3750 	mutex_init(&adev->notifier_lock);
3751 	mutex_init(&adev->pm.stable_pstate_ctx_lock);
3752 	mutex_init(&adev->benchmark_mutex);
3753 	mutex_init(&adev->gfx.reset_sem_mutex);
3754 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
3755 	mutex_init(&adev->enforce_isolation_mutex);
3756 	for (i = 0; i < MAX_XCP; ++i) {
3757 		adev->isolation[i].spearhead = dma_fence_get_stub();
3758 		amdgpu_sync_create(&adev->isolation[i].active);
3759 		amdgpu_sync_create(&adev->isolation[i].prev);
3760 	}
3761 	mutex_init(&adev->gfx.userq_sch_mutex);
3762 	mutex_init(&adev->gfx.workload_profile_mutex);
3763 	mutex_init(&adev->vcn.workload_profile_mutex);
3764 
3765 	amdgpu_device_init_apu_flags(adev);
3766 
3767 	r = amdgpu_device_check_arguments(adev);
3768 	if (r)
3769 		return r;
3770 
3771 	spin_lock_init(&adev->mmio_idx_lock);
3772 	spin_lock_init(&adev->mm_stats.lock);
3773 	spin_lock_init(&adev->virt.rlcg_reg_lock);
3774 	spin_lock_init(&adev->wb.lock);
3775 
3776 	INIT_LIST_HEAD(&adev->reset_list);
3777 
3778 	INIT_LIST_HEAD(&adev->ras_list);
3779 
3780 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3781 
3782 	xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ);
3783 
3784 	INIT_DELAYED_WORK(&adev->delayed_init_work,
3785 			  amdgpu_device_delayed_init_work_handler);
3786 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3787 			  amdgpu_device_delay_enable_gfx_off);
3788 	/*
3789 	 * Initialize the enforce_isolation work structures for each XCP
3790 	 * partition.  This work handler is responsible for enforcing shader
3791 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
3792 	 * each GFX and compute ring.  If there are any fences, it schedules
3793 	 * the `enforce_isolation_work` to be run after a delay.  If there are
3794 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
3795 	 * runqueue.
3796 	 */
3797 	for (i = 0; i < MAX_XCP; i++) {
3798 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
3799 				  amdgpu_gfx_enforce_isolation_handler);
3800 		adev->gfx.enforce_isolation[i].adev = adev;
3801 		adev->gfx.enforce_isolation[i].xcp_id = i;
3802 	}
3803 
3804 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3805 
3806 	amdgpu_coredump_init(adev);
3807 
3808 	adev->gfx.gfx_off_req_count = 1;
3809 	adev->gfx.gfx_off_residency = 0;
3810 	adev->gfx.gfx_off_entrycount = 0;
3811 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3812 
3813 	atomic_set(&adev->throttling_logging_enabled, 1);
3814 	/*
3815 	 * If throttling continues, logging will be performed every minute
3816 	 * to avoid log flooding. "-1" is subtracted since the thermal
3817 	 * throttling interrupt comes every second. Thus, the total logging
	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3819 	 * for throttling interrupt) = 60 seconds.
3820 	 */
3821 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3822 
3823 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3824 
3825 	/* Registers mapping */
3826 	/* TODO: block userspace mapping of io register */
3827 	if (adev->asic_type >= CHIP_BONAIRE) {
3828 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3829 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3830 	} else {
3831 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3832 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3833 	}
3834 
3835 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3836 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3837 
3838 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3839 	if (!adev->rmmio)
3840 		return -ENOMEM;
3841 
3842 	dev_info(adev->dev, "register mmio base: 0x%08X\n",
3843 		 (uint32_t)adev->rmmio_base);
3844 	dev_info(adev->dev, "register mmio size: %u\n",
3845 		 (unsigned int)adev->rmmio_size);
3846 
3847 	/*
3848 	 * Reset domain needs to be present early, before XGMI hive discovered
3849 	 * (if any) and initialized to use reset sem and in_gpu reset flag
3850 	 * early on during init and before calling to RREG32.
3851 	 */
3852 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3853 	if (!adev->reset_domain)
3854 		return -ENOMEM;
3855 
3856 	/* detect hw virtualization here */
3857 	amdgpu_virt_init(adev);
3858 
3859 	amdgpu_device_get_pcie_info(adev);
3860 
3861 	r = amdgpu_device_get_job_timeout_settings(adev);
3862 	if (r) {
3863 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3864 		return r;
3865 	}
3866 
3867 	amdgpu_device_set_mcbp(adev);
3868 
3869 	/*
3870 	 * By default, use default mode where all blocks are expected to be
3871 	 * initialized. At present a 'swinit' of blocks is required to be
3872 	 * completed before the need for a different level is detected.
3873 	 */
3874 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
3875 
3876 	amdgpu_device_check_iommu_direct_map(adev);
3877 
3878 	/* early init functions */
3879 	r = amdgpu_device_ip_early_init(adev);
3880 	if (r)
3881 		return r;
3882 
3883 	/*
3884 	 * No need to remove conflicting FBs for non-display class devices.
	 * This prevents the sysfb from being freed accidentally.
3886 	 */
3887 	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
3888 	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
3889 		/* Get rid of things like offb */
3890 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
3891 		if (r)
3892 			return r;
3893 	}
3894 
3895 	/* Enable TMZ based on IP_VERSION */
3896 	amdgpu_gmc_tmz_set(adev);
3897 
3898 	if (amdgpu_sriov_vf(adev) &&
3899 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
3900 		/* VF MMIO access (except mailbox range) from CPU
3901 		 * will be blocked during sriov runtime
3902 		 */
3903 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
3904 
3905 	amdgpu_gmc_noretry_set(adev);
3906 	/* Need to get xgmi info early to decide the reset behavior*/
3907 	if (adev->gmc.xgmi.supported) {
3908 		if (adev->gfxhub.funcs &&
3909 		    adev->gfxhub.funcs->get_xgmi_info) {
3910 			r = adev->gfxhub.funcs->get_xgmi_info(adev);
3911 			if (r)
3912 				return r;
3913 		}
3914 	}
3915 
3916 	if (adev->gmc.xgmi.connected_to_cpu) {
3917 		if (adev->mmhub.funcs &&
3918 		    adev->mmhub.funcs->get_xgmi_info) {
3919 			r = adev->mmhub.funcs->get_xgmi_info(adev);
3920 			if (r)
3921 				return r;
3922 		}
3923 	}
3924 
3925 	/* enable PCIE atomic ops */
3926 	if (amdgpu_sriov_vf(adev)) {
3927 		if (adev->virt.fw_reserve.p_pf2vf)
3928 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3929 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3930 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
	/* APUs with gfx9 onwards don't rely on PCIe atomics; they have an
	 * internal path that natively supports atomics, so set have_atomics_support to true.
3933 	 */
3934 	} else if ((adev->flags & AMD_IS_APU &&
3935 		   amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) ||
3936 		   (adev->gmc.xgmi.connected_to_cpu &&
3937 		   amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))) {
3938 		adev->have_atomics_support = true;
3939 	} else {
3940 		adev->have_atomics_support =
3941 			!pci_enable_atomic_ops_to_root(adev->pdev,
3942 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3943 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3944 	}
3945 
3946 	if (!adev->have_atomics_support)
3947 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3948 
3949 	/* doorbell bar mapping and doorbell index init*/
3950 	amdgpu_doorbell_init(adev);
3951 
3952 	if (amdgpu_emu_mode == 1) {
3953 		/* post the asic on emulation mode */
3954 		emu_soc_asic_init(adev);
3955 		goto fence_driver_init;
3956 	}
3957 
3958 	amdgpu_reset_init(adev);
3959 
3960 	/* detect if we are with an SRIOV vbios */
3961 	if (adev->bios)
3962 		amdgpu_device_detect_sriov_bios(adev);
3963 
3964 	/* check if we need to reset the asic
3965 	 *  E.g., driver was not cleanly unloaded previously, etc.
3966 	 */
3967 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3968 		if (adev->gmc.xgmi.num_physical_nodes) {
3969 			dev_info(adev->dev, "Pending hive reset.\n");
3970 			amdgpu_set_init_level(adev,
3971 					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3972 		} else {
3973 				tmp = amdgpu_reset_method;
3974 				/* It should do a default reset when loading or reloading the driver,
3975 				 * regardless of the module parameter reset_method.
3976 				 */
3977 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3978 				r = amdgpu_asic_reset(adev);
3979 				amdgpu_reset_method = tmp;
3980 		}
3981 
3982 		if (r) {
3983 		  dev_err(adev->dev, "asic reset on init failed\n");
3984 		  goto failed;
3985 		}
3986 	}
3987 
3988 	/* Post card if necessary */
3989 	if (amdgpu_device_need_post(adev)) {
3990 		if (!adev->bios) {
3991 			dev_err(adev->dev, "no vBIOS found\n");
3992 			r = -EINVAL;
3993 			goto failed;
3994 		}
3995 		dev_info(adev->dev, "GPU posting now...\n");
3996 		r = amdgpu_device_asic_init(adev);
3997 		if (r) {
3998 			dev_err(adev->dev, "gpu post error!\n");
3999 			goto failed;
4000 		}
4001 	}
4002 
4003 	if (adev->bios) {
4004 		if (adev->is_atom_fw) {
4005 			/* Initialize clocks */
4006 			r = amdgpu_atomfirmware_get_clock_info(adev);
4007 			if (r) {
4008 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4009 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4010 				goto failed;
4011 			}
4012 		} else {
4013 			/* Initialize clocks */
4014 			r = amdgpu_atombios_get_clock_info(adev);
4015 			if (r) {
4016 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4017 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4018 				goto failed;
4019 			}
4020 			/* init i2c buses */
4021 			amdgpu_i2c_init(adev);
4022 		}
4023 	}
4024 
4025 fence_driver_init:
4026 	/* Fence driver */
4027 	r = amdgpu_fence_driver_sw_init(adev);
4028 	if (r) {
4029 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4030 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4031 		goto failed;
4032 	}
4033 
4034 	/* init the mode config */
4035 	drm_mode_config_init(adev_to_drm(adev));
4036 
4037 	r = amdgpu_device_ip_init(adev);
4038 	if (r) {
4039 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4040 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4041 		goto release_ras_con;
4042 	}
4043 
4044 	amdgpu_fence_driver_hw_init(adev);
4045 
4046 	dev_info(adev->dev,
4047 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4048 			adev->gfx.config.max_shader_engines,
4049 			adev->gfx.config.max_sh_per_se,
4050 			adev->gfx.config.max_cu_per_sh,
4051 			adev->gfx.cu_info.number);
4052 
4053 	adev->accel_working = true;
4054 
4055 	amdgpu_vm_check_compute_bug(adev);
4056 
4057 	/* Initialize the buffer migration limit. */
4058 	if (amdgpu_moverate >= 0)
4059 		max_MBps = amdgpu_moverate;
4060 	else
4061 		max_MBps = 8; /* Allow 8 MB/s. */
4062 	/* Get a log2 for easy divisions. */
4063 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4064 
4065 	/*
4066 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4067 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4068 	 * gpu instance is counted less.
4069 	 */
4070 	amdgpu_register_gpu_instance(adev);
4071 
4072 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4073 	 * explicit gating rather than handling it automatically.
4074 	 */
4075 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4076 		r = amdgpu_device_ip_late_init(adev);
4077 		if (r) {
4078 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4079 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4080 			goto release_ras_con;
4081 		}
4082 		/* must succeed. */
4083 		amdgpu_ras_resume(adev);
4084 		queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4085 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4086 	}
4087 
4088 	if (amdgpu_sriov_vf(adev)) {
4089 		amdgpu_virt_release_full_gpu(adev, true);
4090 		flush_delayed_work(&adev->delayed_init_work);
4091 	}
4092 
4093 	/* Don't init kfd if whole hive need to be reset during init */
4094 	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4095 		kgd2kfd_init_zone_device(adev);
4096 		kfd_update_svm_support_properties(adev);
4097 	}
4098 
4099 	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4100 		amdgpu_xgmi_reset_on_init(adev);
4101 
4102 	/*
4103 	 * Place those sysfs registering after `late_init`. As some of those
4104 	 * operations performed in `late_init` might affect the sysfs
4105 	 * interfaces creating.
4106 	 */
4107 	r = amdgpu_device_sys_interface_init(adev);
4108 
4109 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4110 		r = amdgpu_pmu_init(adev);
4111 	if (r)
4112 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4113 
4114 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4115 	if (amdgpu_device_cache_pci_state(adev->pdev))
4116 		pci_restore_state(pdev);
4117 
4118 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4119 	/* this will fail for cards that aren't VGA class devices, just
4120 	 * ignore it
4121 	 */
4122 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4123 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4124 
4125 	px = amdgpu_device_supports_px(adev);
4126 
4127 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4128 				apple_gmux_detect(NULL, NULL)))
4129 		vga_switcheroo_register_client(adev->pdev,
4130 					       &amdgpu_switcheroo_ops, px);
4131 
4132 	if (px)
4133 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4134 
4135 	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4136 	r = register_pm_notifier(&adev->pm_nb);
4137 	if (r)
4138 		goto failed;
4139 
4140 	return 0;
4141 
4142 release_ras_con:
4143 	if (amdgpu_sriov_vf(adev))
4144 		amdgpu_virt_release_full_gpu(adev, true);
4145 
4146 	/* failed in exclusive mode due to timeout */
4147 	if (amdgpu_sriov_vf(adev) &&
4148 		!amdgpu_sriov_runtime(adev) &&
4149 		amdgpu_virt_mmio_blocked(adev) &&
4150 		!amdgpu_virt_wait_reset(adev)) {
4151 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4152 		/* Don't send request since VF is inactive. */
4153 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4154 		adev->virt.ops = NULL;
4155 		r = -EAGAIN;
4156 	}
4157 	amdgpu_release_ras_context(adev);
4158 
4159 failed:
4160 	amdgpu_vf_error_trans_all(adev);
4161 
4162 	return r;
4163 }
4164 
4165 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4166 {
4167 
4168 	/* Clear all CPU mappings pointing to this device */
4169 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4170 
4171 	/* Unmap all mapped bars - Doorbell, registers and VRAM */
4172 	amdgpu_doorbell_fini(adev);
4173 
4174 	iounmap(adev->rmmio);
4175 	adev->rmmio = NULL;
4176 	if (adev->mman.aper_base_kaddr)
4177 		iounmap(adev->mman.aper_base_kaddr);
4178 	adev->mman.aper_base_kaddr = NULL;
4179 
4180 	/* Memory manager related */
4181 	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4182 		arch_phys_wc_del(adev->gmc.vram_mtrr);
4183 		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4184 	}
4185 }
4186 
4187 /**
4188  * amdgpu_device_fini_hw - tear down the driver
4189  *
4190  * @adev: amdgpu_device pointer
4191  *
4192  * Tear down the driver info (all asics).
4193  * Called at driver shutdown.
4194  */
4195 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4196 {
4197 	dev_info(adev->dev, "finishing device.\n");
4198 	flush_delayed_work(&adev->delayed_init_work);
4199 
4200 	if (adev->mman.initialized)
4201 		drain_workqueue(adev->mman.bdev.wq);
4202 	adev->shutdown = true;
4203 
4204 	unregister_pm_notifier(&adev->pm_nb);
4205 
4206 	/* make sure IB test finished before entering exclusive mode
4207 	 * to avoid preemption on IB test
4208 	 */
4209 	if (amdgpu_sriov_vf(adev)) {
4210 		amdgpu_virt_request_full_gpu(adev, false);
4211 		amdgpu_virt_fini_data_exchange(adev);
4212 	}
4213 
4214 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
4215 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
4216 
4217 	/* disable all interrupts */
4218 	amdgpu_irq_disable_all(adev);
4219 	if (adev->mode_info.mode_config_initialized) {
4220 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4221 			drm_helper_force_disable_all(adev_to_drm(adev));
4222 		else
4223 			drm_atomic_helper_shutdown(adev_to_drm(adev));
4224 	}
4225 	amdgpu_fence_driver_hw_fini(adev);
4226 
4227 	amdgpu_device_sys_interface_fini(adev);
4228 
4229 	/* disable ras feature must before hw fini */
4230 	amdgpu_ras_pre_fini(adev);
4231 
4232 	amdgpu_ttm_disable_buffer_funcs(adev);
4233 
4234 	/*
4235 	 * device went through surprise hotplug; we need to destroy topology
4236 	 * before ip_fini_early to prevent kfd locking refcount issues by calling
4237 	 * amdgpu_amdkfd_suspend()
4238 	 */
4239 	if (pci_dev_is_disconnected(adev->pdev))
4240 		amdgpu_amdkfd_device_fini_sw(adev);
4241 
4242 	amdgpu_coredump_fini(adev);
4243 	amdgpu_device_ip_fini_early(adev);
4244 
4245 	amdgpu_irq_fini_hw(adev);
4246 
4247 	if (adev->mman.initialized)
4248 		ttm_device_clear_dma_mappings(&adev->mman.bdev);
4249 
4250 	amdgpu_gart_dummy_page_fini(adev);
4251 
4252 	if (pci_dev_is_disconnected(adev->pdev))
4253 		amdgpu_device_unmap_mmio(adev);
4254 
4255 }
4256 
4257 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4258 {
4259 	int i, idx;
4260 	bool px;
4261 
4262 	amdgpu_device_ip_fini(adev);
4263 	amdgpu_fence_driver_sw_fini(adev);
4264 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4265 	adev->accel_working = false;
4266 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4267 	for (i = 0; i < MAX_XCP; ++i) {
4268 		dma_fence_put(adev->isolation[i].spearhead);
4269 		amdgpu_sync_free(&adev->isolation[i].active);
4270 		amdgpu_sync_free(&adev->isolation[i].prev);
4271 	}
4272 
4273 	amdgpu_reset_fini(adev);
4274 
4275 	/* free i2c buses */
4276 	amdgpu_i2c_fini(adev);
4277 
4278 	if (adev->bios) {
4279 		if (amdgpu_emu_mode != 1)
4280 			amdgpu_atombios_fini(adev);
4281 		amdgpu_bios_release(adev);
4282 	}
4283 
4284 	kfree(adev->fru_info);
4285 	adev->fru_info = NULL;
4286 
4287 	kfree(adev->xcp_mgr);
4288 	adev->xcp_mgr = NULL;
4289 
4290 	px = amdgpu_device_supports_px(adev);
4291 
4292 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4293 				apple_gmux_detect(NULL, NULL)))
4294 		vga_switcheroo_unregister_client(adev->pdev);
4295 
4296 	if (px)
4297 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
4298 
4299 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4300 		vga_client_unregister(adev->pdev);
4301 
4302 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4303 
4304 		iounmap(adev->rmmio);
4305 		adev->rmmio = NULL;
4306 		drm_dev_exit(idx);
4307 	}
4308 
4309 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4310 		amdgpu_pmu_fini(adev);
4311 	if (adev->discovery.bin)
4312 		amdgpu_discovery_fini(adev);
4313 
4314 	amdgpu_reset_put_reset_domain(adev->reset_domain);
4315 	adev->reset_domain = NULL;
4316 
4317 	kfree(adev->pci_state);
4318 	kfree(adev->pcie_reset_ctx.swds_pcistate);
4319 	kfree(adev->pcie_reset_ctx.swus_pcistate);
4320 }
4321 
4322 /**
4323  * amdgpu_device_evict_resources - evict device resources
4324  * @adev: amdgpu device object
4325  *
4326  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4327  * of the vram memory type. Mainly used for evicting device resources
4328  * at suspend time.
4329  *
4330  */
4331 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4332 {
4333 	int ret;
4334 
4335 	/* No need to evict vram on APUs unless going to S4 */
4336 	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4337 		return 0;
4338 
4339 	/* No need to evict when going to S5 through S4 callbacks */
4340 	if (system_state == SYSTEM_POWER_OFF)
4341 		return 0;
4342 
4343 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4344 	if (ret) {
4345 		dev_warn(adev->dev, "evicting device resources failed\n");
4346 		return ret;
4347 	}
4348 
4349 	if (adev->in_s4) {
4350 		ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
4351 		if (ret)
4352 			dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
4353 	}
4354 	return ret;
4355 }
4356 
4357 /*
4358  * Suspend & resume.
4359  */
4360 /**
4361  * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4362  * @nb: notifier block
4363  * @mode: suspend mode
4364  * @data: data
4365  *
4366  * This function is called when the system is about to suspend or hibernate.
4367  * It is used to set the appropriate flags so that eviction can be optimized
4368  * in the pm prepare callback.
4369  */
4370 static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
4371 				     void *data)
4372 {
4373 	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
4374 
4375 	switch (mode) {
4376 	case PM_HIBERNATION_PREPARE:
4377 		adev->in_s4 = true;
4378 		break;
4379 	case PM_POST_HIBERNATION:
4380 		adev->in_s4 = false;
4381 		break;
4382 	}
4383 
4384 	return NOTIFY_DONE;
4385 }
4386 
4387 /**
4388  * amdgpu_device_prepare - prepare for device suspend
4389  *
4390  * @dev: drm dev pointer
4391  *
4392  * Prepare to put the hw in the suspend state (all asics).
4393  * Returns 0 for success or an error on failure.
4394  * Called at driver suspend.
4395  */
4396 int amdgpu_device_prepare(struct drm_device *dev)
4397 {
4398 	struct amdgpu_device *adev = drm_to_adev(dev);
4399 	int i, r;
4400 
4401 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4402 		return 0;
4403 
4404 	/* Evict the majority of BOs before starting suspend sequence */
4405 	r = amdgpu_device_evict_resources(adev);
4406 	if (r)
4407 		return r;
4408 
4409 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4410 
4411 	for (i = 0; i < adev->num_ip_blocks; i++) {
4412 		if (!adev->ip_blocks[i].status.valid)
4413 			continue;
4414 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4415 			continue;
4416 		r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4417 		if (r)
4418 			return r;
4419 	}
4420 
4421 	return 0;
4422 }
4423 
4424 /**
4425  * amdgpu_device_complete - complete power state transition
4426  *
4427  * @dev: drm dev pointer
4428  *
4429  * Undo the changes from amdgpu_device_prepare. This will be
4430  * called on all resume transitions, including those that failed.
4431  */
4432 void amdgpu_device_complete(struct drm_device *dev)
4433 {
4434 	struct amdgpu_device *adev = drm_to_adev(dev);
4435 	int i;
4436 
4437 	for (i = 0; i < adev->num_ip_blocks; i++) {
4438 		if (!adev->ip_blocks[i].status.valid)
4439 			continue;
4440 		if (!adev->ip_blocks[i].version->funcs->complete)
4441 			continue;
4442 		adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
4443 	}
4444 }
4445 
4446 /**
4447  * amdgpu_device_suspend - initiate device suspend
4448  *
4449  * @dev: drm dev pointer
4450  * @notify_clients: notify in-kernel DRM clients
4451  *
4452  * Puts the hw in the suspend state (all asics).
4453  * Returns 0 for success or an error on failure.
4454  * Called at driver suspend.
4455  */
4456 int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
4457 {
4458 	struct amdgpu_device *adev = drm_to_adev(dev);
4459 	int r, rec;
4460 
4461 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4462 		return 0;
4463 
4464 	adev->in_suspend = true;
4465 
4466 	if (amdgpu_sriov_vf(adev)) {
4467 		if (!adev->in_runpm)
4468 			amdgpu_amdkfd_suspend_process(adev);
4469 		amdgpu_virt_fini_data_exchange(adev);
4470 		r = amdgpu_virt_request_full_gpu(adev, false);
4471 		if (r)
4472 			return r;
4473 	}
4474 
4475 	r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
4476 	if (r)
4477 		goto unwind_sriov;
4478 
4479 	if (notify_clients)
4480 		drm_client_dev_suspend(adev_to_drm(adev));
4481 
4482 	cancel_delayed_work_sync(&adev->delayed_init_work);
4483 
4484 	amdgpu_ras_suspend(adev);
4485 
4486 	r = amdgpu_device_ip_suspend_phase1(adev);
4487 	if (r)
4488 		goto unwind_smartshift;
4489 
4490 	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4491 	r = amdgpu_userq_suspend(adev);
4492 	if (r)
4493 		goto unwind_ip_phase1;
4494 
4495 	r = amdgpu_device_evict_resources(adev);
4496 	if (r)
4497 		goto unwind_userq;
4498 
4499 	amdgpu_ttm_disable_buffer_funcs(adev);
4500 
4501 	amdgpu_fence_driver_hw_fini(adev);
4502 
4503 	r = amdgpu_device_ip_suspend_phase2(adev);
4504 	if (r)
4505 		goto unwind_evict;
4506 
4507 	if (amdgpu_sriov_vf(adev))
4508 		amdgpu_virt_release_full_gpu(adev, false);
4509 
4510 	return 0;
4511 
4512 unwind_evict:
4513 	amdgpu_ttm_enable_buffer_funcs(adev);
4514 	amdgpu_fence_driver_hw_init(adev);
4515 
4516 unwind_userq:
4517 	rec = amdgpu_userq_resume(adev);
4518 	if (rec) {
4519 		dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
4520 		return r;
4521 	}
4522 	rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4523 	if (rec) {
4524 		dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
4525 		return r;
4526 	}
4527 
4528 unwind_ip_phase1:
4529 	/* suspend phase 1 = resume phase 3 */
4530 	rec = amdgpu_device_ip_resume_phase3(adev);
4531 	if (rec) {
4532 		dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
4533 		return r;
4534 	}
4535 
4536 unwind_smartshift:
4537 	rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
4538 	if (rec) {
4539 		dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
4540 		return r;
4541 	}
4542 
4543 	if (notify_clients)
4544 		drm_client_dev_resume(adev_to_drm(adev));
4545 
4546 	amdgpu_ras_resume(adev);
4547 
4548 unwind_sriov:
4549 	if (amdgpu_sriov_vf(adev)) {
4550 		rec = amdgpu_virt_request_full_gpu(adev, true);
4551 		if (rec) {
4552 			dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
4553 			return r;
4554 		}
4555 	}
4556 
4557 	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
4558 
4559 	return r;
4560 }
4561 
4562 static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
4563 {
4564 	int r;
4565 	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
4566 
4567 	/* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
4568 	 * may not work. The access could be blocked by nBIF protection as VF isn't in
4569 	 * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
4570 	 * so that QEMU reprograms MSIX table.
4571 	 */
4572 	amdgpu_restore_msix(adev);
4573 
4574 	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4575 	if (r)
4576 		return r;
4577 
4578 	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
4579 		prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
4580 
4581 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
4582 	adev->vm_manager.vram_base_offset +=
4583 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
4584 
4585 	return 0;
4586 }
4587 
4588 /**
4589  * amdgpu_device_resume - initiate device resume
4590  *
4591  * @dev: drm dev pointer
4592  * @notify_clients: notify in-kernel DRM clients
4593  *
4594  * Bring the hw back to operating state (all asics).
4595  * Returns 0 for success or an error on failure.
4596  * Called at driver resume.
4597  */
4598 int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
4599 {
4600 	struct amdgpu_device *adev = drm_to_adev(dev);
4601 	int r = 0;
4602 
4603 	if (amdgpu_sriov_vf(adev)) {
4604 		r = amdgpu_virt_request_full_gpu(adev, true);
4605 		if (r)
4606 			return r;
4607 	}
4608 
4609 	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
4610 		r = amdgpu_virt_resume(adev);
4611 		if (r)
4612 			goto exit;
4613 	}
4614 
4615 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4616 		return 0;
4617 
4618 	if (adev->in_s0ix)
4619 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4620 
4621 	/* post card */
4622 	if (amdgpu_device_need_post(adev)) {
4623 		r = amdgpu_device_asic_init(adev);
4624 		if (r)
4625 			dev_err(adev->dev, "amdgpu asic init failed\n");
4626 	}
4627 
4628 	r = amdgpu_device_ip_resume(adev);
4629 
4630 	if (r) {
4631 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4632 		goto exit;
4633 	}
4634 
4635 	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
4636 	if (r)
4637 		goto exit;
4638 
4639 	r = amdgpu_userq_resume(adev);
4640 	if (r)
4641 		goto exit;
4642 
4643 	r = amdgpu_device_ip_late_init(adev);
4644 	if (r)
4645 		goto exit;
4646 
4647 	queue_delayed_work(system_dfl_wq, &adev->delayed_init_work,
4648 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
4649 exit:
4650 	if (amdgpu_sriov_vf(adev)) {
4651 		amdgpu_virt_init_data_exchange(adev);
4652 		amdgpu_virt_release_full_gpu(adev, true);
4653 
4654 		if (!r && !adev->in_runpm)
4655 			r = amdgpu_amdkfd_resume_process(adev);
4656 	}
4657 
4658 	if (r)
4659 		return r;
4660 
4661 	/* Make sure IB tests flushed */
4662 	flush_delayed_work(&adev->delayed_init_work);
4663 
4664 	if (notify_clients)
4665 		drm_client_dev_resume(adev_to_drm(adev));
4666 
4667 	amdgpu_ras_resume(adev);
4668 
4669 	if (adev->mode_info.num_crtc) {
4670 		/*
4671 		 * Most of the connector probing functions try to acquire runtime pm
4672 		 * refs to ensure that the GPU is powered on when connector polling is
4673 		 * performed. Since we're calling this from a runtime PM callback,
4674 		 * trying to acquire rpm refs will cause us to deadlock.
4675 		 *
4676 		 * Since we're guaranteed to be holding the rpm lock, it's safe to
4677 		 * temporarily disable the rpm helpers so this doesn't deadlock us.
4678 		 */
4679 #ifdef CONFIG_PM
4680 		dev->dev->power.disable_depth++;
4681 #endif
4682 		if (!adev->dc_enabled)
4683 			drm_helper_hpd_irq_event(dev);
4684 		else
4685 			drm_kms_helper_hotplug_event(dev);
4686 #ifdef CONFIG_PM
4687 		dev->dev->power.disable_depth--;
4688 #endif
4689 	}
4690 
4691 	amdgpu_vram_mgr_clear_reset_blocks(adev);
4692 	adev->in_suspend = false;
4693 
4694 	if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
4695 		dev_warn(adev->dev, "smart shift update failed\n");
4696 
4697 	return 0;
4698 }
4699 
4700 /**
4701  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4702  *
4703  * @adev: amdgpu_device pointer
4704  *
4705  * The list of all the hardware IPs that make up the asic is walked and
4706  * the check_soft_reset callbacks are run.  check_soft_reset determines
4707  * if the asic is still hung or not.
4708  * Returns true if any of the IPs are still in a hung state, false if not.
4709  */
4710 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4711 {
4712 	int i;
4713 	bool asic_hang = false;
4714 
4715 	if (amdgpu_sriov_vf(adev))
4716 		return true;
4717 
4718 	if (amdgpu_asic_need_full_reset(adev))
4719 		return true;
4720 
4721 	for (i = 0; i < adev->num_ip_blocks; i++) {
4722 		if (!adev->ip_blocks[i].status.valid)
4723 			continue;
4724 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4725 			adev->ip_blocks[i].status.hang =
4726 				adev->ip_blocks[i].version->funcs->check_soft_reset(
4727 					&adev->ip_blocks[i]);
4728 		if (adev->ip_blocks[i].status.hang) {
4729 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4730 			asic_hang = true;
4731 		}
4732 	}
4733 	return asic_hang;
4734 }
4735 
4736 /**
4737  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4738  *
4739  * @adev: amdgpu_device pointer
4740  *
4741  * The list of all the hardware IPs that make up the asic is walked and the
4742  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4743  * handles any IP specific hardware or software state changes that are
4744  * necessary for a soft reset to succeed.
4745  * Returns 0 on success, negative error code on failure.
4746  */
4747 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4748 {
4749 	int i, r = 0;
4750 
4751 	for (i = 0; i < adev->num_ip_blocks; i++) {
4752 		if (!adev->ip_blocks[i].status.valid)
4753 			continue;
4754 		if (adev->ip_blocks[i].status.hang &&
4755 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4756 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4757 			if (r)
4758 				return r;
4759 		}
4760 	}
4761 
4762 	return 0;
4763 }
4764 
4765 /**
4766  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4767  *
4768  * @adev: amdgpu_device pointer
4769  *
4770  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4771  * reset is necessary to recover.
4772  * Returns true if a full asic reset is required, false if not.
4773  */
4774 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4775 {
4776 	int i;
4777 
4778 	if (amdgpu_asic_need_full_reset(adev))
4779 		return true;
4780 
4781 	for (i = 0; i < adev->num_ip_blocks; i++) {
4782 		if (!adev->ip_blocks[i].status.valid)
4783 			continue;
4784 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4785 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4786 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4787 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4788 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4789 			if (adev->ip_blocks[i].status.hang) {
4790 				dev_info(adev->dev, "Some block need full reset!\n");
4791 				return true;
4792 			}
4793 		}
4794 	}
4795 	return false;
4796 }
4797 
4798 /**
4799  * amdgpu_device_ip_soft_reset - do a soft reset
4800  *
4801  * @adev: amdgpu_device pointer
4802  *
4803  * The list of all the hardware IPs that make up the asic is walked and the
4804  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4805  * IP specific hardware or software state changes that are necessary to soft
4806  * reset the IP.
4807  * Returns 0 on success, negative error code on failure.
4808  */
4809 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4810 {
4811 	int i, r = 0;
4812 
4813 	for (i = 0; i < adev->num_ip_blocks; i++) {
4814 		if (!adev->ip_blocks[i].status.valid)
4815 			continue;
4816 		if (adev->ip_blocks[i].status.hang &&
4817 		    adev->ip_blocks[i].version->funcs->soft_reset) {
4818 			r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
4819 			if (r)
4820 				return r;
4821 		}
4822 	}
4823 
4824 	return 0;
4825 }
4826 
4827 /**
4828  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4829  *
4830  * @adev: amdgpu_device pointer
4831  *
4832  * The list of all the hardware IPs that make up the asic is walked and the
4833  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4834  * handles any IP specific hardware or software state changes that are
4835  * necessary after the IP has been soft reset.
4836  * Returns 0 on success, negative error code on failure.
4837  */
4838 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4839 {
4840 	int i, r = 0;
4841 
4842 	for (i = 0; i < adev->num_ip_blocks; i++) {
4843 		if (!adev->ip_blocks[i].status.valid)
4844 			continue;
4845 		if (adev->ip_blocks[i].status.hang &&
4846 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
4847 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
4848 		if (r)
4849 			return r;
4850 	}
4851 
4852 	return 0;
4853 }
4854 
4855 /**
4856  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4857  *
4858  * @adev: amdgpu_device pointer
4859  * @reset_context: amdgpu reset context pointer
4860  *
4861  * do VF FLR and reinitialize Asic
4862  * return 0 means succeeded otherwise failed
4863  */
4864 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4865 				     struct amdgpu_reset_context *reset_context)
4866 {
4867 	int r;
4868 	struct amdgpu_hive_info *hive = NULL;
4869 
4870 	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
4871 		if (!amdgpu_ras_get_fed_status(adev))
4872 			amdgpu_virt_ready_to_reset(adev);
4873 		amdgpu_virt_wait_reset(adev);
4874 		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
4875 		r = amdgpu_virt_request_full_gpu(adev, true);
4876 	} else {
4877 		r = amdgpu_virt_reset_gpu(adev);
4878 	}
4879 	if (r)
4880 		return r;
4881 
4882 	amdgpu_ras_clear_err_state(adev);
4883 	amdgpu_irq_gpu_reset_resume_helper(adev);
4884 
4885 	/* some sw clean up VF needs to do before recover */
4886 	amdgpu_virt_post_reset(adev);
4887 
4888 	/* Resume IP prior to SMC */
4889 	r = amdgpu_device_ip_reinit_early_sriov(adev);
4890 	if (r)
4891 		return r;
4892 
4893 	amdgpu_virt_init_data_exchange(adev);
4894 
4895 	r = amdgpu_device_fw_loading(adev);
4896 	if (r)
4897 		return r;
4898 
4899 	/* now we are okay to resume SMC/CP/SDMA */
4900 	r = amdgpu_device_ip_reinit_late_sriov(adev);
4901 	if (r)
4902 		return r;
4903 
4904 	hive = amdgpu_get_xgmi_hive(adev);
4905 	/* Update PSP FW topology after reset */
4906 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4907 		r = amdgpu_xgmi_update_topology(hive, adev);
4908 	if (hive)
4909 		amdgpu_put_xgmi_hive(hive);
4910 	if (r)
4911 		return r;
4912 
4913 	r = amdgpu_ib_ring_tests(adev);
4914 	if (r)
4915 		return r;
4916 
4917 	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
4918 		amdgpu_inc_vram_lost(adev);
4919 
4920 	/* need to be called during full access so we can't do it later like
4921 	 * bare-metal does.
4922 	 */
4923 	amdgpu_amdkfd_post_reset(adev);
4924 	amdgpu_virt_release_full_gpu(adev, true);
4925 
4926 	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4927 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
4928 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
4929 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
4930 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
4931 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
4932 		amdgpu_ras_resume(adev);
4933 
4934 	amdgpu_virt_ras_telemetry_post_reset(adev);
4935 
4936 	return 0;
4937 }
4938 
4939 /**
4940  * amdgpu_device_has_job_running - check if there is any unfinished job
4941  *
4942  * @adev: amdgpu_device pointer
4943  *
4944  * check if there is any job running on the device when guest driver receives
4945  * FLR notification from host driver. If there are still jobs running, then
4946  * the guest driver will not respond the FLR reset. Instead, let the job hit
4947  * the timeout and guest driver then issue the reset request.
4948  */
4949 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4950 {
4951 	int i;
4952 
4953 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4954 		struct amdgpu_ring *ring = adev->rings[i];
4955 
4956 		if (!amdgpu_ring_sched_ready(ring))
4957 			continue;
4958 
4959 		if (amdgpu_fence_count_emitted(ring))
4960 			return true;
4961 	}
4962 	return false;
4963 }
4964 
4965 /**
4966  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4967  *
4968  * @adev: amdgpu_device pointer
4969  *
4970  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4971  * a hung GPU.
4972  */
4973 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4974 {
4975 
4976 	if (amdgpu_gpu_recovery == 0)
4977 		goto disabled;
4978 
4979 	/* Skip soft reset check in fatal error mode */
4980 	if (!amdgpu_ras_is_poison_mode_supported(adev))
4981 		return true;
4982 
4983 	if (amdgpu_sriov_vf(adev))
4984 		return true;
4985 
4986 	if (amdgpu_gpu_recovery == -1) {
4987 		switch (adev->asic_type) {
4988 #ifdef CONFIG_DRM_AMDGPU_SI
4989 		case CHIP_VERDE:
4990 		case CHIP_TAHITI:
4991 		case CHIP_PITCAIRN:
4992 		case CHIP_OLAND:
4993 		case CHIP_HAINAN:
4994 #endif
4995 #ifdef CONFIG_DRM_AMDGPU_CIK
4996 		case CHIP_KAVERI:
4997 		case CHIP_KABINI:
4998 		case CHIP_MULLINS:
4999 #endif
5000 		case CHIP_CARRIZO:
5001 		case CHIP_STONEY:
5002 		case CHIP_CYAN_SKILLFISH:
5003 			goto disabled;
5004 		default:
5005 			break;
5006 		}
5007 	}
5008 
5009 	return true;
5010 
5011 disabled:
5012 		dev_info(adev->dev, "GPU recovery disabled.\n");
5013 		return false;
5014 }
5015 
5016 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5017 {
5018 	u32 i;
5019 	int ret = 0;
5020 
5021 	if (adev->bios)
5022 		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5023 
5024 	dev_info(adev->dev, "GPU mode1 reset\n");
5025 
5026 	/* Cache the state before bus master disable. The saved config space
5027 	 * values are used in other cases like restore after mode-2 reset.
5028 	 */
5029 	amdgpu_device_cache_pci_state(adev->pdev);
5030 
5031 	/* disable BM */
5032 	pci_clear_master(adev->pdev);
5033 
5034 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5035 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5036 		ret = amdgpu_dpm_mode1_reset(adev);
5037 	} else {
5038 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5039 		ret = psp_gpu_reset(adev);
5040 	}
5041 
5042 	if (ret)
5043 		goto mode1_reset_failed;
5044 
5045 	/* enable mmio access after mode 1 reset completed */
5046 	adev->no_hw_access = false;
5047 
5048 	/* ensure no_hw_access is updated before we access hw */
5049 	smp_mb();
5050 
5051 	amdgpu_device_load_pci_state(adev->pdev);
5052 	ret = amdgpu_psp_wait_for_bootloader(adev);
5053 	if (ret)
5054 		goto mode1_reset_failed;
5055 
5056 	/* wait for asic to come out of reset */
5057 	for (i = 0; i < adev->usec_timeout; i++) {
5058 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5059 
5060 		if (memsize != 0xffffffff)
5061 			break;
5062 		udelay(1);
5063 	}
5064 
5065 	if (i >= adev->usec_timeout) {
5066 		ret = -ETIMEDOUT;
5067 		goto mode1_reset_failed;
5068 	}
5069 
5070 	if (adev->bios)
5071 		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5072 
5073 	return 0;
5074 
5075 mode1_reset_failed:
5076 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5077 	return ret;
5078 }
5079 
5080 int amdgpu_device_link_reset(struct amdgpu_device *adev)
5081 {
5082 	int ret = 0;
5083 
5084 	dev_info(adev->dev, "GPU link reset\n");
5085 
5086 	if (!amdgpu_reset_in_dpc(adev))
5087 		ret = amdgpu_dpm_link_reset(adev);
5088 
5089 	if (ret)
5090 		goto link_reset_failed;
5091 
5092 	ret = amdgpu_psp_wait_for_bootloader(adev);
5093 	if (ret)
5094 		goto link_reset_failed;
5095 
5096 	return 0;
5097 
5098 link_reset_failed:
5099 	dev_err(adev->dev, "GPU link reset failed\n");
5100 	return ret;
5101 }
5102 
5103 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5104 				 struct amdgpu_reset_context *reset_context)
5105 {
5106 	int i, r = 0;
5107 	struct amdgpu_job *job = NULL;
5108 	struct dma_fence *fence = NULL;
5109 	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5110 	bool need_full_reset =
5111 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5112 
5113 	if (reset_context->reset_req_dev == adev)
5114 		job = reset_context->job;
5115 
5116 	if (amdgpu_sriov_vf(adev))
5117 		amdgpu_virt_pre_reset(adev);
5118 
5119 	amdgpu_fence_driver_isr_toggle(adev, true);
5120 
5121 	if (job)
5122 		fence = &job->hw_fence->base;
5123 
5124 	/* block all schedulers and reset given job's ring */
5125 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5126 		struct amdgpu_ring *ring = adev->rings[i];
5127 
5128 		if (!amdgpu_ring_sched_ready(ring))
5129 			continue;
5130 
5131 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5132 		amdgpu_fence_driver_force_completion(ring, fence);
5133 	}
5134 
5135 	amdgpu_fence_driver_isr_toggle(adev, false);
5136 
5137 	if (job && job->vm)
5138 		drm_sched_increase_karma(&job->base);
5139 
5140 	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5141 	/* If reset handler not implemented, continue; otherwise return */
5142 	if (r == -EOPNOTSUPP)
5143 		r = 0;
5144 	else
5145 		return r;
5146 
5147 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5148 	if (!amdgpu_sriov_vf(adev)) {
5149 
5150 		if (!need_full_reset)
5151 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5152 
5153 		if (!need_full_reset && amdgpu_gpu_recovery &&
5154 		    amdgpu_device_ip_check_soft_reset(adev)) {
5155 			amdgpu_device_ip_pre_soft_reset(adev);
5156 			r = amdgpu_device_ip_soft_reset(adev);
5157 			amdgpu_device_ip_post_soft_reset(adev);
5158 			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5159 				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5160 				need_full_reset = true;
5161 			}
5162 		}
5163 
5164 		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5165 			dev_info(tmp_adev->dev, "Dumping IP State\n");
5166 			/* Trigger ip dump before we reset the asic */
5167 			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5168 				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5169 					tmp_adev->ip_blocks[i].version->funcs
5170 						->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5171 			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5172 		}
5173 
5174 		if (need_full_reset)
5175 			r = amdgpu_device_ip_suspend(adev);
5176 		if (need_full_reset)
5177 			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5178 		else
5179 			clear_bit(AMDGPU_NEED_FULL_RESET,
5180 				  &reset_context->flags);
5181 	}
5182 
5183 	return r;
5184 }
5185 
5186 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5187 {
5188 	struct list_head *device_list_handle;
5189 	bool full_reset, vram_lost = false;
5190 	struct amdgpu_device *tmp_adev;
5191 	int r, init_level;
5192 
5193 	device_list_handle = reset_context->reset_device_list;
5194 
5195 	if (!device_list_handle)
5196 		return -EINVAL;
5197 
5198 	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5199 
5200 	/**
5201 	 * If it's reset on init, it's default init level, otherwise keep level
5202 	 * as recovery level.
5203 	 */
5204 	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5205 			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5206 	else
5207 			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5208 
5209 	r = 0;
5210 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5211 		amdgpu_set_init_level(tmp_adev, init_level);
5212 		if (full_reset) {
5213 			/* post card */
5214 			amdgpu_reset_set_dpc_status(tmp_adev, false);
5215 			amdgpu_ras_clear_err_state(tmp_adev);
5216 			r = amdgpu_device_asic_init(tmp_adev);
5217 			if (r) {
5218 				dev_warn(tmp_adev->dev, "asic atom init failed!");
5219 			} else {
5220 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5221 
5222 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
5223 				if (r)
5224 					goto out;
5225 
5226 				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5227 
5228 				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5229 					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5230 
5231 				if (vram_lost) {
5232 					dev_info(
5233 						tmp_adev->dev,
5234 						"VRAM is lost due to GPU reset!\n");
5235 					amdgpu_inc_vram_lost(tmp_adev);
5236 				}
5237 
5238 				r = amdgpu_device_fw_loading(tmp_adev);
5239 				if (r)
5240 					return r;
5241 
5242 				r = amdgpu_xcp_restore_partition_mode(
5243 					tmp_adev->xcp_mgr);
5244 				if (r)
5245 					goto out;
5246 
5247 				r = amdgpu_device_ip_resume_phase2(tmp_adev);
5248 				if (r)
5249 					goto out;
5250 
5251 				amdgpu_ttm_enable_buffer_funcs(tmp_adev);
5252 
5253 				r = amdgpu_device_ip_resume_phase3(tmp_adev);
5254 				if (r)
5255 					goto out;
5256 
5257 				if (vram_lost)
5258 					amdgpu_device_fill_reset_magic(tmp_adev);
5259 
5260 				/*
5261 				 * Add this ASIC as tracked as reset was already
5262 				 * complete successfully.
5263 				 */
5264 				amdgpu_register_gpu_instance(tmp_adev);
5265 
5266 				if (!reset_context->hive &&
5267 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5268 					amdgpu_xgmi_add_device(tmp_adev);
5269 
5270 				r = amdgpu_device_ip_late_init(tmp_adev);
5271 				if (r)
5272 					goto out;
5273 
5274 				r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
5275 				if (r)
5276 					goto out;
5277 
5278 				drm_client_dev_resume(adev_to_drm(tmp_adev));
5279 
5280 				/*
5281 				 * The GPU enters bad state once faulty pages
5282 				 * by ECC has reached the threshold, and ras
5283 				 * recovery is scheduled next. So add one check
5284 				 * here to break recovery if it indeed exceeds
5285 				 * bad page threshold, and remind user to
5286 				 * retire this GPU or setting one bigger
5287 				 * bad_page_threshold value to fix this once
5288 				 * probing driver again.
5289 				 */
5290 				if (!amdgpu_ras_is_rma(tmp_adev)) {
5291 					/* must succeed. */
5292 					amdgpu_ras_resume(tmp_adev);
5293 				} else {
5294 					r = -EINVAL;
5295 					goto out;
5296 				}
5297 
5298 				/* Update PSP FW topology after reset */
5299 				if (reset_context->hive &&
5300 				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5301 					r = amdgpu_xgmi_update_topology(
5302 						reset_context->hive, tmp_adev);
5303 			}
5304 		}
5305 
5306 out:
5307 		if (!r) {
5308 			/* IP init is complete now, set level as default */
5309 			amdgpu_set_init_level(tmp_adev,
5310 					      AMDGPU_INIT_LEVEL_DEFAULT);
5311 			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5312 			r = amdgpu_ib_ring_tests(tmp_adev);
5313 			if (r) {
5314 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5315 				r = -EAGAIN;
5316 				goto end;
5317 			}
5318 		}
5319 
5320 		if (r)
5321 			tmp_adev->asic_reset_res = r;
5322 	}
5323 
5324 end:
5325 	return r;
5326 }
5327 
5328 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5329 			 struct amdgpu_reset_context *reset_context)
5330 {
5331 	struct amdgpu_device *tmp_adev = NULL;
5332 	bool need_full_reset, skip_hw_reset;
5333 	int r = 0;
5334 
5335 	/* Try reset handler method first */
5336 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5337 				    reset_list);
5338 
5339 	reset_context->reset_device_list = device_list_handle;
5340 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5341 	/* If reset handler not implemented, continue; otherwise return */
5342 	if (r == -EOPNOTSUPP)
5343 		r = 0;
5344 	else
5345 		return r;
5346 
5347 	/* Reset handler not implemented, use the default method */
5348 	need_full_reset =
5349 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5350 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5351 
5352 	/*
5353 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
5354 	 * to allow proper links negotiation in FW (within 1 sec)
5355 	 */
5356 	if (!skip_hw_reset && need_full_reset) {
5357 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5358 			/* For XGMI run all resets in parallel to speed up the process */
5359 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5360 				if (!queue_work(system_dfl_wq,
5361 						&tmp_adev->xgmi_reset_work))
5362 					r = -EALREADY;
5363 			} else
5364 				r = amdgpu_asic_reset(tmp_adev);
5365 
5366 			if (r) {
5367 				dev_err(tmp_adev->dev,
5368 					"ASIC reset failed with error, %d for drm dev, %s",
5369 					r, adev_to_drm(tmp_adev)->unique);
5370 				goto out;
5371 			}
5372 		}
5373 
5374 		/* For XGMI wait for all resets to complete before proceed */
5375 		if (!r) {
5376 			list_for_each_entry(tmp_adev, device_list_handle,
5377 					    reset_list) {
5378 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5379 					flush_work(&tmp_adev->xgmi_reset_work);
5380 					r = tmp_adev->asic_reset_res;
5381 					if (r)
5382 						break;
5383 				}
5384 			}
5385 		}
5386 	}
5387 
5388 	if (!r && amdgpu_ras_intr_triggered()) {
5389 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5390 			amdgpu_ras_reset_error_count(tmp_adev,
5391 						     AMDGPU_RAS_BLOCK__MMHUB);
5392 		}
5393 
5394 		amdgpu_ras_intr_cleared();
5395 	}
5396 
5397 	r = amdgpu_device_reinit_after_reset(reset_context);
5398 	if (r == -EAGAIN)
5399 		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5400 	else
5401 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5402 
5403 out:
5404 	return r;
5405 }
5406 
5407 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5408 {
5409 
5410 	switch (amdgpu_asic_reset_method(adev)) {
5411 	case AMD_RESET_METHOD_MODE1:
5412 	case AMD_RESET_METHOD_LINK:
5413 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5414 		break;
5415 	case AMD_RESET_METHOD_MODE2:
5416 		adev->mp1_state = PP_MP1_STATE_RESET;
5417 		break;
5418 	default:
5419 		adev->mp1_state = PP_MP1_STATE_NONE;
5420 		break;
5421 	}
5422 }
5423 
5424 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5425 {
5426 	amdgpu_vf_error_trans_all(adev);
5427 	adev->mp1_state = PP_MP1_STATE_NONE;
5428 }
5429 
5430 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5431 {
5432 	struct pci_dev *p = NULL;
5433 
5434 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5435 			adev->pdev->bus->number, 1);
5436 	if (p) {
5437 		pm_runtime_enable(&(p->dev));
5438 		pm_runtime_resume(&(p->dev));
5439 	}
5440 
5441 	pci_dev_put(p);
5442 }
5443 
5444 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5445 {
5446 	enum amd_reset_method reset_method;
5447 	struct pci_dev *p = NULL;
5448 	u64 expires;
5449 
5450 	/*
5451 	 * For now, only BACO and mode1 reset are confirmed
5452 	 * to suffer the audio issue without proper suspended.
5453 	 */
5454 	reset_method = amdgpu_asic_reset_method(adev);
5455 	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5456 	     (reset_method != AMD_RESET_METHOD_MODE1))
5457 		return -EINVAL;
5458 
5459 	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5460 			adev->pdev->bus->number, 1);
5461 	if (!p)
5462 		return -ENODEV;
5463 
5464 	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5465 	if (!expires)
5466 		/*
5467 		 * If we cannot get the audio device autosuspend delay,
5468 		 * a fixed 4S interval will be used. Considering 3S is
5469 		 * the audio controller default autosuspend delay setting.
5470 		 * 4S used here is guaranteed to cover that.
5471 		 */
5472 		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5473 
5474 	while (!pm_runtime_status_suspended(&(p->dev))) {
5475 		if (!pm_runtime_suspend(&(p->dev)))
5476 			break;
5477 
5478 		if (expires < ktime_get_mono_fast_ns()) {
5479 			dev_warn(adev->dev, "failed to suspend display audio\n");
5480 			pci_dev_put(p);
5481 			/* TODO: abort the succeeding gpu reset? */
5482 			return -ETIMEDOUT;
5483 		}
5484 	}
5485 
5486 	pm_runtime_disable(&(p->dev));
5487 
5488 	pci_dev_put(p);
5489 	return 0;
5490 }
5491 
5492 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5493 {
5494 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5495 
5496 #if defined(CONFIG_DEBUG_FS)
5497 	if (!amdgpu_sriov_vf(adev))
5498 		cancel_work(&adev->reset_work);
5499 #endif
5500 	amdgpu_userq_mgr_cancel_reset_work(adev);
5501 
5502 	if (adev->kfd.dev)
5503 		cancel_work(&adev->kfd.reset_work);
5504 
5505 	if (amdgpu_sriov_vf(adev))
5506 		cancel_work(&adev->virt.flr_work);
5507 
5508 	if (con && adev->ras_enabled)
5509 		cancel_work(&con->recovery_work);
5510 
5511 }
5512 
5513 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5514 {
5515 	struct amdgpu_device *tmp_adev;
5516 	int ret = 0;
5517 
5518 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5519 		ret |= amdgpu_device_bus_status_check(tmp_adev);
5520 	}
5521 
5522 	return ret;
5523 }
5524 
5525 static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
5526 					  struct list_head *device_list,
5527 					  struct amdgpu_hive_info *hive)
5528 {
5529 	struct amdgpu_device *tmp_adev = NULL;
5530 
5531 	/*
5532 	 * Build list of devices to reset.
5533 	 * In case we are in XGMI hive mode, resort the device list
5534 	 * to put adev in the 1st position.
5535 	 */
5536 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5537 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5538 			list_add_tail(&tmp_adev->reset_list, device_list);
5539 			if (adev->shutdown)
5540 				tmp_adev->shutdown = true;
5541 		}
5542 		if (!list_is_first(&adev->reset_list, device_list))
5543 			list_rotate_to_front(&adev->reset_list, device_list);
5544 	} else {
5545 		list_add_tail(&adev->reset_list, device_list);
5546 	}
5547 }
5548 
5549 static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
5550 						  struct list_head *device_list)
5551 {
5552 	struct amdgpu_device *tmp_adev = NULL;
5553 
5554 	if (list_empty(device_list))
5555 		return;
5556 	tmp_adev =
5557 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5558 	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5559 }
5560 
5561 static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
5562 						  struct list_head *device_list)
5563 {
5564 	struct amdgpu_device *tmp_adev = NULL;
5565 
5566 	if (list_empty(device_list))
5567 		return;
5568 	tmp_adev =
5569 		list_first_entry(device_list, struct amdgpu_device, reset_list);
5570 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5571 }
5572 
5573 static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
5574 					  struct amdgpu_job *job,
5575 					  struct amdgpu_reset_context *reset_context,
5576 					  struct list_head *device_list,
5577 					  struct amdgpu_hive_info *hive,
5578 					  bool need_emergency_restart)
5579 {
5580 	struct amdgpu_device *tmp_adev = NULL;
5581 	int i;
5582 
5583 	/* block all schedulers and reset given job's ring */
5584 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5585 		amdgpu_device_set_mp1_state(tmp_adev);
5586 
5587 		/*
5588 		 * Try to put the audio codec into suspend state
5589 		 * before gpu reset started.
5590 		 *
5591 		 * Due to the power domain of the graphics device
5592 		 * is shared with AZ power domain. Without this,
5593 		 * we may change the audio hardware from behind
5594 		 * the audio driver's back. That will trigger
5595 		 * some audio codec errors.
5596 		 */
5597 		if (!amdgpu_device_suspend_display_audio(tmp_adev))
5598 			tmp_adev->pcie_reset_ctx.audio_suspended = true;
5599 
5600 		amdgpu_ras_set_error_query_ready(tmp_adev, false);
5601 
5602 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5603 
5604 		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5605 
5606 		/*
5607 		 * Mark these ASICs to be reset as untracked first
5608 		 * And add them back after reset completed
5609 		 */
5610 		amdgpu_unregister_gpu_instance(tmp_adev);
5611 
5612 		drm_client_dev_suspend(adev_to_drm(tmp_adev));
5613 
5614 		/* disable ras on ALL IPs */
5615 		if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
5616 		    amdgpu_device_ip_need_full_reset(tmp_adev))
5617 			amdgpu_ras_suspend(tmp_adev);
5618 
5619 		amdgpu_userq_pre_reset(tmp_adev);
5620 
5621 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5622 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5623 
5624 			if (!amdgpu_ring_sched_ready(ring))
5625 				continue;
5626 
5627 			drm_sched_wqueue_stop(&ring->sched);
5628 
5629 			if (need_emergency_restart)
5630 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5631 		}
5632 		atomic_inc(&tmp_adev->gpu_reset_counter);
5633 	}
5634 }
5635 
5636 static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
5637 			      struct list_head *device_list,
5638 			      struct amdgpu_reset_context *reset_context)
5639 {
5640 	struct amdgpu_device *tmp_adev = NULL;
5641 	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5642 	int r = 0;
5643 
5644 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
5645 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5646 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5647 		/*TODO Should we stop ?*/
5648 		if (r) {
5649 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5650 				  r, adev_to_drm(tmp_adev)->unique);
5651 			tmp_adev->asic_reset_res = r;
5652 		}
5653 	}
5654 
5655 	/* Actual ASIC resets if needed.*/
5656 	/* Host driver will handle XGMI hive reset for SRIOV */
5657 	if (amdgpu_sriov_vf(adev)) {
5658 
5659 		/* Bail out of reset early */
5660 		if (amdgpu_ras_is_rma(adev))
5661 			return -ENODEV;
5662 
5663 		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5664 			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5665 			amdgpu_ras_set_fed(adev, true);
5666 			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5667 		}
5668 
5669 		r = amdgpu_device_reset_sriov(adev, reset_context);
5670 		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5671 			amdgpu_virt_release_full_gpu(adev, true);
5672 			goto retry;
5673 		}
5674 		if (r)
5675 			adev->asic_reset_res = r;
5676 	} else {
5677 		r = amdgpu_do_asic_reset(device_list, reset_context);
5678 		if (r && r == -EAGAIN)
5679 			goto retry;
5680 	}
5681 
5682 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5683 		/*
5684 		 * Drop any pending non scheduler resets queued before reset is done.
5685 		 * Any reset scheduled after this point would be valid. Scheduler resets
5686 		 * were already dropped during drm_sched_stop and no new ones can come
5687 		 * in before drm_sched_start.
5688 		 */
5689 		amdgpu_device_stop_pending_resets(tmp_adev);
5690 	}
5691 
5692 	return r;
5693 }
5694 
5695 static int amdgpu_device_sched_resume(struct list_head *device_list,
5696 			      struct amdgpu_reset_context *reset_context,
5697 			      bool   job_signaled)
5698 {
5699 	struct amdgpu_device *tmp_adev = NULL;
5700 	int i, r = 0;
5701 
5702 	/* Post ASIC reset for all devs .*/
5703 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5704 
5705 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5706 			struct amdgpu_ring *ring = tmp_adev->rings[i];
5707 
5708 			if (!amdgpu_ring_sched_ready(ring))
5709 				continue;
5710 
5711 			drm_sched_wqueue_start(&ring->sched);
5712 		}
5713 
5714 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5715 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5716 
5717 		if (tmp_adev->asic_reset_res) {
5718 			/* bad news, how to tell it to userspace ?
5719 			 * for ras error, we should report GPU bad status instead of
5720 			 * reset failure
5721 			 */
5722 			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5723 			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5724 				dev_info(
5725 					tmp_adev->dev,
5726 					"GPU reset(%d) failed with error %d\n",
5727 					atomic_read(
5728 						&tmp_adev->gpu_reset_counter),
5729 					tmp_adev->asic_reset_res);
5730 			amdgpu_vf_error_put(tmp_adev,
5731 					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
5732 					    tmp_adev->asic_reset_res);
5733 			if (!r)
5734 				r = tmp_adev->asic_reset_res;
5735 			tmp_adev->asic_reset_res = 0;
5736 		} else {
5737 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
5738 				 atomic_read(&tmp_adev->gpu_reset_counter));
5739 			if (amdgpu_acpi_smart_shift_update(tmp_adev,
5740 							   AMDGPU_SS_DEV_D0))
5741 				dev_warn(tmp_adev->dev,
5742 					 "smart shift update failed\n");
5743 		}
5744 	}
5745 
5746 	return r;
5747 }
5748 
5749 static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
5750 			      struct list_head *device_list,
5751 			      bool   need_emergency_restart)
5752 {
5753 	struct amdgpu_device *tmp_adev = NULL;
5754 
5755 	list_for_each_entry(tmp_adev, device_list, reset_list) {
5756 		/* unlock kfd: SRIOV would do it separately */
5757 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5758 			amdgpu_amdkfd_post_reset(tmp_adev);
5759 
5760 		/* kfd_post_reset will do nothing if kfd device is not initialized,
5761 		 * need to bring up kfd here if it's not be initialized before
5762 		 */
5763 		if (!adev->kfd.init_complete)
5764 			amdgpu_amdkfd_device_init(adev);
5765 
5766 		if (tmp_adev->pcie_reset_ctx.audio_suspended)
5767 			amdgpu_device_resume_display_audio(tmp_adev);
5768 
5769 		amdgpu_device_unset_mp1_state(tmp_adev);
5770 
5771 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5772 
5773 	}
5774 }
5775 
5776 
5777 /**
5778  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5779  *
5780  * @adev: amdgpu_device pointer
5781  * @job: which job trigger hang
5782  * @reset_context: amdgpu reset context pointer
5783  *
5784  * Attempt to reset the GPU if it has hung (all asics).
5785  * Attempt to do soft-reset or full-reset and reinitialize Asic
5786  * Returns 0 for success or an error on failure.
5787  */
5788 
5789 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5790 			      struct amdgpu_job *job,
5791 			      struct amdgpu_reset_context *reset_context)
5792 {
5793 	struct list_head device_list;
5794 	bool job_signaled = false;
5795 	struct amdgpu_hive_info *hive = NULL;
5796 	int r = 0;
5797 	bool need_emergency_restart = false;
5798 	/* save the pasid here as the job may be freed before the end of the reset */
5799 	int pasid = job ? job->pasid : -EINVAL;
5800 
5801 	/*
5802 	 * If it reaches here because of hang/timeout and a RAS error is
5803 	 * detected at the same time, let RAS recovery take care of it.
5804 	 */
5805 	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
5806 	    !amdgpu_sriov_vf(adev) &&
5807 	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
5808 		dev_dbg(adev->dev,
5809 			"Gpu recovery from source: %d yielding to RAS error recovery handling",
5810 			reset_context->src);
5811 		return 0;
5812 	}
5813 
5814 	/*
5815 	 * Special case: RAS triggered and full reset isn't supported
5816 	 */
5817 	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5818 
5819 	/*
5820 	 * Flush RAM to disk so that after reboot
5821 	 * the user can read log and see why the system rebooted.
5822 	 */
5823 	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5824 		amdgpu_ras_get_context(adev)->reboot) {
5825 		dev_warn(adev->dev, "Emergency reboot.");
5826 
5827 		ksys_sync_helper();
5828 		emergency_restart();
5829 	}
5830 
5831 	dev_info(adev->dev, "GPU %s begin!. Source:  %d\n",
5832 		 need_emergency_restart ? "jobs stop" : "reset",
5833 		 reset_context->src);
5834 
5835 	if (!amdgpu_sriov_vf(adev))
5836 		hive = amdgpu_get_xgmi_hive(adev);
5837 	if (hive)
5838 		mutex_lock(&hive->hive_lock);
5839 
5840 	reset_context->job = job;
5841 	reset_context->hive = hive;
5842 	INIT_LIST_HEAD(&device_list);
5843 
5844 	amdgpu_device_recovery_prepare(adev, &device_list, hive);
5845 
5846 	if (!amdgpu_sriov_vf(adev)) {
5847 		r = amdgpu_device_health_check(&device_list);
5848 		if (r)
5849 			goto end_reset;
5850 	}
5851 
5852 	/* Cannot be called after locking reset domain */
5853 	amdgpu_ras_pre_reset(adev, &device_list);
5854 
5855 	/* We need to lock reset domain only once both for XGMI and single device */
5856 	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
5857 
5858 	/* unmap all the mappings of doorbell and framebuffer to prevent user space from
5859 	 * accessing them
5860 	 */
5861 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
5862 	amdgpu_amdkfd_clear_kfd_mapping(adev);
5863 
5864 	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
5865 				      hive, need_emergency_restart);
5866 	if (need_emergency_restart)
5867 		goto skip_sched_resume;
5868 	/*
5869 	 * Must check guilty signal here since after this point all old
5870 	 * HW fences are force signaled.
5871 	 *
5872 	 * job->base holds a reference to parent fence
5873 	 */
5874 	if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
5875 		job_signaled = true;
5876 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5877 		goto skip_hw_reset;
5878 	}
5879 
5880 	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
5881 	if (r)
5882 		goto reset_unlock;
5883 skip_hw_reset:
5884 	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
5885 	if (r)
5886 		goto reset_unlock;
5887 skip_sched_resume:
5888 	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
5889 reset_unlock:
5890 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
5891 	amdgpu_ras_post_reset(adev, &device_list);
5892 end_reset:
5893 	if (hive) {
5894 		mutex_unlock(&hive->hive_lock);
5895 		amdgpu_put_xgmi_hive(hive);
5896 	}
5897 
5898 	if (r)
5899 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5900 
5901 	atomic_set(&adev->reset_domain->reset_res, r);
5902 
5903 	if (!r) {
5904 		struct amdgpu_task_info *ti = NULL;
5905 
5906 		/*
5907 		 * The job may already be freed at this point via the sched tdr workqueue so
5908 		 * use the cached pasid.
5909 		 */
5910 		if (pasid >= 0)
5911 			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
5912 
5913 		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
5914 				     ti ? &ti->task : NULL);
5915 
5916 		amdgpu_vm_put_task_info(ti);
5917 	}
5918 
5919 	return r;
5920 }
5921 
5922 /**
5923  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5924  *
5925  * @adev: amdgpu_device pointer
5926  * @speed: pointer to the speed of the link
5927  * @width: pointer to the width of the link
5928  *
5929  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5930  * first physical partner to an AMD dGPU.
5931  * This will exclude any virtual switches and links.
5932  */
5933 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5934 					    enum pci_bus_speed *speed,
5935 					    enum pcie_link_width *width)
5936 {
5937 	if (!speed || !width)
5938 		return;
5939 
5940 	*speed = PCI_SPEED_UNKNOWN;
5941 	*width = PCIE_LNK_WIDTH_UNKNOWN;
5942 
5943 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5944 		struct pci_dev *parent = amdgpu_device_find_parent(adev);
5945 
5946 		if (parent) {
5947 			*speed = pcie_get_speed_cap(parent);
5948 			*width = pcie_get_width_cap(parent);
5949 		}
5950 	} else {
5951 		/* use the current speeds rather than max if switching is not supported */
5952 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5953 	}
5954 }
5955 
5956 /**
5957  * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
5958  *
5959  * @adev: amdgpu_device pointer
5960  * @speed: pointer to the speed of the link
5961  * @width: pointer to the width of the link
5962  *
5963  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5964  * AMD dGPU which may be a virtual upstream bridge.
5965  */
5966 static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
5967 					enum pci_bus_speed *speed,
5968 					enum pcie_link_width *width)
5969 {
5970 	struct pci_dev *parent = adev->pdev;
5971 
5972 	if (!speed || !width)
5973 		return;
5974 
5975 	/* use the device itself */
5976 	*speed = pcie_get_speed_cap(adev->pdev);
5977 	*width = pcie_get_width_cap(adev->pdev);
5978 
5979 	/* use the link outside the device */
5980 	parent = amdgpu_device_find_parent(adev);
5981 	if (parent) {
5982 		*speed = pcie_get_speed_cap(parent);
5983 		*width = pcie_get_width_cap(parent);
5984 	}
5985 }
5986 
5987 /**
5988  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5989  *
5990  * @adev: amdgpu_device pointer
5991  *
5992  * Fetches and stores in the driver the PCIE capabilities (gen speed
5993  * and lanes) of the slot the device is in. Handles APUs and
5994  * virtualized environments where PCIE config space may not be available.
5995  */
5996 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5997 {
5998 	enum pci_bus_speed speed_cap, platform_speed_cap;
5999 	enum pcie_link_width platform_link_width, link_width;
6000 
6001 	if (amdgpu_pcie_gen_cap)
6002 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6003 
6004 	if (amdgpu_pcie_lane_cap)
6005 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6006 
6007 	/* covers APUs as well */
6008 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6009 		if (adev->pm.pcie_gen_mask == 0)
6010 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6011 		if (adev->pm.pcie_mlw_mask == 0)
6012 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6013 		return;
6014 	}
6015 
6016 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6017 		return;
6018 
6019 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6020 					&platform_link_width);
6021 	amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
6022 
6023 	if (adev->pm.pcie_gen_mask == 0) {
6024 		/* asic caps */
6025 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6026 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6027 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6028 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6029 		} else {
6030 			if (speed_cap == PCIE_SPEED_32_0GT)
6031 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6032 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6033 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6034 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6035 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6036 			else if (speed_cap == PCIE_SPEED_16_0GT)
6037 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6038 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6039 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6040 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6041 			else if (speed_cap == PCIE_SPEED_8_0GT)
6042 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6043 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6044 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6045 			else if (speed_cap == PCIE_SPEED_5_0GT)
6046 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6047 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6048 			else
6049 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6050 		}
6051 		/* platform caps */
6052 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6053 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6054 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6055 		} else {
6056 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6057 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6058 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6059 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6060 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6061 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6062 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6063 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6064 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6065 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6066 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6067 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6068 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6069 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6070 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6071 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6072 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6073 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6074 			else
6075 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6076 
6077 		}
6078 	}
6079 	if (adev->pm.pcie_mlw_mask == 0) {
6080 		/* asic caps */
6081 		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6082 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6083 		} else {
6084 			switch (link_width) {
6085 			case PCIE_LNK_X32:
6086 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
6087 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6088 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6089 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6090 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6091 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6092 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6093 				break;
6094 			case PCIE_LNK_X16:
6095 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
6096 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6097 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6098 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6099 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6100 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6101 				break;
6102 			case PCIE_LNK_X12:
6103 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
6104 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6105 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6106 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6107 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6108 				break;
6109 			case PCIE_LNK_X8:
6110 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
6111 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6112 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6113 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6114 				break;
6115 			case PCIE_LNK_X4:
6116 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
6117 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6118 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6119 				break;
6120 			case PCIE_LNK_X2:
6121 				adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
6122 							   CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6123 				break;
6124 			case PCIE_LNK_X1:
6125 				adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6126 				break;
6127 			default:
6128 				break;
6129 			}
6130 		}
6131 		/* platform caps */
6132 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6133 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6134 		} else {
6135 			switch (platform_link_width) {
6136 			case PCIE_LNK_X32:
6137 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6138 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6139 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6140 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6141 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6142 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6143 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6144 				break;
6145 			case PCIE_LNK_X16:
6146 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6147 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6148 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6149 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6150 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6151 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6152 				break;
6153 			case PCIE_LNK_X12:
6154 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6155 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6156 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6157 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6158 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6159 				break;
6160 			case PCIE_LNK_X8:
6161 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6162 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6163 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6164 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6165 				break;
6166 			case PCIE_LNK_X4:
6167 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6168 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6169 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6170 				break;
6171 			case PCIE_LNK_X2:
6172 				adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6173 							   CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6174 				break;
6175 			case PCIE_LNK_X1:
6176 				adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6177 				break;
6178 			default:
6179 				break;
6180 			}
6181 		}
6182 	}
6183 }
6184 
6185 /**
6186  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6187  *
6188  * @adev: amdgpu_device pointer
6189  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6190  *
6191  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6192  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6193  * @peer_adev.
6194  */
6195 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6196 				      struct amdgpu_device *peer_adev)
6197 {
6198 #ifdef CONFIG_HSA_AMD_P2P
6199 	bool p2p_access =
6200 		!adev->gmc.xgmi.connected_to_cpu &&
6201 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6202 	if (!p2p_access)
6203 		dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6204 			pci_name(peer_adev->pdev));
6205 
6206 	bool is_large_bar = adev->gmc.visible_vram_size &&
6207 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6208 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6209 
6210 	if (!p2p_addressable) {
6211 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6212 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6213 		resource_size_t aper_limit =
6214 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6215 
6216 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6217 				     aper_limit & address_mask);
6218 	}
6219 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6220 #else
6221 	return false;
6222 #endif
6223 }
6224 
6225 int amdgpu_device_baco_enter(struct amdgpu_device *adev)
6226 {
6227 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6228 
6229 	if (!amdgpu_device_supports_baco(adev))
6230 		return -ENOTSUPP;
6231 
6232 	if (ras && adev->ras_enabled &&
6233 	    adev->nbio.funcs->enable_doorbell_interrupt)
6234 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6235 
6236 	return amdgpu_dpm_baco_enter(adev);
6237 }
6238 
6239 int amdgpu_device_baco_exit(struct amdgpu_device *adev)
6240 {
6241 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6242 	int ret = 0;
6243 
6244 	if (!amdgpu_device_supports_baco(adev))
6245 		return -ENOTSUPP;
6246 
6247 	ret = amdgpu_dpm_baco_exit(adev);
6248 	if (ret)
6249 		return ret;
6250 
6251 	if (ras && adev->ras_enabled &&
6252 	    adev->nbio.funcs->enable_doorbell_interrupt)
6253 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6254 
6255 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6256 	    adev->nbio.funcs->clear_doorbell_interrupt)
6257 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6258 
6259 	return 0;
6260 }
6261 
6262 /**
6263  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6264  * @pdev: PCI device struct
6265  * @state: PCI channel state
6266  *
6267  * Description: Called when a PCI error is detected.
6268  *
6269  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6270  */
6271 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6272 {
6273 	struct drm_device *dev = pci_get_drvdata(pdev);
6274 	struct amdgpu_device *adev = drm_to_adev(dev);
6275 	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
6276 		amdgpu_get_xgmi_hive(adev);
6277 	struct amdgpu_reset_context reset_context;
6278 	struct list_head device_list;
6279 
6280 	dev_info(adev->dev, "PCI error: detected callback!!\n");
6281 
6282 	adev->pci_channel_state = state;
6283 
6284 	switch (state) {
6285 	case pci_channel_io_normal:
6286 		dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6287 		return PCI_ERS_RESULT_CAN_RECOVER;
6288 	case pci_channel_io_frozen:
6289 		/* Fatal error, prepare for slot reset */
6290 		dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6291 		if (hive) {
6292 			/* Hive devices should be able to support FW based
6293 			 * link reset on other devices, if not return.
6294 			 */
6295 			if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6296 				dev_warn(adev->dev,
6297 					 "No support for XGMI hive yet...\n");
6298 				return PCI_ERS_RESULT_DISCONNECT;
6299 			}
6300 			/* Set dpc status only if device is part of hive
6301 			 * Non-hive devices should be able to recover after
6302 			 * link reset.
6303 			 */
6304 			amdgpu_reset_set_dpc_status(adev, true);
6305 
6306 			mutex_lock(&hive->hive_lock);
6307 		} else {
6308 			if (amdgpu_device_bus_status_check(adev))
6309 				amdgpu_reset_set_dpc_status(adev, true);
6310 		}
6311 		memset(&reset_context, 0, sizeof(reset_context));
6312 		INIT_LIST_HEAD(&device_list);
6313 
6314 		amdgpu_device_recovery_prepare(adev, &device_list, hive);
6315 		amdgpu_device_recovery_get_reset_lock(adev, &device_list);
6316 		amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
6317 					      hive, false);
6318 		if (hive)
6319 			mutex_unlock(&hive->hive_lock);
6320 		return PCI_ERS_RESULT_NEED_RESET;
6321 	case pci_channel_io_perm_failure:
6322 		/* Permanent error, prepare for device removal */
6323 		dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6324 		return PCI_ERS_RESULT_DISCONNECT;
6325 	}
6326 
6327 	return PCI_ERS_RESULT_NEED_RESET;
6328 }
6329 
6330 /**
6331  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6332  * @pdev: pointer to PCI device
6333  */
6334 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6335 {
6336 	struct drm_device *dev = pci_get_drvdata(pdev);
6337 	struct amdgpu_device *adev = drm_to_adev(dev);
6338 
6339 	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6340 
6341 	/* TODO - dump whatever for debugging purposes */
6342 
6343 	/* This called only if amdgpu_pci_error_detected returns
6344 	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6345 	 * works, no need to reset slot.
6346 	 */
6347 
6348 	return PCI_ERS_RESULT_RECOVERED;
6349 }
6350 
6351 /**
6352  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6353  * @pdev: PCI device struct
6354  *
6355  * Description: This routine is called by the pci error recovery
6356  * code after the PCI slot has been reset, just before we
6357  * should resume normal operations.
6358  */
6359 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6360 {
6361 	struct drm_device *dev = pci_get_drvdata(pdev);
6362 	struct amdgpu_device *adev = drm_to_adev(dev);
6363 	struct amdgpu_reset_context reset_context;
6364 	struct amdgpu_device *tmp_adev;
6365 	struct amdgpu_hive_info *hive;
6366 	struct list_head device_list;
6367 	struct pci_dev *link_dev;
6368 	int r = 0, i, timeout;
6369 	u32 memsize;
6370 	u16 status;
6371 
6372 	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6373 
6374 	memset(&reset_context, 0, sizeof(reset_context));
6375 	INIT_LIST_HEAD(&device_list);
6376 	hive = amdgpu_get_xgmi_hive(adev);
6377 	if (hive) {
6378 		mutex_lock(&hive->hive_lock);
6379 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6380 			list_add_tail(&tmp_adev->reset_list, &device_list);
6381 	} else {
6382 		list_add_tail(&adev->reset_list, &device_list);
6383 	}
6384 
6385 	if (adev->pcie_reset_ctx.swus)
6386 		link_dev = adev->pcie_reset_ctx.swus;
6387 	else
6388 		link_dev = adev->pdev;
6389 	/* wait for asic to come out of reset, timeout = 10s */
6390 	timeout = 10000;
6391 	do {
6392 		usleep_range(10000, 10500);
6393 		r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
6394 		timeout -= 10;
6395 	} while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
6396 		 (status != PCI_VENDOR_ID_AMD));
6397 
6398 	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
6399 		r = -ETIME;
6400 		goto out;
6401 	}
6402 
6403 	amdgpu_device_load_switch_state(adev);
6404 	/* Restore PCI confspace */
6405 	amdgpu_device_load_pci_state(pdev);
6406 
6407 	/* confirm  ASIC came out of reset */
6408 	for (i = 0; i < adev->usec_timeout; i++) {
6409 		memsize = amdgpu_asic_get_config_memsize(adev);
6410 
6411 		if (memsize != 0xffffffff)
6412 			break;
6413 		udelay(1);
6414 	}
6415 	if (memsize == 0xffffffff) {
6416 		r = -ETIME;
6417 		goto out;
6418 	}
6419 
6420 	reset_context.method = AMD_RESET_METHOD_NONE;
6421 	reset_context.reset_req_dev = adev;
6422 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6423 	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
6424 
6425 	if (hive) {
6426 		reset_context.hive = hive;
6427 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
6428 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
6429 	} else {
6430 		adev->pcie_reset_ctx.in_link_reset = true;
6431 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6432 	}
6433 
6434 	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
6435 out:
6436 	if (!r) {
6437 		if (amdgpu_device_cache_pci_state(adev->pdev))
6438 			pci_restore_state(adev->pdev);
6439 		dev_info(adev->dev, "PCIe error recovery succeeded\n");
6440 	} else {
6441 		dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6442 		if (hive) {
6443 			list_for_each_entry(tmp_adev, &device_list, reset_list)
6444 				amdgpu_device_unset_mp1_state(tmp_adev);
6445 		}
6446 		amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6447 	}
6448 
6449 	if (hive) {
6450 		mutex_unlock(&hive->hive_lock);
6451 		amdgpu_put_xgmi_hive(hive);
6452 	}
6453 
6454 	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6455 }
6456 
6457 /**
6458  * amdgpu_pci_resume() - resume normal ops after PCI reset
6459  * @pdev: pointer to PCI device
6460  *
6461  * Called when the error recovery driver tells us that its
6462  * OK to resume normal operation.
6463  */
6464 void amdgpu_pci_resume(struct pci_dev *pdev)
6465 {
6466 	struct drm_device *dev = pci_get_drvdata(pdev);
6467 	struct amdgpu_device *adev = drm_to_adev(dev);
6468 	struct list_head device_list;
6469 	struct amdgpu_hive_info *hive = NULL;
6470 	struct amdgpu_device *tmp_adev = NULL;
6471 
6472 	dev_info(adev->dev, "PCI error: resume callback!!\n");
6473 
6474 	/* Only continue execution for the case of pci_channel_io_frozen */
6475 	if (adev->pci_channel_state != pci_channel_io_frozen)
6476 		return;
6477 
6478 	INIT_LIST_HEAD(&device_list);
6479 
6480 	hive = amdgpu_get_xgmi_hive(adev);
6481 	if (hive) {
6482 		mutex_lock(&hive->hive_lock);
6483 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6484 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
6485 			list_add_tail(&tmp_adev->reset_list, &device_list);
6486 		}
6487 	} else {
6488 		adev->pcie_reset_ctx.in_link_reset = false;
6489 		list_add_tail(&adev->reset_list, &device_list);
6490 	}
6491 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
6492 	amdgpu_device_gpu_resume(adev, &device_list, false);
6493 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
6494 
6495 	if (hive) {
6496 		mutex_unlock(&hive->hive_lock);
6497 		amdgpu_put_xgmi_hive(hive);
6498 	}
6499 }
6500 
6501 static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
6502 {
6503 	struct pci_dev *swus, *swds;
6504 	int r;
6505 
6506 	swds = pci_upstream_bridge(adev->pdev);
6507 	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
6508 	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
6509 		return;
6510 	swus = pci_upstream_bridge(swds);
6511 	if (!swus ||
6512 	    (swus->vendor != PCI_VENDOR_ID_ATI &&
6513 	     swus->vendor != PCI_VENDOR_ID_AMD) ||
6514 	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
6515 		return;
6516 
6517 	/* If already saved, return */
6518 	if (adev->pcie_reset_ctx.swus)
6519 		return;
6520 	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
6521 	r = pci_save_state(swds);
6522 	if (r)
6523 		return;
6524 	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
6525 
6526 	r = pci_save_state(swus);
6527 	if (r)
6528 		return;
6529 	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
6530 
6531 	adev->pcie_reset_ctx.swus = swus;
6532 }
6533 
6534 static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
6535 {
6536 	struct pci_dev *pdev;
6537 	int r;
6538 
6539 	if (!adev->pcie_reset_ctx.swds_pcistate ||
6540 	    !adev->pcie_reset_ctx.swus_pcistate)
6541 		return;
6542 
6543 	pdev = adev->pcie_reset_ctx.swus;
6544 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
6545 	if (!r) {
6546 		pci_restore_state(pdev);
6547 	} else {
6548 		dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
6549 		return;
6550 	}
6551 
6552 	pdev = pci_upstream_bridge(adev->pdev);
6553 	r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
6554 	if (!r)
6555 		pci_restore_state(pdev);
6556 	else
6557 		dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
6558 }
6559 
6560 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6561 {
6562 	struct drm_device *dev = pci_get_drvdata(pdev);
6563 	struct amdgpu_device *adev = drm_to_adev(dev);
6564 	int r;
6565 
6566 	if (amdgpu_sriov_vf(adev))
6567 		return false;
6568 
6569 	r = pci_save_state(pdev);
6570 	if (!r) {
6571 		kfree(adev->pci_state);
6572 
6573 		adev->pci_state = pci_store_saved_state(pdev);
6574 
6575 		if (!adev->pci_state) {
6576 			dev_err(adev->dev, "Failed to store PCI saved state");
6577 			return false;
6578 		}
6579 	} else {
6580 		dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
6581 		return false;
6582 	}
6583 
6584 	amdgpu_device_cache_switch_state(adev);
6585 
6586 	return true;
6587 }
6588 
6589 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6590 {
6591 	struct drm_device *dev = pci_get_drvdata(pdev);
6592 	struct amdgpu_device *adev = drm_to_adev(dev);
6593 	int r;
6594 
6595 	if (!adev->pci_state)
6596 		return false;
6597 
6598 	r = pci_load_saved_state(pdev, adev->pci_state);
6599 
6600 	if (!r) {
6601 		pci_restore_state(pdev);
6602 	} else {
6603 		dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
6604 		return false;
6605 	}
6606 
6607 	return true;
6608 }
6609 
6610 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6611 		struct amdgpu_ring *ring)
6612 {
6613 #ifdef CONFIG_X86_64
6614 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6615 		return;
6616 #endif
6617 	if (adev->gmc.xgmi.connected_to_cpu)
6618 		return;
6619 
6620 	if (ring && ring->funcs->emit_hdp_flush) {
6621 		amdgpu_ring_emit_hdp_flush(ring);
6622 		return;
6623 	}
6624 
6625 	if (!ring && amdgpu_sriov_runtime(adev)) {
6626 		if (!amdgpu_kiq_hdp_flush(adev))
6627 			return;
6628 	}
6629 
6630 	amdgpu_hdp_flush(adev, ring);
6631 }
6632 
6633 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6634 		struct amdgpu_ring *ring)
6635 {
6636 #ifdef CONFIG_X86_64
6637 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6638 		return;
6639 #endif
6640 	if (adev->gmc.xgmi.connected_to_cpu)
6641 		return;
6642 
6643 	amdgpu_hdp_invalidate(adev, ring);
6644 }
6645 
6646 int amdgpu_in_reset(struct amdgpu_device *adev)
6647 {
6648 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6649 }
6650 
6651 /**
6652  * amdgpu_device_halt() - bring hardware to some kind of halt state
6653  *
6654  * @adev: amdgpu_device pointer
6655  *
6656  * Bring hardware to some kind of halt state so that no one can touch it
6657  * any more. It will help to maintain error context when error occurred.
6658  * Compare to a simple hang, the system will keep stable at least for SSH
6659  * access. Then it should be trivial to inspect the hardware state and
6660  * see what's going on. Implemented as following:
6661  *
6662  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6663  *    clears all CPU mappings to device, disallows remappings through page faults
6664  * 2. amdgpu_irq_disable_all() disables all interrupts
6665  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6666  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6667  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6668  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6669  *    flush any in flight DMA operations
6670  */
6671 void amdgpu_device_halt(struct amdgpu_device *adev)
6672 {
6673 	struct pci_dev *pdev = adev->pdev;
6674 	struct drm_device *ddev = adev_to_drm(adev);
6675 
6676 	amdgpu_xcp_dev_unplug(adev);
6677 	drm_dev_unplug(ddev);
6678 
6679 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
6680 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
6681 
6682 	amdgpu_irq_disable_all(adev);
6683 
6684 	amdgpu_fence_driver_hw_fini(adev);
6685 
6686 	adev->no_hw_access = true;
6687 
6688 	amdgpu_device_unmap_mmio(adev);
6689 
6690 	pci_disable_device(pdev);
6691 	pci_wait_for_pending_transaction(pdev);
6692 }
6693 
6694 /**
6695  * amdgpu_device_get_gang - return a reference to the current gang
6696  * @adev: amdgpu_device pointer
6697  *
6698  * Returns: A new reference to the current gang leader.
6699  */
6700 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6701 {
6702 	struct dma_fence *fence;
6703 
6704 	rcu_read_lock();
6705 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6706 	rcu_read_unlock();
6707 	return fence;
6708 }
6709 
6710 /**
6711  * amdgpu_device_switch_gang - switch to a new gang
6712  * @adev: amdgpu_device pointer
6713  * @gang: the gang to switch to
6714  *
6715  * Try to switch to a new gang.
6716  * Returns: NULL if we switched to the new gang or a reference to the current
6717  * gang leader.
6718  */
6719 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6720 					    struct dma_fence *gang)
6721 {
6722 	struct dma_fence *old = NULL;
6723 
6724 	dma_fence_get(gang);
6725 	do {
6726 		dma_fence_put(old);
6727 		old = amdgpu_device_get_gang(adev);
6728 		if (old == gang)
6729 			break;
6730 
6731 		if (!dma_fence_is_signaled(old)) {
6732 			dma_fence_put(gang);
6733 			return old;
6734 		}
6735 
6736 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6737 			 old, gang) != old);
6738 
6739 	/*
6740 	 * Drop it once for the exchanged reference in adev and once for the
6741 	 * thread local reference acquired in amdgpu_device_get_gang().
6742 	 */
6743 	dma_fence_put(old);
6744 	dma_fence_put(old);
6745 	return NULL;
6746 }
6747 
6748 /**
6749  * amdgpu_device_enforce_isolation - enforce HW isolation
6750  * @adev: the amdgpu device pointer
6751  * @ring: the HW ring the job is supposed to run on
6752  * @job: the job which is about to be pushed to the HW ring
6753  *
6754  * Makes sure that only one client at a time can use the GFX block.
6755  * Returns: The dependency to wait on before the job can be pushed to the HW.
6756  * The function is called multiple times until NULL is returned.
6757  */
6758 struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
6759 						  struct amdgpu_ring *ring,
6760 						  struct amdgpu_job *job)
6761 {
6762 	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
6763 	struct drm_sched_fence *f = job->base.s_fence;
6764 	struct dma_fence *dep;
6765 	void *owner;
6766 	int r;
6767 
6768 	/*
6769 	 * For now enforce isolation only for the GFX block since we only need
6770 	 * the cleaner shader on those rings.
6771 	 */
6772 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
6773 	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6774 		return NULL;
6775 
6776 	/*
6777 	 * All submissions where enforce isolation is false are handled as if
6778 	 * they come from a single client. Use ~0l as the owner to distinct it
6779 	 * from kernel submissions where the owner is NULL.
6780 	 */
6781 	owner = job->enforce_isolation ? f->owner : (void *)~0l;
6782 
6783 	mutex_lock(&adev->enforce_isolation_mutex);
6784 
6785 	/*
6786 	 * The "spearhead" submission is the first one which changes the
6787 	 * ownership to its client. We always need to wait for it to be
6788 	 * pushed to the HW before proceeding with anything.
6789 	 */
6790 	if (&f->scheduled != isolation->spearhead &&
6791 	    !dma_fence_is_signaled(isolation->spearhead)) {
6792 		dep = isolation->spearhead;
6793 		goto out_grab_ref;
6794 	}
6795 
6796 	if (isolation->owner != owner) {
6797 
6798 		/*
6799 		 * Wait for any gang to be assembled before switching to a
6800 		 * different owner or otherwise we could deadlock the
6801 		 * submissions.
6802 		 */
6803 		if (!job->gang_submit) {
6804 			dep = amdgpu_device_get_gang(adev);
6805 			if (!dma_fence_is_signaled(dep))
6806 				goto out_return_dep;
6807 			dma_fence_put(dep);
6808 		}
6809 
6810 		dma_fence_put(isolation->spearhead);
6811 		isolation->spearhead = dma_fence_get(&f->scheduled);
6812 		amdgpu_sync_move(&isolation->active, &isolation->prev);
6813 		trace_amdgpu_isolation(isolation->owner, owner);
6814 		isolation->owner = owner;
6815 	}
6816 
6817 	/*
6818 	 * Specifying the ring here helps to pipeline submissions even when
6819 	 * isolation is enabled. If that is not desired for testing NULL can be
6820 	 * used instead of the ring to enforce a CPU round trip while switching
6821 	 * between clients.
6822 	 */
6823 	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
6824 	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
6825 	if (r)
6826 		dev_warn(adev->dev, "OOM tracking isolation\n");
6827 
6828 out_grab_ref:
6829 	dma_fence_get(dep);
6830 out_return_dep:
6831 	mutex_unlock(&adev->enforce_isolation_mutex);
6832 	return dep;
6833 }
6834 
6835 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6836 {
6837 	switch (adev->asic_type) {
6838 #ifdef CONFIG_DRM_AMDGPU_SI
6839 	case CHIP_HAINAN:
6840 #endif
6841 	case CHIP_TOPAZ:
6842 		/* chips with no display hardware */
6843 		return false;
6844 #ifdef CONFIG_DRM_AMDGPU_SI
6845 	case CHIP_TAHITI:
6846 	case CHIP_PITCAIRN:
6847 	case CHIP_VERDE:
6848 	case CHIP_OLAND:
6849 #endif
6850 #ifdef CONFIG_DRM_AMDGPU_CIK
6851 	case CHIP_BONAIRE:
6852 	case CHIP_HAWAII:
6853 	case CHIP_KAVERI:
6854 	case CHIP_KABINI:
6855 	case CHIP_MULLINS:
6856 #endif
6857 	case CHIP_TONGA:
6858 	case CHIP_FIJI:
6859 	case CHIP_POLARIS10:
6860 	case CHIP_POLARIS11:
6861 	case CHIP_POLARIS12:
6862 	case CHIP_VEGAM:
6863 	case CHIP_CARRIZO:
6864 	case CHIP_STONEY:
6865 		/* chips with display hardware */
6866 		return true;
6867 	default:
6868 		/* IP discovery */
6869 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6870 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6871 			return false;
6872 		return true;
6873 	}
6874 }
6875 
6876 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
6877 {
6878 	ssize_t size = 0;
6879 
6880 	if (!ring || !ring->adev)
6881 		return size;
6882 
6883 	if (amdgpu_device_should_recover_gpu(ring->adev))
6884 		size |= AMDGPU_RESET_TYPE_FULL;
6885 
6886 	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
6887 	    !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
6888 		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
6889 
6890 	return size;
6891 }
6892 
6893 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
6894 {
6895 	ssize_t size = 0;
6896 
6897 	if (supported_reset == 0) {
6898 		size += sysfs_emit_at(buf, size, "unsupported");
6899 		size += sysfs_emit_at(buf, size, "\n");
6900 		return size;
6901 
6902 	}
6903 
6904 	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
6905 		size += sysfs_emit_at(buf, size, "soft ");
6906 
6907 	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
6908 		size += sysfs_emit_at(buf, size, "queue ");
6909 
6910 	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
6911 		size += sysfs_emit_at(buf, size, "pipe ");
6912 
6913 	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
6914 		size += sysfs_emit_at(buf, size, "full ");
6915 
6916 	size += sysfs_emit_at(buf, size, "\n");
6917 	return size;
6918 }
6919 
6920 void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
6921 			   enum amdgpu_uid_type type, uint8_t inst,
6922 			   uint64_t uid)
6923 {
6924 	if (!uid_info)
6925 		return;
6926 
6927 	if (type >= AMDGPU_UID_TYPE_MAX) {
6928 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6929 			     type);
6930 		return;
6931 	}
6932 
6933 	if (inst >= AMDGPU_UID_INST_MAX) {
6934 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6935 			     inst);
6936 		return;
6937 	}
6938 
6939 	if (uid_info->uid[type][inst] != 0) {
6940 		dev_warn_once(
6941 			uid_info->adev->dev,
6942 			"Overwriting existing UID %llu for type %d instance %d\n",
6943 			uid_info->uid[type][inst], type, inst);
6944 	}
6945 
6946 	uid_info->uid[type][inst] = uid;
6947 }
6948 
6949 u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
6950 			  enum amdgpu_uid_type type, uint8_t inst)
6951 {
6952 	if (!uid_info)
6953 		return 0;
6954 
6955 	if (type >= AMDGPU_UID_TYPE_MAX) {
6956 		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
6957 			     type);
6958 		return 0;
6959 	}
6960 
6961 	if (inst >= AMDGPU_UID_INST_MAX) {
6962 		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
6963 			     inst);
6964 		return 0;
6965 	}
6966 
6967 	return uid_info->uid[type][inst];
6968 }
6969