xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h (revision e332935a540eb76dd656663ca908eb0544d96757)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 #ifndef __AMDGPU_XGMI_H__
23 #define __AMDGPU_XGMI_H__
24 
25 #include <drm/task_barrier.h>
26 #include "amdgpu_ras.h"
27 
/*
 * XGMI link speed identifiers.
 *
 * Each enumerator's numeric value equals the number in its name
 * (presumably GT/s, going by the "GT" suffix -- confirm with the users
 * of amdgpu_xgmi.max_speed).
 */
enum amdgpu_xgmi_link_speed {
	XGMI_SPEED_16GT = 16,
	XGMI_SPEED_25GT = 25,
	XGMI_SPEED_32GT = 32,
};
33 
/*
 * Per-hive bookkeeping record.
 *
 * Only the layout is visible in this header. The field notes below state
 * what the types establish and explicitly hedge anything that depends on
 * code outside this file.
 */
struct amdgpu_hive_info {
	struct kobject kobj;		/* embedded kobject */
	uint64_t hive_id;		/* 64-bit hive identifier */
	struct list_head device_list;	/* list head; presumably the member devices -- TODO confirm */
	struct list_head node;		/* list linkage for this record itself */
	atomic_t number_devices;	/* atomic device counter */
	struct mutex hive_lock;		/* NOTE(review): what this protects is not visible here */
	/* NOTE(review): "hi_req" presumably means "high pstate request" -- confirm */
	int hi_req_count;
	struct amdgpu_device *hi_req_gpu;
	struct task_barrier tb;		/* type comes from <drm/task_barrier.h> */
	/* Current pstate of the hive; one of the three values below. */
	enum {
		AMDGPU_XGMI_PSTATE_MIN,
		AMDGPU_XGMI_PSTATE_MAX_VEGA20,
		AMDGPU_XGMI_PSTATE_UNKNOWN
	} pstate;

	struct amdgpu_reset_domain *reset_domain;	/* pointer only; ownership not visible here */
	atomic_t ras_recovery;		/* atomic flag/counter; semantics defined by its users */
	struct ras_event_manager event_mgr;
	struct work_struct reset_on_init_work;	/* work item; handler is assigned elsewhere */
	atomic_t requested_nps_mode;	/* see amdgpu_xgmi_request_nps_change() prototype below */
};
56 
/*
 * Describes one named PCS error field as a mask/shift pair.
 *
 * NOTE(review): presumably decoded as (value & pcs_err_mask) >> pcs_err_shift;
 * confirm against the code that consumes these entries.
 */
struct amdgpu_pcs_ras_field {
	const char *err_name;		/* human-readable name of the error field */
	uint32_t pcs_err_mask;		/* bit mask selecting the field */
	uint32_t pcs_err_shift;		/* bit position of the field */
};
62 
/**
 * enum amdgpu_xgmi_bw_mode - bandwidth range reporting mode
 * @AMDGPU_XGMI_BW_MODE_PER_LINK: range for any xgmi link
 * @AMDGPU_XGMI_BW_MODE_PER_PEER: range of max of single xgmi link to max of
 *                                multiple links based on source peer
 *
 * Bandwidth range reporting comes in two modes; the mode is passed to
 * amdgpu_xgmi_get_bandwidth() as its bw_mode argument.
 */
enum amdgpu_xgmi_bw_mode {
	AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
	AMDGPU_XGMI_BW_MODE_PER_PEER
};
73 
/*
 * Unit selector passed to amdgpu_xgmi_get_bandwidth() as its bw_unit
 * argument (presumably gigabytes vs. megabytes per second -- confirm in
 * the implementation).
 */
enum amdgpu_xgmi_bw_unit {
	AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
	AMDGPU_XGMI_BW_UNIT_MBYTES = 1,
};
78 
/*
 * XGMI RAS object: currently just wraps a generic RAS block object.
 */
struct amdgpu_xgmi_ras {
	struct amdgpu_ras_block_object ras_block;
};
/* Declared here, defined in a source file outside this header. */
extern struct amdgpu_xgmi_ras xgmi_ras;
83 
84 struct amdgpu_xgmi {
85 	/* from psp */
86 	u64 node_id;
87 	u64 hive_id;
88 	/* fixed per family */
89 	u64 node_segment_size;
90 	/* physical node (0-3) */
91 	unsigned physical_node_id;
92 	/* number of nodes (0-4) */
93 	unsigned num_physical_nodes;
94 	/* gpu list in the same hive */
95 	struct list_head head;
96 	bool supported;
97 	struct ras_common_if *ras_if;
98 	bool connected_to_cpu;
99 	struct amdgpu_xgmi_ras *ras;
100 	enum amdgpu_xgmi_link_speed max_speed;
101 	uint8_t max_width;
102 };
103 
/*
 * XGMI interface prototypes. The implementations are not part of this
 * header; the grouping comments below are based on names and signatures
 * only and should be confirmed against the implementation.
 */

/* Hive lookup and release (the get/put naming suggests a paired reference). */
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);

/* Hive membership and topology. */
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive,
				struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);

/* Queries between a device and a peer device. */
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
			       struct amdgpu_device *peer_adev);
int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev,
			      struct amdgpu_device *peer_adev,
			      enum amdgpu_xgmi_bw_mode bw_mode,
			      enum amdgpu_xgmi_bw_unit bw_unit,
			      uint32_t *min_bw, uint32_t *max_bw);
bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
					struct amdgpu_device *peer_adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
					   uint64_t addr);
bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
			   struct amdgpu_device *bo_adev);

/* RAS setup and reset-on-init. */
int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);

/* NPS mode change request and link queries. */
int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
				   struct amdgpu_hive_info *hive,
				   int req_nps_mode);
int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
				int global_link_num);
int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);

/* Early initialisation and maximum-bandwidth query. */
void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
132 
133 #endif
134