xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h (revision 8d5e70ba5da21452735474b70322446aeb442c94)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Author: Monk.liu@amd.com
23  */
24 #ifndef AMDGPU_VIRT_H
25 #define AMDGPU_VIRT_H
26 
27 #include "amdgv_sriovmsg.h"
28 
/*
 * Capability bits; the amdgpu_sriov_*() / amdgpu_passthrough() predicate
 * macros in this header test them against amdgpu_virt.caps.
 */
/* vBIOS is sr-iov ready */
#define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS	0x01
/* sr-iov is enabled on this GPU */
#define AMDGPU_SRIOV_CAPS_ENABLE_IOV	0x02
/* this GPU is a virtual function */
#define AMDGPU_SRIOV_CAPS_IS_VF		0x04
/* the whole GPU is passed through to the VM */
#define AMDGPU_PASSTHROUGH_MODE		0x08
/* is out of full access mode */
#define AMDGPU_SRIOV_CAPS_RUNTIME	0x10
/* MMIO write access is not allowed in sriov runtime */
#define AMDGPU_VF_MMIO_ACCESS_PROTECT	0x20
35 
/*
 * Flags for the indirect register access path supported by rlcg for sriov.
 *
 * Each flag is a 4-bit selector shifted up to bit 28.  The selectors are
 * unsigned so that the legacy write flag (bit 31) is not produced by
 * shifting a signed int into its sign bit; the flags are carried in u32
 * parameters by the RLCG helpers declared in this header.
 */
#define AMDGPU_RLCG_GC_WRITE_LEGACY    (0x8U << 28)
#define AMDGPU_RLCG_GC_WRITE           (0x0U << 28)
#define AMDGPU_RLCG_GC_READ            (0x1U << 28)
#define AMDGPU_RLCG_MMHUB_WRITE        (0x2U << 28)
41 
/* Error codes for the indirect register access path supported by rlcg for sriov */
#define AMDGPU_RLCG_VFGATE_DISABLED		0x04000000
#define AMDGPU_RLCG_WRONG_OPERATION_TYPE	0x02000000
#define AMDGPU_RLCG_REG_NOT_IN_RANGE		0x01000000

/*
 * Field masks, named after SCRATCH1: the address mask covers the low 20
 * bits, the error mask covers bits 27:24 (which contain every error code
 * defined above).
 */
#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK	0x000FFFFF
#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK		0x0F000000
49 
/* IOV function identifier offset used by all ASICs after AI */
#define mmRCC_IOV_FUNC_IDENTIFIER	0x0DE5
/* IOV function identifier offset used by tonga/fiji */
#define mmBIF_IOV_FUNC_IDENTIFIER	0x1503

/* Retry limit for vf2pf updates (see amdgpu_virt.vf2pf_update_retry_cnt) */
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT	2
56 
/* Mode value returned by amdgpu_virt_get_sriov_vf_mode() */
enum amdgpu_sriov_vf_mode {
	SRIOV_VF_MODE_BARE_METAL	= 0,
	SRIOV_VF_MODE_ONE_VF		= 1,
	SRIOV_VF_MODE_MULTI_VF		= 2,
};
62 
/*
 * struct amdgpu_mm_table - handle triple for the mm table
 *
 * Embedded in struct amdgpu_virt as mm_table.
 * NOTE(review): presumably bo is the backing buffer object and
 * cpu_addr / gpu_addr are its CPU mapping and GPU address - confirm against
 * amdgpu_virt_alloc_mm_table().
 */
struct amdgpu_mm_table {
	struct amdgpu_bo *bo;
	uint32_t *cpu_addr;
	uint64_t gpu_addr;
};
68 
69 #define AMDGPU_VF_ERROR_ENTRY_SIZE    16
70 
71 /* struct error_entry - amdgpu VF error information. */
72 struct amdgpu_vf_error_buffer {
73 	struct mutex lock;
74 	int read_count;
75 	int write_count;
76 	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
77 	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
78 	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
79 };
80 
81 enum idh_request;
82 
83 /**
84  * struct amdgpu_virt_ops - amdgpu device virt operations
85  */
86 struct amdgpu_virt_ops {
87 	int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
88 	int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
89 	int (*req_init_data)(struct amdgpu_device *adev);
90 	int (*reset_gpu)(struct amdgpu_device *adev);
91 	void (*ready_to_reset)(struct amdgpu_device *adev);
92 	int (*wait_reset)(struct amdgpu_device *adev);
93 	void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
94 			  u32 data1, u32 data2, u32 data3);
95 	void (*ras_poison_handler)(struct amdgpu_device *adev,
96 					enum amdgpu_ras_block block);
97 	bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
98 	int (*req_ras_err_count)(struct amdgpu_device *adev);
99 	int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
100 };
101 
/**
 * struct amdgpu_virt_fw_reserve - Firmware Reserve Frame buffer
 * @p_pf2vf: pointer to the pf2vf info header
 * @p_vf2pf: pointer to the vf2pf info header
 * @ras_telemetry: opaque pointer; amdgpu_sriov_ras_telemetry_en() requires
 *                 it to be non-NULL
 * @checksum_key: key value (presumably used for the pf2vf/vf2pf checksums -
 *                confirm against the data exchange code)
 */
struct amdgpu_virt_fw_reserve {
	struct amd_sriov_msg_pf2vf_info_header	*p_pf2vf;
	struct amd_sriov_msg_vf2pf_info_header	*p_vf2pf;
	void					*ras_telemetry;
	unsigned int				checksum_key;
};
111 
/*
 * Legacy GIM header
 *
 * Definition shared between PF and VF.
 * Structures forcibly aligned to 4 to keep the same style as PF.
 */
#define AMDGIM_DATAEXCHANGE_OFFSET		(64 * 1024)

/*
 * AMDGIM_GET_STRUCTURE_RESERVED_SIZE - 32-bit words left for padding
 * @total: total size of the structure, in 32-bit words
 * @n_u8:  number of 8-bit members (rounded up to whole 32-bit words)
 * @n_u16: number of 16-bit members (rounded up to whole 32-bit words)
 * @n_u32: number of 32-bit members
 * @n_u64: number of 64-bit members (two 32-bit words each)
 *
 * Every argument, including @total, is parenthesized so that an argument
 * expression built from low-precedence operators is evaluated as a unit.
 */
#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, n_u8, n_u16, n_u32, n_u64) \
		((total) - (((n_u8) + 3) / 4 + ((n_u16) + 1) / 2 + (n_u32) + (n_u64) * 2))
122 
/* Feature bits; the predicate macros in this header test them against amdgpu_virt.gim_feature */
enum AMDGIM_FEATURE_FLAG {
	/* GIM supports feature of Error log collecting */
	AMDGIM_FEATURE_ERROR_LOG_COLLECT	= (1 << 0),
	/* GIM supports feature of loading uCodes */
	AMDGIM_FEATURE_GIM_LOAD_UCODES		= (1 << 1),
	/* VRAM LOST by GIM */
	AMDGIM_FEATURE_GIM_FLR_VRAMLOST		= (1 << 2),
	/* MM bandwidth */
	AMDGIM_FEATURE_GIM_MM_BW_MGR		= (1 << 3),
	/* PP ONE VF MODE in GIM */
	AMDGIM_FEATURE_PP_ONE_VF		= (1 << 4),
	/* Indirect Reg Access enabled */
	AMDGIM_FEATURE_INDIRECT_REG_ACCESS	= (1 << 5),
	/* AV1 Support MODE */
	AMDGIM_FEATURE_AV1_SUPPORT		= (1 << 6),
	/* VCN RB decouple */
	AMDGIM_FEATURE_VCN_RB_DECOUPLE		= (1 << 7),
	/* MES info */
	AMDGIM_FEATURE_MES_INFO_ENABLE		= (1 << 8),
	/* RAS bits, tested by the amdgpu_sriov_ras_*_en() macros */
	AMDGIM_FEATURE_RAS_CAPS			= (1 << 9),
	AMDGIM_FEATURE_RAS_TELEMETRY		= (1 << 10),
	AMDGIM_FEATURE_RAS_CPER			= (1 << 11),
};
146 
/* Register access bits; the amdgpu_sriov_reg_indirect_*() macros test them against amdgpu_virt.reg_access */
enum AMDGIM_REG_ACCESS_FLAG {
	AMDGIM_FEATURE_IH_REG_PSP_EN	= 0x1,	/* Use PSP to program IH_RB_CNTL */
	AMDGIM_FEATURE_MMHUB_REG_RLC_EN	= 0x2,	/* Use RLC to program MMHUB regs */
	AMDGIM_FEATURE_GC_REG_RLC_EN	= 0x4,	/* Use RLC to program GC regs */
};
155 
156 struct amdgim_pf2vf_info_v1 {
157 	/* header contains size and version */
158 	struct amd_sriov_msg_pf2vf_info_header header;
159 	/* max_width * max_height */
160 	unsigned int uvd_enc_max_pixels_count;
161 	/* 16x16 pixels/sec, codec independent */
162 	unsigned int uvd_enc_max_bandwidth;
163 	/* max_width * max_height */
164 	unsigned int vce_enc_max_pixels_count;
165 	/* 16x16 pixels/sec, codec independent */
166 	unsigned int vce_enc_max_bandwidth;
167 	/* MEC FW position in kb from the start of visible frame buffer */
168 	unsigned int mecfw_kboffset;
169 	/* The features flags of the GIM driver supports. */
170 	unsigned int feature_flags;
171 	/* use private key from mailbox 2 to create chueksum */
172 	unsigned int checksum;
173 } __aligned(4);
174 
175 struct amdgim_vf2pf_info_v1 {
176 	/* header contains size and version */
177 	struct amd_sriov_msg_vf2pf_info_header header;
178 	/* driver version */
179 	char driver_version[64];
180 	/* driver certification, 1=WHQL, 0=None */
181 	unsigned int driver_cert;
182 	/* guest OS type and version: need a define */
183 	unsigned int os_info;
184 	/* in the unit of 1M */
185 	unsigned int fb_usage;
186 	/* guest gfx engine usage percentage */
187 	unsigned int gfx_usage;
188 	/* guest gfx engine health percentage */
189 	unsigned int gfx_health;
190 	/* guest compute engine usage percentage */
191 	unsigned int compute_usage;
192 	/* guest compute engine health percentage */
193 	unsigned int compute_health;
194 	/* guest vce engine usage percentage. 0xffff means N/A. */
195 	unsigned int vce_enc_usage;
196 	/* guest vce engine health percentage. 0xffff means N/A. */
197 	unsigned int vce_enc_health;
198 	/* guest uvd engine usage percentage. 0xffff means N/A. */
199 	unsigned int uvd_enc_usage;
200 	/* guest uvd engine usage percentage. 0xffff means N/A. */
201 	unsigned int uvd_enc_health;
202 	unsigned int checksum;
203 } __aligned(4);
204 
205 struct amdgim_vf2pf_info_v2 {
206 	/* header contains size and version */
207 	struct amd_sriov_msg_vf2pf_info_header header;
208 	uint32_t checksum;
209 	/* driver version */
210 	uint8_t driver_version[64];
211 	/* driver certification, 1=WHQL, 0=None */
212 	uint32_t driver_cert;
213 	/* guest OS type and version: need a define */
214 	uint32_t os_info;
215 	/* in the unit of 1M */
216 	uint32_t fb_usage;
217 	/* guest gfx engine usage percentage */
218 	uint32_t gfx_usage;
219 	/* guest gfx engine health percentage */
220 	uint32_t gfx_health;
221 	/* guest compute engine usage percentage */
222 	uint32_t compute_usage;
223 	/* guest compute engine health percentage */
224 	uint32_t compute_health;
225 	/* guest vce engine usage percentage. 0xffff means N/A. */
226 	uint32_t vce_enc_usage;
227 	/* guest vce engine health percentage. 0xffff means N/A. */
228 	uint32_t vce_enc_health;
229 	/* guest uvd engine usage percentage. 0xffff means N/A. */
230 	uint32_t uvd_enc_usage;
231 	/* guest uvd engine usage percentage. 0xffff means N/A. */
232 	uint32_t uvd_enc_health;
233 	uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
234 } __aligned(4);
235 
/**
 * struct amdgpu_virt_ras_err_handler_data - bad page bookkeeping
 * @bps: points to the bad page records array
 * @bps_bo: points to the reserved bo array
 * @count: the count of entries
 * @last_reserved: last reserved entry's index + 1
 */
struct amdgpu_virt_ras_err_handler_data {
	struct eeprom_table_record	*bps;
	struct amdgpu_bo		**bps_bo;
	int				count;
	int				last_reserved;
};
246 
247 struct amdgpu_virt_ras {
248 	struct ratelimit_state ras_error_cnt_rs;
249 	struct ratelimit_state ras_cper_dump_rs;
250 	struct mutex ras_telemetry_mutex;
251 	uint64_t cper_rptr;
252 };
253 
254 /* GPU virtualization */
255 struct amdgpu_virt {
256 	uint32_t			caps;
257 	struct amdgpu_bo		*csa_obj;
258 	void				*csa_cpu_addr;
259 	bool chained_ib_support;
260 	uint32_t			reg_val_offs;
261 	struct amdgpu_irq_src		ack_irq;
262 	struct amdgpu_irq_src		rcv_irq;
263 	struct work_struct		flr_work;
264 	struct amdgpu_mm_table		mm_table;
265 	const struct amdgpu_virt_ops	*ops;
266 	struct amdgpu_vf_error_buffer	vf_errors;
267 	struct amdgpu_virt_fw_reserve	fw_reserve;
268 	uint32_t gim_feature;
269 	uint32_t reg_access_mode;
270 	int req_init_data_ver;
271 	bool tdr_debug;
272 	struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
273 	bool ras_init_done;
274 	uint32_t reg_access;
275 
276 	/* vf2pf message */
277 	struct delayed_work vf2pf_work;
278 	uint32_t vf2pf_update_interval_ms;
279 	int vf2pf_update_retry_cnt;
280 
281 	/* multimedia bandwidth config */
282 	bool     is_mm_bw_enabled;
283 	uint32_t decode_max_dimension_pixels;
284 	uint32_t decode_max_frame_pixels;
285 	uint32_t encode_max_dimension_pixels;
286 	uint32_t encode_max_frame_pixels;
287 
288 	/* the ucode id to signal the autoload */
289 	uint32_t autoload_ucode_id;
290 
291 	/* Spinlock to protect access to the RLCG register interface */
292 	spinlock_t rlcg_reg_lock;
293 
294 	union amd_sriov_ras_caps ras_en_caps;
295 	union amd_sriov_ras_caps ras_telemetry_en_caps;
296 	struct amdgpu_virt_ras ras;
297 	struct amd_sriov_ras_telemetry_error_count count_cache;
298 };
299 
/* Forward declaration; only pointers to it appear in this header's prototypes */
struct amdgpu_video_codec_info;
301 
/*
 * Predicates on (adev)->virt.caps.  The single-bit tests evaluate to the
 * masked value (non-zero when the bit is set), not to a normalized 0/1.
 */
#define amdgpu_sriov_enabled(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)

#define amdgpu_sriov_vf(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_IS_VF)

#define amdgpu_sriov_bios(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)

#define amdgpu_sriov_runtime(adev) \
	((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)

/* True for a VF whose RUNTIME bit is clear */
#define amdgpu_sriov_fullaccess(adev) \
	(amdgpu_sriov_vf(adev) && !amdgpu_sriov_runtime(adev))
316 
/*
 * Indirect register access predicates.  Each one is true only for a VF
 * (amdgpu_sriov_vf()) that also has the named bit set: the first checks
 * virt.gim_feature, the others check virt.reg_access.
 */
#define amdgpu_sriov_reg_indirect_en(adev) \
	(amdgpu_sriov_vf(adev) && \
	 ((adev)->virt.gim_feature & AMDGIM_FEATURE_INDIRECT_REG_ACCESS))

#define amdgpu_sriov_reg_indirect_ih(adev) \
	(amdgpu_sriov_vf(adev) && \
	 ((adev)->virt.reg_access & AMDGIM_FEATURE_IH_REG_PSP_EN))

#define amdgpu_sriov_reg_indirect_mmhub(adev) \
	(amdgpu_sriov_vf(adev) && \
	 ((adev)->virt.reg_access & AMDGIM_FEATURE_MMHUB_REG_RLC_EN))

#define amdgpu_sriov_reg_indirect_gc(adev) \
	(amdgpu_sriov_vf(adev) && \
	 ((adev)->virt.reg_access & AMDGIM_FEATURE_GC_REG_RLC_EN))

/* True when either the MMHUB or the GC indirect path is enabled */
#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
	(amdgpu_sriov_reg_indirect_mmhub(adev) || \
	 amdgpu_sriov_reg_indirect_gc(adev))
335 
/* Masked tests of (adev)->virt.caps; non-zero when the bit is set */
#define amdgpu_passthrough(adev) \
	((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)

#define amdgpu_sriov_vf_mmio_access_protection(adev) \
	((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)

/* RAS feature tests against (adev)->virt.gim_feature */
#define amdgpu_sriov_ras_caps_en(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)

/* Needs both the feature bit and a non-NULL fw_reserve.ras_telemetry */
#define amdgpu_sriov_ras_telemetry_en(adev) \
	(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && \
	 (adev)->virt.fw_reserve.ras_telemetry)

/* Telemetry enabled and bit sriov_blk set in ras_telemetry_en_caps.all */
#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
	(amdgpu_sriov_ras_telemetry_en(adev) && \
	 ((adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk)))

#define amdgpu_sriov_ras_cper_en(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
353 
/*
 * is_virtual_machine - report whether the kernel appears to run in a VM
 *
 * On x86 this is the X86_FEATURE_HYPERVISOR boot CPU feature; on arm64 it is
 * true when the kernel is not in hyp mode.  Every other configuration
 * reports false.
 */
static inline bool is_virtual_machine(void)
{
	bool virtualized = false;

#if defined(CONFIG_X86)
	virtualized = boot_cpu_has(X86_FEATURE_HYPERVISOR);
#elif defined(CONFIG_ARM64)
	virtualized = !is_kernel_in_hyp_mode();
#endif

	return virtualized;
}
364 
/*
 * Feature and mode predicates.
 *
 * Apart from the two tdr_debug macros, each one is a masked test of
 * (adev)->virt.gim_feature and evaluates to the masked value (non-zero when
 * the bit is set).  The macro argument is parenthesized at every use so that
 * any pointer-valued expression can be passed as adev.
 */
#define amdgpu_sriov_is_pp_one_vf(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
/* VF without the PP_ONE_VF feature bit */
#define amdgpu_sriov_multi_vf_mode(adev) \
	(amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
/* Not in reset and tdr_debug set */
#define amdgpu_sriov_is_debug(adev) \
	((!amdgpu_in_reset(adev)) && (adev)->virt.tdr_debug)
/* Not in reset and tdr_debug clear */
#define amdgpu_sriov_is_normal(adev) \
	((!amdgpu_in_reset(adev)) && (!(adev)->virt.tdr_debug))
#define amdgpu_sriov_is_av1_support(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
#define amdgpu_sriov_is_mes_info_enable(adev) \
	((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)

/*
 * Prototypes only; the implementations are not part of this header.
 * NOTE(review): the groupings below follow the function names - confirm
 * behaviour against the definitions.
 */

/* setup and MMIO state */
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);

/* full GPU access, init data and reset */
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);

/* mm table (see struct amdgpu_mm_table) */
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);

/* RAS interrupt and error handler data */
bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);

/* data exchange */
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);

void amdgpu_virt_init(struct amdgpu_device *adev);
395 
/* debugfs access helpers (prototypes only; behaviour is defined elsewhere) */
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);

/* Returns one of the enum amdgpu_sriov_vf_mode values */
enum amdgpu_sriov_vf_mode
amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
401 
402 void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
403 			struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
404 			struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
405 void amdgpu_sriov_wreg(struct amdgpu_device *adev,
406 		       u32 offset, u32 value,
407 		       u32 acc_flags, u32 hwip, u32 xcc_id);
408 u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
409 		      u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
410 bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
411 			uint32_t ucode_id);
412 void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
413 void amdgpu_virt_post_reset(struct amdgpu_device *adev);
414 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
415 bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
416 					  u32 acc_flags, u32 hwip,
417 					  bool write, u32 *rlcg_flag);
418 u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
419 bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
420 int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
421 				  struct ras_err_data *err_data);
422 int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
423 int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
424 bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
425 					enum amdgpu_ras_block block);
426 #endif
427