/* xref: /linux/drivers/accel/amdxdna/aie2_pci.h (revision 9e4e86a604dfd06402933467578c4b79f5412b2c) */
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
4  */
5 
6 #ifndef _AIE2_PCI_H_
7 #define _AIE2_PCI_H_
8 
9 #include <drm/amdxdna_accel.h>
10 #include <linux/limits.h>
11 #include <linux/semaphore.h>
12 
13 #include "aie2_msg_priv.h"
14 #include "amdxdna_mailbox.h"
15 
/* Default polling granularity and upper bound, both in microseconds */
#define AIE2_INTERVAL	20000	/* us */
#define AIE2_TIMEOUT	1000000	/* us */

/* Firmware determines device memory base address and size */
#define AIE2_DEVM_BASE	0x4000000
#define AIE2_DEVM_SIZE	SZ_64M

/* Map a device handle back to its underlying struct pci_dev */
#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))

/* Convert a device-side address into an offset from the SRAM/mailbox base */
#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)

/* Look up register locations in the static per-generation tables (amdxdna_dev_priv) */
#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
31 
/*
 * Compute the __iomem address of an SMU register.
 * The handle is captured once into a local so the macro argument is
 * evaluated a single time; the argument is parenthesized at the capture
 * site for macro hygiene, matching MBOX_SIZE below.
 */
#define SMU_REG(ndev, idx) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
})
/* Compute the __iomem address of an SRAM-resident register (see enum aie2_sram_reg_idx) */
#define SRAM_GET_ADDR(ndev, idx) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
})
42 
/* Mailbox channel slot size */
#define CHAN_SLOT_SZ SZ_8K
/*
 * Size of the mailbox region: the per-device override if non-zero,
 * otherwise the full length of the mailbox BAR (see amdxdna_dev_priv.mbox_size).
 */
#define MBOX_SIZE(ndev) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
	pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})
50 
51 #if IS_ENABLED(CONFIG_AMD_PMF)
52 #define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics)
53 #define AIE2_GET_PMF_NPU_DATA(field, val)				\
54 ({									\
55 	struct amd_pmf_npu_metrics _npu_metrics;			\
56 	int _ret;							\
57 									\
58 	_ret = amd_pmf_get_npu_data(&_npu_metrics);			\
59 	val = _ret ? U32_MAX : _npu_metrics.field;			\
60 	(_ret);								\
61 })
62 #else
63 #define AIE2_GET_PMF_NPU_METRICS(metrics)				\
64 ({									\
65 	typeof(metrics) _m = metrics;					\
66 	memset(_m, 0xff, sizeof(*_m));					\
67 	(-EOPNOTSUPP);							\
68 })
69 
70 #define SENSOR_DEFAULT_npu_power	U32_MAX
71 #define AIE2_GET_PMF_NPU_DATA(field, val)				\
72 ({									\
73 	val = SENSOR_DEFAULT_##field;					\
74 	(-EOPNOTSUPP);							\
75 })
76 #endif
77 
/* Indices into amdxdna_dev_priv.smu_regs_off[]; used via SMU_REG() */
enum aie2_smu_reg_idx {
	SMU_CMD_REG = 0,
	SMU_ARG_REG,
	SMU_INTR_REG,
	SMU_RESP_REG,
	SMU_OUT_REG,
	SMU_MAX_REGS /* Keep this at the end */
};
86 
/* Indices into amdxdna_dev_priv.sram_offs[]; used via SRAM_GET_ADDR() */
enum aie2_sram_reg_idx {
	MBOX_CHANN_OFF = 0,
	FW_ALIVE_OFF,
	SRAM_MAX_INDEX /* Keep this at the end */
};
92 
/*
 * Indices into amdxdna_dev_priv.psp_regs_off[] and psp_config.psp_regs[].
 * The first PSP_NUM_IN_REGS entries are input (command/argument) registers;
 * the rest are read back by the host.
 */
enum psp_reg_idx {
	PSP_CMD_REG = 0,
	PSP_ARG0_REG,
	PSP_ARG1_REG,
	PSP_ARG2_REG,
	PSP_NUM_IN_REGS, /* number of input registers */
	PSP_INTR_REG = PSP_NUM_IN_REGS,
	PSP_STATUS_REG,
	PSP_RESP_REG,
	PSP_PWAITMODE_REG,
	PSP_MAX_REGS /* Keep this at the end */
};
105 
106 struct amdxdna_client;
107 struct amdxdna_fw_ver;
108 struct amdxdna_hwctx;
109 struct amdxdna_sched_job;
110 
/* Firmware image and mapped register set handed to aie2m_psp_create() */
struct psp_config {
	const void	*fw_buf;	/* firmware image buffer */
	u32		fw_size;	/* size of fw_buf in bytes */
	void __iomem	*psp_regs[PSP_MAX_REGS]; /* mapped PSP registers, enum psp_reg_idx */
};
116 
/* AIE array version; filled by aie2_query_aie_version() */
struct aie_version {
	u16 major;
	u16 minor;
};
121 
/* Properties of one tile type (core/mem/shim) within the AIE array */
struct aie_tile_metadata {
	u16 row_count;		/* number of rows of this tile type */
	u16 row_start;		/* first row occupied by this tile type */
	u16 dma_channel_count;
	u16 lock_count;
	u16 event_reg_count;
};
129 
/* AIE array geometry and tile metadata; filled by aie2_query_aie_metadata() */
struct aie_metadata {
	u32 size;
	u16 cols;	/* total columns in the array */
	u16 rows;	/* total rows in the array */
	struct aie_version version;
	struct aie_tile_metadata core;	/* core tiles */
	struct aie_tile_metadata mem;	/* memory tiles */
	struct aie_tile_metadata shim;	/* shim tiles */
};
139 
/* Categories of runtime-configuration entries; selects which rt_config entries aie2_runtime_cfg() applies */
enum rt_config_category {
	AIE2_RT_CFG_INIT,
	AIE2_RT_CFG_CLK_GATING,
	AIE2_RT_CFG_FORCE_PREEMPT,
	AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
};
146 
/* One firmware runtime-configuration entry (see aie2_set_runtime_cfg()) */
struct rt_config {
	u32	type;		/* configuration type ID sent to firmware */
	u32	value;		/* value for this type */
	u32	category;	/* enum rt_config_category */
	unsigned long feature_mask; /* presumably aie2_fw_feature bits gating this entry — confirm in aie2_runtime_cfg() */
};
153 
/* Clock pair for one DPM level (see npu*_dpm_clk_table, aie2_pm_set_dpm()) */
struct dpm_clk_freq {
	u32	npuclk;	/* NPU clock frequency */
	u32	hclk;	/* H-clock frequency */
};
158 
/*
 * Define the maximum number of pending commands in a hardware context.
 * Must be power of 2!
 */
#define HWCTX_MAX_CMDS		4
/* Map a monotonic sequence number onto a cmd_buf[] slot (relies on the power-of-2 size) */
#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
/* AIE2-specific state attached to a hardware context */
struct amdxdna_hwctx_priv {
	struct amdxdna_gem_obj		*heap;		/* heap buffer object for this context */
	void				*mbox_chann;	/* mailbox channel handle */

	struct drm_gpu_scheduler	sched;		/* per-context DRM GPU scheduler */
	struct drm_sched_entity		entity;

	struct mutex			io_lock; /* protect seq and cmd order */
	struct wait_queue_head		job_free_wq;
	u32				num_pending;	/* NOTE(review): presumably jobs submitted but not yet retired — confirm in aie2_hwctx.c */
	u64				seq;		/* submission sequence number, protected by io_lock */
	struct semaphore		job_sem;
	bool				job_done;

	/* Completed job counter */
	u64				completed;

	struct amdxdna_gem_obj		*cmd_buf[HWCTX_MAX_CMDS];	/* per-slot command buffers, indexed by get_job_idx() */
	struct drm_syncobj		*syncobj;
};
185 
/* Lifecycle state of the AIE2 device (amdxdna_dev_hdl.dev_status) */
enum aie2_dev_status {
	AIE2_DEV_UNINIT,
	AIE2_DEV_INIT,
	AIE2_DEV_START,
};
191 
/*
 * Hooks for building execution request messages and command-list slots
 * from command buffer objects; installed in amdxdna_dev_hdl.exec_msg_ops.
 * The init_* hooks build a whole request, the fill_* hooks populate one
 * slot of a command chain.
 */
struct aie2_exec_msg_ops {
	int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			   size_t *size, u32 *msg_op);
	int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			    size_t *size, u32 *msg_op);
	void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
	int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	u32 (*get_chain_msg_op)(u32 cmd_op);	/* map a command opcode to its chain message opcode */
};
204 
/* Runtime state for one AIE2 device instance */
struct amdxdna_dev_hdl {
	struct amdxdna_dev		*xdna;		/* owning generic amdxdna device */
	const struct amdxdna_dev_priv	*priv;		/* static per-generation description */
	void			__iomem *sram_base;	/* mapped SRAM region */
	void			__iomem *smu_base;	/* mapped SMU registers, see SMU_REG() */
	void			__iomem *mbox_base;	/* mapped mailbox region */
	struct psp_device		*psp_hdl;

	/* Management channel ring resources (x2i/i2x directions) */
	struct xdna_mailbox_chann_res	mgmt_x2i;
	struct xdna_mailbox_chann_res	mgmt_i2x;
	u32				mgmt_chan_idx;
	u32				mgmt_prot_major;	/* management protocol version */
	u32				mgmt_prot_minor;

	u32				total_col;
	struct aie_version		version;
	struct aie_metadata		metadata;
	unsigned long			feature_mask;	/* aie2_fw_feature bits, tested via AIE2_FEATURE_ON() */
	struct aie2_exec_msg_ops	*exec_msg_ops;

	/* Power management and clock */
	enum amdxdna_power_mode_type	pw_mode;
	u32				dpm_level;	/* current DPM level */
	u32				dft_dpm_level;	/* default DPM level */
	u32				max_dpm_level;
	u32				clk_gating;
	u32				npuclk_freq;
	u32				hclk_freq;
	u32				max_tops;
	u32				curr_tops;
	u32				force_preempt_enabled;
	u32				frame_boundary_preempt;

	/* Mailbox and the management channel */
	struct mailbox			*mbox;
	struct mailbox_channel		*mgmt_chann;
	struct async_events		*async_events;

	enum aie2_dev_status		dev_status;
	u32				hwctx_num;	/* number of hardware contexts */

	struct amdxdna_async_error	last_async_err;
};
248 
/*
 * Designated-initializer helper for aie2_bar_off_pair tables: converts
 * an absolute register address into (BAR index, offset within that BAR)
 * using the token-pasted <bar>_BAR_INDEX and <bar>_BAR_BASE macros
 * defined in the per-NPU headers.
 */
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
	[reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}

/* A register location expressed as PCI BAR index plus offset within the BAR */
struct aie2_bar_off_pair {
	int	bar_idx;
	u32	offset;
};
256 
/* Per-generation hardware operations (e.g. npu1_set_dpm / npu4_set_dpm) */
struct aie2_hw_ops {
	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};
260 
/* Optional firmware features, used as bit numbers in amdxdna_dev_hdl.feature_mask */
enum aie2_fw_feature {
	AIE2_NPU_COMMAND,
	AIE2_PREEMPT,
	AIE2_TEMPORAL_ONLY,
	AIE2_APP_HEALTH,
	AIE2_FEATURE_MAX /* Keep this at the end */
};

/*
 * Feature table entry; presumably matched against the firmware version
 * (major, minor in [min_minor, max_minor]) — confirm in the feature
 * detection code.
 */
struct aie2_fw_feature_tbl {
	u64 features;	/* bitmask of enum aie2_fw_feature bits */
	u32 major;
	u32 max_minor;
	u32 min_minor;
};

/* Mask covering every defined aie2_fw_feature bit */
#define AIE2_ALL_FEATURES	GENMASK_ULL(AIE2_FEATURE_MAX - 1, AIE2_NPU_COMMAND)
/* True if @feature's bit is set in ndev->feature_mask */
#define AIE2_FEATURE_ON(ndev, feature)	test_bit(feature, &(ndev)->feature_mask)
278 
/*
 * Static description of one NPU generation; instances are defined in the
 * per-NPU source files and reached through amdxdna_dev_hdl.priv.
 */
struct amdxdna_dev_priv {
	const char			*fw_path;	/* firmware image path */
	const struct rt_config		*rt_config;	/* default runtime-config table */
	const struct dpm_clk_freq	*dpm_clk_tbl;	/* DPM level -> clock pair table */
	const struct aie2_fw_feature_tbl *fw_feature_tbl;

/* Values for col_align below */
#define COL_ALIGN_NONE   0
#define COL_ALIGN_NATURE 1
	u32				col_align;	/* column alignment requirement, COL_ALIGN_* */
	u32				mbox_dev_addr;	/* device-side mailbox base, see AIE2_MBOX_OFF() */
	/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
	u32				mbox_size;
	u32				hwctx_limit;
	u32				sram_dev_addr;	/* device-side SRAM base, see AIE2_SRAM_OFF() */
	struct aie2_bar_off_pair	sram_offs[SRAM_MAX_INDEX];
	struct aie2_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
	struct aie2_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
	struct aie2_hw_ops		hw_ops;
};
298 
/* Device ops table registered with the generic amdxdna core */
extern const struct amdxdna_dev_ops aie2_ops;

/* Apply the runtime-configuration entries of @category (see struct rt_config) */
int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
		     enum rt_config_category category, u32 *val);

/* aie2 npu hw config — tables defined in the per-NPU source files */
extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
310 
/* aie2_smu.c — SMU setup/teardown and the per-generation set_dpm implementations */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
316 
/* aie2_pm.c — power-mode and DPM-level control */
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
321 
/* aie2_psp.c — PSP (firmware loader/security processor) control */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
int aie2_psp_start(struct psp_device *psp);
void aie2_psp_stop(struct psp_device *psp);
int aie2_psp_waitmode_poll(struct psp_device *psp);
327 
/* aie2_error.c — asynchronous error event handling */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);
int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
			       struct amdxdna_drm_get_array *args);
334 
/*
 * aie2_message.c — management/execution messages exchanged with firmware
 * over the mailbox. The notify_cb parameters are completion callbacks
 * invoked with the response payload (handle, __iomem data, size).
 */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
				struct amdxdna_fw_ver *fw_ver);
int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
			  struct app_health_report *report);
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
			 char __user *buf, u32 size,
			 struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, void __iomem *, size_t));
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
		   int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
				struct amdxdna_sched_job *job,
				int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
			       struct amdxdna_sched_job *job,
			       int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
			 int (*notify_cb)(void *, void __iomem *, size_t));
void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
			    dma_addr_t *dma_addr);
void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
			  void *cpu_addr, dma_addr_t dma_addr);
376 
/* aie2_hwctx.c — hardware context lifecycle and command submission */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
386 
387 #endif /* _AIE2_PCI_H_ */
388