xref: /linux/drivers/accel/amdxdna/aie2_pci.h (revision d30c1683aaecb93d2ab95685dc4300a33d3cea7a)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
4  */
5 
6 #ifndef _AIE2_PCI_H_
7 #define _AIE2_PCI_H_
8 
9 #include <drm/amdxdna_accel.h>
10 #include <linux/semaphore.h>
11 
12 #include "amdxdna_mailbox.h"
13 
/* Polling interval and timeout used when waiting on device state, in us */
#define AIE2_INTERVAL	20000	/* us */
#define AIE2_TIMEOUT	1000000	/* us */

/* Firmware determines device memory base address and size */
#define AIE2_DEVM_BASE	0x4000000
#define AIE2_DEVM_SIZE	SZ_64M

/* Map an aie2 device handle back to its underlying PCI device */
#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))

/* Convert a device-side address into an offset within the SRAM/mailbox region */
#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)

/* Look up register location entries from the per-device priv tables */
#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
29 
/*
 * Compute the ioremapped address of an SMU register / SRAM location.
 * Statement expressions cache the ndev argument so it is evaluated
 * exactly once; the argument is parenthesized for macro hygiene,
 * consistent with MBOX_SIZE() below.
 */
#define SMU_REG(ndev, idx) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \
})
#define SRAM_GET_ADDR(ndev, idx) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \
})
40 
/* Size of one mailbox channel slot carved out of the mailbox region */
#define CHAN_SLOT_SZ SZ_8K
/*
 * Total mailbox region size: use the per-device override when priv->mbox_size
 * is non-zero, otherwise the full length of the mailbox BAR.
 */
#define MBOX_SIZE(ndev) \
({ \
	typeof(ndev) _ndev = (ndev); \
	((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \
	pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})
48 
/* Indices into amdxdna_dev_priv.smu_regs_off[], used via SMU_REG() */
enum aie2_smu_reg_idx {
	SMU_CMD_REG = 0,
	SMU_ARG_REG,
	SMU_INTR_REG,
	SMU_RESP_REG,
	SMU_OUT_REG,
	SMU_MAX_REGS /* Keep this at the end */
};
57 
/* Indices into amdxdna_dev_priv.sram_offs[], used via SRAM_GET_ADDR() */
enum aie2_sram_reg_idx {
	MBOX_CHANN_OFF = 0,
	FW_ALIVE_OFF,
	SRAM_MAX_INDEX /* Keep this at the end */
};
63 
/*
 * Indices into amdxdna_dev_priv.psp_regs_off[] and psp_config.psp_regs[].
 * The first PSP_NUM_IN_REGS entries are input registers; the remainder
 * are interrupt/status/response registers.
 */
enum psp_reg_idx {
	PSP_CMD_REG = 0,
	PSP_ARG0_REG,
	PSP_ARG1_REG,
	PSP_ARG2_REG,
	PSP_NUM_IN_REGS, /* number of input registers */
	PSP_INTR_REG = PSP_NUM_IN_REGS,
	PSP_STATUS_REG,
	PSP_RESP_REG,
	PSP_MAX_REGS /* Keep this at the end */
};
75 
/* Forward declarations; only pointers to these are used in this header */
struct amdxdna_client;
struct amdxdna_fw_ver;
struct amdxdna_hwctx;
struct amdxdna_sched_job;

/* Parameters handed to aie2m_psp_create(): firmware image and register map */
struct psp_config {
	const void	*fw_buf;	/* firmware image buffer */
	u32		fw_size;	/* firmware image size in bytes */
	void __iomem	*psp_regs[PSP_MAX_REGS]; /* mapped PSP registers, indexed by psp_reg_idx */
};
86 
/* AIE hardware/protocol version, reported by aie2_query_aie_version() */
struct aie_version {
	u16 major;
	u16 minor;
};

/* Per-tile-type resource counts within the AIE array */
struct aie_tile_metadata {
	u16 row_count;
	u16 row_start;
	u16 dma_channel_count;
	u16 lock_count;
	u16 event_reg_count;
};

/* AIE array geometry, reported by aie2_query_aie_metadata() */
struct aie_metadata {
	u32 size;
	u16 cols;
	u16 rows;
	struct aie_version version;
	struct aie_tile_metadata core;	/* core tiles */
	struct aie_tile_metadata mem;	/* memory tiles */
	struct aie_tile_metadata shim;	/* shim tiles */
};
109 
/* Categories selecting which runtime configs aie2_runtime_cfg() applies */
enum rt_config_category {
	AIE2_RT_CFG_INIT,
	AIE2_RT_CFG_CLK_GATING,
	AIE2_RT_CFG_FORCE_PREEMPT,
	AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT,
};

/* One firmware runtime configuration entry (see npu*_default_rt_cfg[]) */
struct rt_config {
	u32	type;		/* config type passed to aie2_set_runtime_cfg() */
	u32	value;		/* value passed to aie2_set_runtime_cfg() */
	u32	category;	/* enum rt_config_category */
	/* NOTE(review): presumably bits of enum aie2_fw_feature gating this entry — confirm */
	unsigned long feature_mask;
};

/* One DPM level's clock pair (see npu*_dpm_clk_table[]) */
struct dpm_clk_freq {
	u32	npuclk;
	u32	hclk;
};
128 
/*
 * Define the maximum number of pending commands in a hardware context.
 * Must be power of 2!
 */
#define HWCTX_MAX_CMDS		4
/* Map a monotonically increasing sequence number to a cmd_buf[] slot */
#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
/* aie2 backend private state attached to an amdxdna_hwctx */
struct amdxdna_hwctx_priv {
	struct amdxdna_gem_obj		*heap;
	void				*mbox_chann;	/* mailbox channel handle */

	struct drm_gpu_scheduler	sched;
	struct drm_sched_entity		entity;

	struct mutex			io_lock; /* protect seq and cmd order */
	struct wait_queue_head		job_free_wq;
	u32				num_pending;	/* jobs submitted but not yet freed */
	u64				seq;		/* next submission sequence number */
	struct semaphore		job_sem;
	bool				job_done;

	/* Completed job counter */
	u64				completed;

	struct amdxdna_gem_obj		*cmd_buf[HWCTX_MAX_CMDS]; /* indexed via get_job_idx() */
	struct drm_syncobj		*syncobj;
};
155 
/* Lifecycle state of the aie2 device */
enum aie2_dev_status {
	AIE2_DEV_UNINIT,
	AIE2_DEV_INIT,
	AIE2_DEV_START,
};

/*
 * Ops for building execution messages and command-list slots from command
 * BOs; each *size is in/out (available space in, bytes used out).
 */
struct aie2_exec_msg_ops {
	int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			   size_t *size, u32 *msg_op);
	int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			    size_t *size, u32 *msg_op);
	void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
	int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	u32 (*get_chain_msg_op)(u32 cmd_op);
};
174 
/* Per-device runtime handle for an aie2 NPU device */
struct amdxdna_dev_hdl {
	struct amdxdna_dev		*xdna;		/* owning generic device */
	const struct amdxdna_dev_priv	*priv;		/* static per-chip config */
	void			__iomem *sram_base;	/* mapped SRAM BAR */
	void			__iomem *smu_base;	/* mapped SMU registers */
	void			__iomem *mbox_base;	/* mapped mailbox BAR */
	struct psp_device		*psp_hdl;

	/* Management channel registers and negotiated protocol version */
	struct xdna_mailbox_chann_res	mgmt_x2i;
	struct xdna_mailbox_chann_res	mgmt_i2x;
	u32				mgmt_chan_idx;
	u32				mgmt_prot_major;
	u32				mgmt_prot_minor;

	u32				total_col;
	struct aie_version		version;
	struct aie_metadata		metadata;
	unsigned long			feature_mask;	/* bits of enum aie2_fw_feature, see AIE2_FEATURE_ON() */
	struct aie2_exec_msg_ops	*exec_msg_ops;

	/* power management and clock */
	enum amdxdna_power_mode_type	pw_mode;
	u32				dpm_level;
	u32				dft_dpm_level;	/* default DPM level */
	u32				max_dpm_level;
	u32				clk_gating;
	u32				npuclk_freq;
	u32				hclk_freq;
	u32				max_tops;
	u32				curr_tops;
	u32				force_preempt_enabled;
	u32				frame_boundary_preempt;

	/* Mailbox and the management channel */
	struct mailbox			*mbox;
	struct mailbox_channel		*mgmt_chann;
	struct async_events		*async_events;

	enum aie2_dev_status		dev_status;
	u32				hwctx_num;	/* number of active hardware contexts */

	struct amdxdna_async_error	last_async_err;
};
218 
/*
 * Build an aie2_bar_off_pair initializer from a register address; expects
 * bar##_BAR_INDEX and bar##_BAR_BASE macros to exist at the use site.
 */
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
	[reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE}

/* A register location: PCI BAR index plus offset within that BAR */
struct aie2_bar_off_pair {
	int	bar_idx;
	u32	offset;
};

/* Per-chip hardware ops (see npu1_set_dpm()/npu4_set_dpm()) */
struct aie2_hw_ops {
	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};
230 
/* Optional firmware features; bit positions in amdxdna_dev_hdl.feature_mask */
enum aie2_fw_feature {
	AIE2_NPU_COMMAND,
	AIE2_PREEMPT,
	AIE2_FEATURE_MAX
};

/* Maps a feature to the firmware protocol minor-version range providing it */
struct aie2_fw_feature_tbl {
	enum aie2_fw_feature feature;
	u32 max_minor;
	u32 min_minor;
};

/* True if the device firmware supports the given aie2_fw_feature bit */
#define AIE2_FEATURE_ON(ndev, feature)	test_bit(feature, &(ndev)->feature_mask)
244 
/* Static per-chip configuration, selected at probe time */
struct amdxdna_dev_priv {
	const char			*fw_path;	/* firmware image path */
	u64				protocol_major;	/* required fw protocol version */
	u64				protocol_minor;
	const struct rt_config		*rt_config;	/* default runtime configs */
	const struct dpm_clk_freq	*dpm_clk_tbl;	/* per-DPM-level clock table */
	const struct aie2_fw_feature_tbl *fw_feature_tbl;

#define COL_ALIGN_NONE   0
#define COL_ALIGN_NATURE 1
	u32				col_align;	/* column alignment policy */
	u32				mbox_dev_addr;
	/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
	u32				mbox_size;
	u32				hwctx_limit;
	u32				sram_dev_addr;
	struct aie2_bar_off_pair	sram_offs[SRAM_MAX_INDEX];
	struct aie2_bar_off_pair	psp_regs_off[PSP_MAX_REGS];
	struct aie2_bar_off_pair	smu_regs_off[SMU_MAX_REGS];
	struct aie2_hw_ops		hw_ops;
};
266 
/* aie2 implementation of the generic amdxdna device ops */
extern const struct amdxdna_dev_ops aie2_ops;

/* Apply the runtime configs of a category; val overrides the value if set */
int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
		     enum rt_config_category category, u32 *val);

/* aie2 npu hw config tables (per NPU generation) */
extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];

/* aie2_smu.c - SMU init/teardown and per-generation DPM setters */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_pm.c - power mode management */
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);

/* aie2_psp.c - platform security processor control */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
int aie2_psp_start(struct psp_device *psp);
void aie2_psp_stop(struct psp_device *psp);

/* aie2_error.c - asynchronous error event handling */
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
int aie2_error_async_msg_thread(void *data);
int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
			       struct amdxdna_drm_get_array *args);

/* aie2_message.c - management-channel firmware messages */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
				struct amdxdna_fw_ver *fw_ver);
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
			 char __user *buf, u32 size,
			 struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, void __iomem *, size_t));
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
		   int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
				struct amdxdna_sched_job *job,
				int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
			       struct amdxdna_sched_job *job,
			       int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
			 int (*notify_cb)(void *, void __iomem *, size_t));

/* aie2_hwctx.c - hardware context lifecycle and command submission */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
345 
346 #endif /* _AIE2_PCI_H_ */
347