1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 4 */ 5 6 #ifndef _AIE2_PCI_H_ 7 #define _AIE2_PCI_H_ 8 9 #include <drm/amdxdna_accel.h> 10 #include <linux/semaphore.h> 11 12 #include "amdxdna_mailbox.h" 13 14 #define AIE2_INTERVAL 20000 /* us */ 15 #define AIE2_TIMEOUT 1000000 /* us */ 16 17 /* Firmware determines device memory base address and size */ 18 #define AIE2_DEVM_BASE 0x4000000 19 #define AIE2_DEVM_SIZE SZ_64M 20 21 #define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev)) 22 23 #define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr) 24 #define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr) 25 26 #define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx) 27 #define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset) 28 #define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset) 29 30 #define SMU_REG(ndev, idx) \ 31 ({ \ 32 typeof(ndev) _ndev = ndev; \ 33 ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \ 34 }) 35 #define SRAM_GET_ADDR(ndev, idx) \ 36 ({ \ 37 typeof(ndev) _ndev = ndev; \ 38 ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \ 39 }) 40 41 #define CHAN_SLOT_SZ SZ_8K 42 #define MBOX_SIZE(ndev) \ 43 ({ \ 44 typeof(ndev) _ndev = (ndev); \ 45 ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \ 46 pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \ 47 }) 48 49 enum aie2_smu_reg_idx { 50 SMU_CMD_REG = 0, 51 SMU_ARG_REG, 52 SMU_INTR_REG, 53 SMU_RESP_REG, 54 SMU_OUT_REG, 55 SMU_MAX_REGS /* Keep this at the end */ 56 }; 57 58 enum aie2_sram_reg_idx { 59 MBOX_CHANN_OFF = 0, 60 FW_ALIVE_OFF, 61 SRAM_MAX_INDEX /* Keep this at the end */ 62 }; 63 64 enum psp_reg_idx { 65 PSP_CMD_REG = 0, 66 PSP_ARG0_REG, 67 PSP_ARG1_REG, 68 PSP_ARG2_REG, 69 PSP_NUM_IN_REGS, /* number of input registers */ 70 PSP_INTR_REG = PSP_NUM_IN_REGS, 71 PSP_STATUS_REG, 72 PSP_RESP_REG, 73 PSP_PWAITMODE_REG, 74 PSP_MAX_REGS /* Keep this at the end */ 75 }; 76 77 struct amdxdna_client; 78 struct amdxdna_fw_ver; 79 struct amdxdna_hwctx; 80 struct amdxdna_sched_job; 81 82 struct psp_config { 83 const void *fw_buf; 84 u32 fw_size; 85 void __iomem *psp_regs[PSP_MAX_REGS]; 86 }; 87 88 struct aie_version { 89 u16 major; 90 u16 minor; 91 }; 92 93 struct aie_tile_metadata { 94 u16 row_count; 95 u16 row_start; 96 u16 dma_channel_count; 97 u16 lock_count; 98 u16 event_reg_count; 99 }; 100 101 struct aie_metadata { 102 u32 size; 103 u16 cols; 104 u16 rows; 105 struct aie_version version; 106 struct aie_tile_metadata core; 107 struct aie_tile_metadata mem; 108 struct aie_tile_metadata shim; 109 }; 110 111 enum rt_config_category { 112 AIE2_RT_CFG_INIT, 113 AIE2_RT_CFG_CLK_GATING, 114 AIE2_RT_CFG_FORCE_PREEMPT, 115 AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, 116 }; 117 118 struct rt_config { 119 u32 type; 120 u32 value; 121 u32 category; 122 unsigned long feature_mask; 123 }; 124 125 struct dpm_clk_freq { 126 u32 npuclk; 127 u32 hclk; 128 }; 129 130 /* 131 * Define the maximum number of pending commands in a hardware context. 132 * Must be power of 2! 133 */ 134 #define HWCTX_MAX_CMDS 4 135 #define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) 136 struct amdxdna_hwctx_priv { 137 struct amdxdna_gem_obj *heap; 138 void *mbox_chann; 139 140 struct drm_gpu_scheduler sched; 141 struct drm_sched_entity entity; 142 143 struct mutex io_lock; /* protect seq and cmd order */ 144 struct wait_queue_head job_free_wq; 145 u32 num_pending; 146 u64 seq; 147 struct semaphore job_sem; 148 bool job_done; 149 150 /* Completed job counter */ 151 u64 completed; 152 153 struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; 154 struct drm_syncobj *syncobj; 155 }; 156 157 enum aie2_dev_status { 158 AIE2_DEV_UNINIT, 159 AIE2_DEV_INIT, 160 AIE2_DEV_START, 161 }; 162 163 struct aie2_exec_msg_ops { 164 int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, 165 size_t *size, u32 *msg_op); 166 int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, 167 size_t *size, u32 *msg_op); 168 void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); 169 int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 170 int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 171 int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 172 int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 173 u32 (*get_chain_msg_op)(u32 cmd_op); 174 }; 175 176 struct amdxdna_dev_hdl { 177 struct amdxdna_dev *xdna; 178 const struct amdxdna_dev_priv *priv; 179 void __iomem *sram_base; 180 void __iomem *smu_base; 181 void __iomem *mbox_base; 182 struct psp_device *psp_hdl; 183 184 struct xdna_mailbox_chann_res mgmt_x2i; 185 struct xdna_mailbox_chann_res mgmt_i2x; 186 u32 mgmt_chan_idx; 187 u32 mgmt_prot_major; 188 u32 mgmt_prot_minor; 189 190 u32 total_col; 191 struct aie_version version; 192 struct aie_metadata metadata; 193 unsigned long feature_mask; 194 struct aie2_exec_msg_ops *exec_msg_ops; 195 196 /* power management and clock*/ 197 enum amdxdna_power_mode_type pw_mode; 198 u32 dpm_level; 199 u32 dft_dpm_level; 200 u32 max_dpm_level; 201 u32 clk_gating; 202 u32 npuclk_freq; 203 u32 hclk_freq; 204 u32 max_tops; 205 u32 curr_tops; 206 u32 force_preempt_enabled; 207 u32 frame_boundary_preempt; 208 209 /* Mailbox and the management channel */ 210 struct mailbox *mbox; 211 struct mailbox_channel *mgmt_chann; 212 struct async_events *async_events; 213 214 enum aie2_dev_status dev_status; 215 u32 hwctx_num; 216 217 struct amdxdna_async_error last_async_err; 218 }; 219 220 #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ 221 [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE} 222 223 struct aie2_bar_off_pair { 224 int bar_idx; 225 u32 offset; 226 }; 227 228 struct aie2_hw_ops { 229 int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 230 }; 231 232 enum aie2_fw_feature { 233 AIE2_NPU_COMMAND, 234 AIE2_PREEMPT, 235 AIE2_TEMPORAL_ONLY, 236 AIE2_FEATURE_MAX 237 }; 238 239 struct aie2_fw_feature_tbl { 240 u64 features; 241 u32 major; 242 u32 max_minor; 243 u32 min_minor; 244 }; 245 246 #define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask) 247 248 struct amdxdna_dev_priv { 249 const char *fw_path; 250 const struct rt_config *rt_config; 251 const struct dpm_clk_freq *dpm_clk_tbl; 252 const struct aie2_fw_feature_tbl *fw_feature_tbl; 253 254 #define COL_ALIGN_NONE 0 255 #define COL_ALIGN_NATURE 1 256 u32 col_align; 257 u32 mbox_dev_addr; 258 /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */ 259 u32 mbox_size; 260 u32 hwctx_limit; 261 u32 sram_dev_addr; 262 struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; 263 struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; 264 struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS]; 265 struct aie2_hw_ops hw_ops; 266 }; 267 268 extern const struct amdxdna_dev_ops aie2_ops; 269 270 int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, 271 enum rt_config_category category, u32 *val); 272 273 /* aie2 npu hw config */ 274 extern const struct dpm_clk_freq npu1_dpm_clk_table[]; 275 extern const struct dpm_clk_freq npu4_dpm_clk_table[]; 276 extern const struct rt_config npu1_default_rt_cfg[]; 277 extern const struct rt_config npu4_default_rt_cfg[]; 278 extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[]; 279 280 /* aie2_smu.c */ 281 int aie2_smu_init(struct amdxdna_dev_hdl *ndev); 282 void aie2_smu_fini(struct amdxdna_dev_hdl *ndev); 283 int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 284 int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 285 286 /* aie2_pm.c */ 287 int aie2_pm_init(struct amdxdna_dev_hdl *ndev); 288 int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); 289 int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 290 291 /* aie2_psp.c */ 292 struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); 293 int aie2_psp_start(struct psp_device *psp); 294 void aie2_psp_stop(struct psp_device *psp); 295 int aie2_psp_waitmode_poll(struct psp_device *psp); 296 297 /* aie2_error.c */ 298 int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); 299 void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev); 300 int aie2_error_async_msg_thread(void *data); 301 int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, 302 struct amdxdna_drm_get_array *args); 303 304 /* aie2_message.c */ 305 void aie2_msg_init(struct amdxdna_dev_hdl *ndev); 306 void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev); 307 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); 308 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); 309 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); 310 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value); 311 int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid); 312 int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version); 313 int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); 314 int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, 315 struct amdxdna_fw_ver *fw_ver); 316 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); 317 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); 318 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); 319 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled); 320 int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, 321 char __user *buf, u32 size, 322 struct amdxdna_drm_query_telemetry_header *header); 323 int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, 324 void *handle, int (*cb)(void*, void __iomem *, size_t)); 325 int aie2_config_cu(struct amdxdna_hwctx *hwctx, 326 int (*notify_cb)(void *, void __iomem *, size_t)); 327 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 328 int (*notify_cb)(void *, void __iomem *, size_t)); 329 int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, 330 struct amdxdna_sched_job *job, 331 int (*notify_cb)(void *, void __iomem *, size_t)); 332 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, 333 struct amdxdna_sched_job *job, 334 int (*notify_cb)(void *, void __iomem *, size_t)); 335 int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 336 int (*notify_cb)(void *, void __iomem *, size_t)); 337 int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 338 int (*notify_cb)(void *, void __iomem *, size_t)); 339 void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, 340 dma_addr_t *dma_addr); 341 #define aie2_free_msg_buffer(ndev, size, buff_addr, dma_addr) \ 342 dma_free_noncoherent((ndev)->xdna->ddev.dev, size, buff_addr, \ 343 dma_addr, DMA_FROM_DEVICE) 344 345 /* aie2_hwctx.c */ 346 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); 347 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); 348 int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); 349 int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl); 350 void aie2_hwctx_suspend(struct amdxdna_client *client); 351 int aie2_hwctx_resume(struct amdxdna_client *client); 352 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); 353 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); 354 355 #endif /* _AIE2_PCI_H_ */ 356