1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 4 */ 5 6 #ifndef _AIE2_PCI_H_ 7 #define _AIE2_PCI_H_ 8 9 #include <drm/amdxdna_accel.h> 10 #include <linux/limits.h> 11 #include <linux/semaphore.h> 12 13 #include "aie2_msg_priv.h" 14 #include "amdxdna_mailbox.h" 15 16 #define AIE2_INTERVAL 20000 /* us */ 17 #define AIE2_TIMEOUT 1000000 /* us */ 18 19 /* Firmware determines device memory base address and size */ 20 #define AIE2_DEVM_BASE 0x4000000 21 #define AIE2_DEVM_SIZE SZ_64M 22 23 #define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev)) 24 25 #define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr) 26 #define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr) 27 28 #define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx) 29 #define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset) 30 #define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset) 31 32 #define SMU_REG(ndev, idx) \ 33 ({ \ 34 typeof(ndev) _ndev = ndev; \ 35 ((_ndev)->smu_base + (_ndev)->priv->smu_regs_off[(idx)].offset); \ 36 }) 37 #define SRAM_GET_ADDR(ndev, idx) \ 38 ({ \ 39 typeof(ndev) _ndev = ndev; \ 40 ((_ndev)->sram_base + SRAM_REG_OFF((_ndev), (idx))); \ 41 }) 42 43 #define CHAN_SLOT_SZ SZ_8K 44 #define MBOX_SIZE(ndev) \ 45 ({ \ 46 typeof(ndev) _ndev = (ndev); \ 47 ((_ndev)->priv->mbox_size) ? (_ndev)->priv->mbox_size : \ 48 pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \ 49 }) 50 51 #if IS_ENABLED(CONFIG_AMD_PMF) 52 #define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics) 53 #define AIE2_GET_PMF_NPU_DATA(field, val) \ 54 ({ \ 55 struct amd_pmf_npu_metrics _npu_metrics; \ 56 int _ret; \ 57 \ 58 _ret = amd_pmf_get_npu_data(&_npu_metrics); \ 59 val = _ret ? U32_MAX : _npu_metrics.field; \ 60 (_ret); \ 61 }) 62 #else 63 #define AIE2_GET_PMF_NPU_METRICS(metrics) \ 64 ({ \ 65 typeof(metrics) _m = metrics; \ 66 memset(_m, 0xff, sizeof(*_m)); \ 67 (-EOPNOTSUPP); \ 68 }) 69 70 #define SENSOR_DEFAULT_npu_power U32_MAX 71 #define AIE2_GET_PMF_NPU_DATA(field, val) \ 72 ({ \ 73 val = SENSOR_DEFAULT_##field; \ 74 (-EOPNOTSUPP); \ 75 }) 76 #endif 77 78 enum aie2_smu_reg_idx { 79 SMU_CMD_REG = 0, 80 SMU_ARG_REG, 81 SMU_INTR_REG, 82 SMU_RESP_REG, 83 SMU_OUT_REG, 84 SMU_MAX_REGS /* Keep this at the end */ 85 }; 86 87 enum aie2_sram_reg_idx { 88 MBOX_CHANN_OFF = 0, 89 FW_ALIVE_OFF, 90 SRAM_MAX_INDEX /* Keep this at the end */ 91 }; 92 93 enum psp_reg_idx { 94 PSP_CMD_REG = 0, 95 PSP_ARG0_REG, 96 PSP_ARG1_REG, 97 PSP_ARG2_REG, 98 PSP_NUM_IN_REGS, /* number of input registers */ 99 PSP_INTR_REG = PSP_NUM_IN_REGS, 100 PSP_STATUS_REG, 101 PSP_RESP_REG, 102 PSP_PWAITMODE_REG, 103 PSP_MAX_REGS /* Keep this at the end */ 104 }; 105 106 struct amdxdna_client; 107 struct amdxdna_fw_ver; 108 struct amdxdna_hwctx; 109 struct amdxdna_sched_job; 110 111 struct psp_config { 112 const void *fw_buf; 113 u32 fw_size; 114 void __iomem *psp_regs[PSP_MAX_REGS]; 115 }; 116 117 struct aie_version { 118 u16 major; 119 u16 minor; 120 }; 121 122 struct aie_tile_metadata { 123 u16 row_count; 124 u16 row_start; 125 u16 dma_channel_count; 126 u16 lock_count; 127 u16 event_reg_count; 128 }; 129 130 struct aie_metadata { 131 u32 size; 132 u16 cols; 133 u16 rows; 134 struct aie_version version; 135 struct aie_tile_metadata core; 136 struct aie_tile_metadata mem; 137 struct aie_tile_metadata shim; 138 }; 139 140 enum rt_config_category { 141 AIE2_RT_CFG_INIT, 142 AIE2_RT_CFG_CLK_GATING, 143 AIE2_RT_CFG_FORCE_PREEMPT, 144 AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT, 145 }; 146 147 struct rt_config { 148 u32 type; 149 u32 value; 150 u32 category; 151 unsigned long feature_mask; 152 }; 153 154 struct dpm_clk_freq { 155 u32 npuclk; 156 u32 hclk; 157 }; 158 159 /* 160 * Define the maximum number of pending commands in a hardware context. 161 * Must be power of 2! 162 */ 163 #define HWCTX_MAX_CMDS 4 164 #define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) 165 struct amdxdna_hwctx_priv { 166 struct amdxdna_gem_obj *heap; 167 void *mbox_chann; 168 169 struct drm_gpu_scheduler sched; 170 struct drm_sched_entity entity; 171 172 struct mutex io_lock; /* protect seq and cmd order */ 173 struct wait_queue_head job_free_wq; 174 u32 num_pending; 175 u64 seq; 176 struct semaphore job_sem; 177 bool job_done; 178 179 /* Completed job counter */ 180 u64 completed; 181 182 struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; 183 struct drm_syncobj *syncobj; 184 }; 185 186 enum aie2_dev_status { 187 AIE2_DEV_UNINIT, 188 AIE2_DEV_INIT, 189 AIE2_DEV_START, 190 }; 191 192 struct aie2_exec_msg_ops { 193 int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, 194 size_t *size, u32 *msg_op); 195 int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req, 196 size_t *size, u32 *msg_op); 197 void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt); 198 int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 199 int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 200 int (*fill_preempt_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 201 int (*fill_elf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size); 202 u32 (*get_chain_msg_op)(u32 cmd_op); 203 }; 204 205 struct amdxdna_dev_hdl { 206 struct amdxdna_dev *xdna; 207 const struct amdxdna_dev_priv *priv; 208 void __iomem *sram_base; 209 void __iomem *smu_base; 210 void __iomem *mbox_base; 211 struct psp_device *psp_hdl; 212 213 struct xdna_mailbox_chann_res mgmt_x2i; 214 struct xdna_mailbox_chann_res mgmt_i2x; 215 u32 mgmt_chan_idx; 216 u32 mgmt_prot_major; 217 u32 mgmt_prot_minor; 218 219 u32 total_col; 220 struct aie_version version; 221 struct aie_metadata metadata; 222 unsigned long feature_mask; 223 struct aie2_exec_msg_ops *exec_msg_ops; 224 225 /* power management and clock*/ 226 enum amdxdna_power_mode_type pw_mode; 227 u32 dpm_level; 228 u32 dft_dpm_level; 229 u32 max_dpm_level; 230 u32 clk_gating; 231 u32 npuclk_freq; 232 u32 hclk_freq; 233 u32 max_tops; 234 u32 curr_tops; 235 u32 force_preempt_enabled; 236 u32 frame_boundary_preempt; 237 238 /* Mailbox and the management channel */ 239 struct mailbox *mbox; 240 struct mailbox_channel *mgmt_chann; 241 struct async_events *async_events; 242 243 enum aie2_dev_status dev_status; 244 u32 hwctx_num; 245 246 struct amdxdna_async_error last_async_err; 247 }; 248 249 #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ 250 [reg_name] = {bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE} 251 252 struct aie2_bar_off_pair { 253 int bar_idx; 254 u32 offset; 255 }; 256 257 struct aie2_hw_ops { 258 int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 259 }; 260 261 enum aie2_fw_feature { 262 AIE2_NPU_COMMAND, 263 AIE2_PREEMPT, 264 AIE2_TEMPORAL_ONLY, 265 AIE2_APP_HEALTH, 266 AIE2_FEATURE_MAX 267 }; 268 269 struct aie2_fw_feature_tbl { 270 u64 features; 271 u32 major; 272 u32 max_minor; 273 u32 min_minor; 274 }; 275 276 #define AIE2_ALL_FEATURES GENMASK_ULL(AIE2_FEATURE_MAX - 1, AIE2_NPU_COMMAND) 277 #define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask) 278 279 struct amdxdna_dev_priv { 280 const char *fw_path; 281 const struct rt_config *rt_config; 282 const struct dpm_clk_freq *dpm_clk_tbl; 283 const struct aie2_fw_feature_tbl *fw_feature_tbl; 284 285 #define COL_ALIGN_NONE 0 286 #define COL_ALIGN_NATURE 1 287 u32 col_align; 288 u32 mbox_dev_addr; 289 /* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */ 290 u32 mbox_size; 291 u32 hwctx_limit; 292 u32 sram_dev_addr; 293 struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; 294 struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; 295 struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS]; 296 struct aie2_hw_ops hw_ops; 297 }; 298 299 extern const struct amdxdna_dev_ops aie2_ops; 300 301 int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, 302 enum rt_config_category category, u32 *val); 303 304 /* aie2 npu hw config */ 305 extern const struct dpm_clk_freq npu1_dpm_clk_table[]; 306 extern const struct dpm_clk_freq npu4_dpm_clk_table[]; 307 extern const struct rt_config npu1_default_rt_cfg[]; 308 extern const struct rt_config npu4_default_rt_cfg[]; 309 extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[]; 310 311 /* aie2_smu.c */ 312 int aie2_smu_init(struct amdxdna_dev_hdl *ndev); 313 void aie2_smu_fini(struct amdxdna_dev_hdl *ndev); 314 int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 315 int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 316 317 /* aie2_pm.c */ 318 int aie2_pm_init(struct amdxdna_dev_hdl *ndev); 319 int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); 320 int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); 321 322 /* aie2_psp.c */ 323 struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); 324 int aie2_psp_start(struct psp_device *psp); 325 void aie2_psp_stop(struct psp_device *psp); 326 int aie2_psp_waitmode_poll(struct psp_device *psp); 327 328 /* aie2_error.c */ 329 int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); 330 void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev); 331 int aie2_error_async_msg_thread(void *data); 332 int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev, 333 struct amdxdna_drm_get_array *args); 334 335 /* aie2_message.c */ 336 void aie2_msg_init(struct amdxdna_dev_hdl *ndev); 337 void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev); 338 int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); 339 int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); 340 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); 341 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value); 342 int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid); 343 int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version); 344 int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); 345 int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, 346 struct amdxdna_fw_ver *fw_ver); 347 int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id, 348 struct app_health_report *report); 349 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); 350 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); 351 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); 352 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled); 353 int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev, 354 char __user *buf, u32 size, 355 struct amdxdna_drm_query_telemetry_header *header); 356 int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size, 357 void *handle, int (*cb)(void*, void __iomem *, size_t)); 358 int aie2_config_cu(struct amdxdna_hwctx *hwctx, 359 int (*notify_cb)(void *, void __iomem *, size_t)); 360 int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 361 int (*notify_cb)(void *, void __iomem *, size_t)); 362 int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, 363 struct amdxdna_sched_job *job, 364 int (*notify_cb)(void *, void __iomem *, size_t)); 365 int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, 366 struct amdxdna_sched_job *job, 367 int (*notify_cb)(void *, void __iomem *, size_t)); 368 int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 369 int (*notify_cb)(void *, void __iomem *, size_t)); 370 int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, 371 int (*notify_cb)(void *, void __iomem *, size_t)); 372 void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size, 373 dma_addr_t *dma_addr); 374 void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size, 375 void *cpu_addr, dma_addr_t dma_addr); 376 377 /* aie2_hwctx.c */ 378 int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); 379 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); 380 int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); 381 int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl); 382 void aie2_hwctx_suspend(struct amdxdna_client *client); 383 int aie2_hwctx_resume(struct amdxdna_client *client); 384 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); 385 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); 386 387 #endif /* _AIE2_PCI_H_ */ 388