1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. 4 */ 5 6 #include <drm/amdxdna_accel.h> 7 #include <drm/drm_device.h> 8 #include <drm/gpu_scheduler.h> 9 #include <linux/bits.h> 10 #include <linux/sizes.h> 11 12 #include "aie2_pci.h" 13 #include "amdxdna_mailbox.h" 14 #include "amdxdna_pci_drv.h" 15 16 /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ 17 #define MPNPU_PWAITMODE 0x301003C 18 #define MPNPU_PUB_SEC_INTR 0x3010060 19 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 20 #define MPNPU_PUB_SCRATCH0 0x301006C 21 #define MPNPU_PUB_SCRATCH1 0x3010070 22 #define MPNPU_PUB_SCRATCH2 0x3010074 23 #define MPNPU_PUB_SCRATCH3 0x3010078 24 #define MPNPU_PUB_SCRATCH4 0x301007C 25 #define MPNPU_PUB_SCRATCH5 0x3010080 26 #define MPNPU_PUB_SCRATCH6 0x3010084 27 #define MPNPU_PUB_SCRATCH7 0x3010088 28 #define MPNPU_PUB_SCRATCH8 0x301008C 29 #define MPNPU_PUB_SCRATCH9 0x3010090 30 #define MPNPU_PUB_SCRATCH10 0x3010094 31 #define MPNPU_PUB_SCRATCH11 0x3010098 32 #define MPNPU_PUB_SCRATCH12 0x301009C 33 #define MPNPU_PUB_SCRATCH13 0x30100A0 34 #define MPNPU_PUB_SCRATCH14 0x30100A4 35 #define MPNPU_PUB_SCRATCH15 0x30100A8 36 #define MP0_C2PMSG_73 0x3810A24 37 #define MP0_C2PMSG_123 0x3810AEC 38 39 #define MP1_C2PMSG_0 0x3B10900 40 #define MP1_C2PMSG_60 0x3B109F0 41 #define MP1_C2PMSG_61 0x3B109F4 42 43 #define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 44 #define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 45 #define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 46 #define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 47 48 #define MMNPU_APERTURE0_BASE 0x3000000 49 #define MMNPU_APERTURE1_BASE 0x3600000 50 #define MMNPU_APERTURE3_BASE 0x3810000 51 #define MMNPU_APERTURE4_BASE 0x3B10000 52 53 /* PCIe BAR Index for NPU4 */ 54 #define NPU4_REG_BAR_INDEX 0 55 #define NPU4_MBOX_BAR_INDEX 0 56 #define NPU4_PSP_BAR_INDEX 4 57 #define NPU4_SMU_BAR_INDEX 5 58 #define NPU4_SRAM_BAR_INDEX 2 59 /* Associated BARs and Apertures */ 60 #define NPU4_REG_BAR_BASE MMNPU_APERTURE0_BASE 61 #define NPU4_MBOX_BAR_BASE MMNPU_APERTURE0_BASE 62 #define NPU4_PSP_BAR_BASE MMNPU_APERTURE3_BASE 63 #define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE 64 #define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE 65 66 const struct rt_config npu4_default_rt_cfg[] = { 67 { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ 68 { 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */ 69 { 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */ 70 { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ 71 { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ 72 { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ 73 { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ 74 { 13, 0, AIE2_RT_CFG_FORCE_PREEMPT }, 75 { 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT }, 76 { 0 }, 77 }; 78 79 const struct dpm_clk_freq npu4_dpm_clk_table[] = { 80 {396, 792}, 81 {600, 1056}, 82 {792, 1152}, 83 {975, 1267}, 84 {975, 1267}, 85 {1056, 1408}, 86 {1152, 1584}, 87 {1267, 1800}, 88 { 0 } 89 }; 90 91 const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = { 92 { .major = 6, .min_minor = 12 }, 93 { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 }, 94 { .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 }, 95 { .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 }, 96 { .features = GENMASK_ULL(AIE2_TEMPORAL_ONLY, AIE2_NPU_COMMAND), .major = 7 }, 97 { 0 } 98 }; 99 100 static const struct amdxdna_dev_priv npu4_dev_priv = { 101 .fw_path = "amdnpu/17f0_10/", 102 .rt_config = npu4_default_rt_cfg, 103 .dpm_clk_tbl = npu4_dpm_clk_table, 104 .fw_feature_tbl = npu4_fw_feature_table, 105 .col_align = COL_ALIGN_NATURE, 106 .mbox_dev_addr = NPU4_MBOX_BAR_BASE, 107 .mbox_size = 0, /* Use BAR size */ 108 .sram_dev_addr = NPU4_SRAM_BAR_BASE, 109 .hwctx_limit = 16, 110 .sram_offs = { 111 DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), 112 DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), 113 }, 114 .psp_regs_off = { 115 DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123), 116 DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), 117 DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4), 118 DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9), 119 DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73), 120 DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), 121 DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), 122 DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE), 123 }, 124 .smu_regs_off = { 125 DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0), 126 DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60), 127 DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE), 128 DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61), 129 DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), 130 }, 131 .hw_ops = { 132 .set_dpm = npu4_set_dpm, 133 }, 134 }; 135 136 const struct amdxdna_dev_info dev_npu4_info = { 137 .reg_bar = NPU4_REG_BAR_INDEX, 138 .mbox_bar = NPU4_MBOX_BAR_INDEX, 139 .sram_bar = NPU4_SRAM_BAR_INDEX, 140 .psp_bar = NPU4_PSP_BAR_INDEX, 141 .smu_bar = NPU4_SMU_BAR_INDEX, 142 .first_col = 0, 143 .dev_mem_buf_shift = 15, /* 32 KiB aligned */ 144 .dev_mem_base = AIE2_DEVM_BASE, 145 .dev_mem_size = AIE2_DEVM_SIZE, 146 .vbnv = "RyzenAI-npu4", 147 .device_type = AMDXDNA_DEV_TYPE_KMQ, 148 .dev_priv = &npu4_dev_priv, 149 .ops = &aie2_ops, /* NPU4 can share NPU1's callback */ 150 }; 151