1 /* SPDX-License-Identifier: GPL-2.0-only or MIT */ 2 /* Copyright 2025 Arm, Ltd. */ 3 4 #ifndef __ETHOSU_DEVICE_H__ 5 #define __ETHOSU_DEVICE_H__ 6 7 #include <linux/bitfield.h> 8 #include <linux/bits.h> 9 #include <linux/types.h> 10 11 #include <drm/drm_device.h> 12 #include <drm/gpu_scheduler.h> 13 14 #include <drm/ethosu_accel.h> 15 16 struct clk; 17 struct gen_pool; 18 19 #define NPU_REG_ID 0x0000 20 #define NPU_REG_STATUS 0x0004 21 #define NPU_REG_CMD 0x0008 22 #define NPU_REG_RESET 0x000c 23 #define NPU_REG_QBASE 0x0010 24 #define NPU_REG_QBASE_HI 0x0014 25 #define NPU_REG_QREAD 0x0018 26 #define NPU_REG_QCONFIG 0x001c 27 #define NPU_REG_QSIZE 0x0020 28 #define NPU_REG_PROT 0x0024 29 #define NPU_REG_CONFIG 0x0028 30 #define NPU_REG_REGIONCFG 0x003c 31 #define NPU_REG_AXILIMIT0 0x0040 // U65 32 #define NPU_REG_AXILIMIT1 0x0044 // U65 33 #define NPU_REG_AXILIMIT2 0x0048 // U65 34 #define NPU_REG_AXILIMIT3 0x004c // U65 35 #define NPU_REG_MEM_ATTR0 0x0040 // U85 36 #define NPU_REG_MEM_ATTR1 0x0044 // U85 37 #define NPU_REG_MEM_ATTR2 0x0048 // U85 38 #define NPU_REG_MEM_ATTR3 0x004c // U85 39 #define NPU_REG_AXI_SRAM 0x0050 // U85 40 #define NPU_REG_AXI_EXT 0x0054 // U85 41 42 #define NPU_REG_BASEP(x) (0x0080 + (x) * 8) 43 #define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8) 44 #define NPU_BASEP_REGION_MAX 8 45 46 #define ID_ARCH_MAJOR_MASK GENMASK(31, 28) 47 #define ID_ARCH_MINOR_MASK GENMASK(27, 20) 48 #define ID_ARCH_PATCH_MASK GENMASK(19, 16) 49 #define ID_VER_MAJOR_MASK GENMASK(11, 8) 50 #define ID_VER_MINOR_MASK GENMASK(7, 4) 51 52 #define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0) 53 #define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4) 54 55 #define STATUS_STATE_RUNNING BIT(0) 56 #define STATUS_IRQ_RAISED BIT(1) 57 #define STATUS_BUS_STATUS BIT(2) 58 #define STATUS_RESET_STATUS BIT(3) 59 #define STATUS_CMD_PARSE_ERR BIT(4) 60 #define STATUS_CMD_END_REACHED BIT(5) 61 62 #define CMD_CLEAR_IRQ BIT(1) 63 #define CMD_TRANSITION_TO_RUN BIT(0) 64 65 #define RESET_PENDING_CSL BIT(1) 66 #define RESET_PENDING_CPL BIT(0) 67 68 #define PROT_ACTIVE_CSL BIT(1) 69 70 enum ethosu_cmds { 71 NPU_OP_CONV = 0x2, 72 NPU_OP_DEPTHWISE = 0x3, 73 NPU_OP_POOL = 0x5, 74 NPU_OP_ELEMENTWISE = 0x6, 75 NPU_OP_RESIZE = 0x7, // U85 only 76 NPU_OP_DMA_START = 0x10, 77 NPU_SET_IFM_PAD_TOP = 0x100, 78 NPU_SET_IFM_PAD_LEFT = 0x101, 79 NPU_SET_IFM_PAD_RIGHT = 0x102, 80 NPU_SET_IFM_PAD_BOTTOM = 0x103, 81 NPU_SET_IFM_DEPTH_M1 = 0x104, 82 NPU_SET_IFM_PRECISION = 0x105, 83 NPU_SET_IFM_BROADCAST = 0x108, 84 NPU_SET_IFM_WIDTH0_M1 = 0x10a, 85 NPU_SET_IFM_HEIGHT0_M1 = 0x10b, 86 NPU_SET_IFM_HEIGHT1_M1 = 0x10c, 87 NPU_SET_IFM_REGION = 0x10f, 88 NPU_SET_OFM_WIDTH_M1 = 0x111, 89 NPU_SET_OFM_HEIGHT_M1 = 0x112, 90 NPU_SET_OFM_DEPTH_M1 = 0x113, 91 NPU_SET_OFM_PRECISION = 0x114, 92 NPU_SET_OFM_WIDTH0_M1 = 0x11a, 93 NPU_SET_OFM_HEIGHT0_M1 = 0x11b, 94 NPU_SET_OFM_HEIGHT1_M1 = 0x11c, 95 NPU_SET_OFM_REGION = 0x11f, 96 NPU_SET_KERNEL_WIDTH_M1 = 0x120, 97 NPU_SET_KERNEL_HEIGHT_M1 = 0x121, 98 NPU_SET_KERNEL_STRIDE = 0x122, 99 NPU_SET_WEIGHT_REGION = 0x128, 100 NPU_SET_SCALE_REGION = 0x129, 101 NPU_SET_DMA0_SRC_REGION = 0x130, 102 NPU_SET_DMA0_DST_REGION = 0x131, 103 NPU_SET_DMA0_SIZE0 = 0x132, 104 NPU_SET_DMA0_SIZE1 = 0x133, 105 NPU_SET_IFM2_BROADCAST = 0x180, 106 NPU_SET_IFM2_PRECISION = 0x185, 107 NPU_SET_IFM2_WIDTH0_M1 = 0x18a, 108 NPU_SET_IFM2_HEIGHT0_M1 = 0x18b, 109 NPU_SET_IFM2_HEIGHT1_M1 = 0x18c, 110 NPU_SET_IFM2_REGION = 0x18f, 111 NPU_SET_IFM_BASE0 = 0x4000, 112 NPU_SET_IFM_BASE1 = 0x4001, 113 NPU_SET_IFM_BASE2 = 0x4002, 114 NPU_SET_IFM_BASE3 = 0x4003, 115 NPU_SET_IFM_STRIDE_X = 0x4004, 116 NPU_SET_IFM_STRIDE_Y = 0x4005, 117 NPU_SET_IFM_STRIDE_C = 0x4006, 118 NPU_SET_OFM_BASE0 = 0x4010, 119 NPU_SET_OFM_BASE1 = 0x4011, 120 NPU_SET_OFM_BASE2 = 0x4012, 121 NPU_SET_OFM_BASE3 = 0x4013, 122 NPU_SET_OFM_STRIDE_X = 0x4014, 123 NPU_SET_OFM_STRIDE_Y = 0x4015, 124 NPU_SET_OFM_STRIDE_C = 0x4016, 125 NPU_SET_WEIGHT_BASE = 0x4020, 126 NPU_SET_WEIGHT_LENGTH = 0x4021, 127 NPU_SET_SCALE_BASE = 0x4022, 128 NPU_SET_SCALE_LENGTH = 0x4023, 129 NPU_SET_DMA0_SRC = 0x4030, 130 NPU_SET_DMA0_DST = 0x4031, 131 NPU_SET_DMA0_LEN = 0x4032, 132 NPU_SET_DMA0_SRC_STRIDE0 = 0x4033, 133 NPU_SET_DMA0_SRC_STRIDE1 = 0x4034, 134 NPU_SET_DMA0_DST_STRIDE0 = 0x4035, 135 NPU_SET_DMA0_DST_STRIDE1 = 0x4036, 136 NPU_SET_IFM2_BASE0 = 0x4080, 137 NPU_SET_IFM2_BASE1 = 0x4081, 138 NPU_SET_IFM2_BASE2 = 0x4082, 139 NPU_SET_IFM2_BASE3 = 0x4083, 140 NPU_SET_IFM2_STRIDE_X = 0x4084, 141 NPU_SET_IFM2_STRIDE_Y = 0x4085, 142 NPU_SET_IFM2_STRIDE_C = 0x4086, 143 NPU_SET_WEIGHT1_BASE = 0x4090, 144 NPU_SET_WEIGHT1_LENGTH = 0x4091, 145 NPU_SET_SCALE1_BASE = 0x4092, 146 NPU_SET_WEIGHT2_BASE = 0x4092, 147 NPU_SET_SCALE1_LENGTH = 0x4093, 148 NPU_SET_WEIGHT2_LENGTH = 0x4093, 149 NPU_SET_WEIGHT3_BASE = 0x4094, 150 NPU_SET_WEIGHT3_LENGTH = 0x4095, 151 }; 152 153 #define ETHOSU_SRAM_REGION 2 /* Matching Vela compiler */ 154 155 /** 156 * struct ethosu_device - Ethosu device 157 */ 158 struct ethosu_device { 159 /** @base: Base drm_device. */ 160 struct drm_device base; 161 162 /** @iomem: CPU mapping of the registers. */ 163 void __iomem *regs; 164 165 void __iomem *sram; 166 struct gen_pool *srampool; 167 dma_addr_t sramphys; 168 169 struct clk_bulk_data *clks; 170 int num_clks; 171 int irq; 172 173 struct drm_ethosu_npu_info npu_info; 174 175 struct ethosu_job *in_flight_job; 176 /* For in_flight_job and ethosu_job_hw_submit() */ 177 struct mutex job_lock; 178 179 /* For dma_fence */ 180 spinlock_t fence_lock; 181 182 struct drm_gpu_scheduler sched; 183 /* For ethosu_job_do_push() */ 184 struct mutex sched_lock; 185 u64 fence_context; 186 u64 emit_seqno; 187 }; 188 189 #define to_ethosu_device(drm_dev) \ 190 ((struct ethosu_device *)container_of(drm_dev, struct ethosu_device, base)) 191 192 static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev) 193 { 194 return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1; 195 } 196 197 #endif 198