xref: /linux/drivers/accel/amdxdna/npu4_regs.c (revision 9e4e86a604dfd06402933467578c4b79f5412b2c)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */
5 
6 #include <drm/amdxdna_accel.h>
7 #include <drm/drm_device.h>
8 #include <drm/gpu_scheduler.h>
9 #include <linux/bits.h>
10 #include <linux/sizes.h>
11 
12 #include "aie2_pci.h"
13 #include "amdxdna_mailbox.h"
14 #include "amdxdna_pci_drv.h"
15 
/*
 * NPU4 register map.
 *
 * Absolute device addresses of the registers, mailbox SRAM slots and
 * apertures referenced by the tables in this file. The NPU4_*_BAR_BASE
 * macros at the end of this section alias the aperture bases defined here.
 */

/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
#define MPNPU_PWAITMODE                0x301003C
#define MPNPU_PUB_SEC_INTR             0x3010060
#define MPNPU_PUB_PWRMGMT_INTR         0x3010064
#define MPNPU_PUB_SCRATCH0             0x301006C
#define MPNPU_PUB_SCRATCH1             0x3010070
#define MPNPU_PUB_SCRATCH2             0x3010074
#define MPNPU_PUB_SCRATCH3             0x3010078
#define MPNPU_PUB_SCRATCH4             0x301007C
#define MPNPU_PUB_SCRATCH5             0x3010080
#define MPNPU_PUB_SCRATCH6             0x3010084
#define MPNPU_PUB_SCRATCH7             0x3010088
#define MPNPU_PUB_SCRATCH8             0x301008C
#define MPNPU_PUB_SCRATCH9             0x3010090
#define MPNPU_PUB_SCRATCH10            0x3010094
#define MPNPU_PUB_SCRATCH11            0x3010098
#define MPNPU_PUB_SCRATCH12            0x301009C
#define MPNPU_PUB_SCRATCH13            0x30100A0
#define MPNPU_PUB_SCRATCH14            0x30100A4
#define MPNPU_PUB_SCRATCH15            0x30100A8
/* MP0 message registers; used for the PSP_* slots of npu4_dev_priv */
#define MP0_C2PMSG_73                  0x3810A24
#define MP0_C2PMSG_123                 0x3810AEC

/* MP1 message registers; used for the SMU_* slots of npu4_dev_priv */
#define MP1_C2PMSG_0                   0x3B10900
#define MP1_C2PMSG_60                  0x3B109F0
#define MP1_C2PMSG_61                  0x3B109F4

/* Mailbox slots in SRAM; MAILBOX_0 coincides with MMNPU_APERTURE1_BASE */
#define MPNPU_SRAM_X2I_MAILBOX_0       0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15      0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31      0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31      0x363F000

/* Aperture base addresses */
#define MMNPU_APERTURE0_BASE           0x3000000
#define MMNPU_APERTURE1_BASE           0x3600000
#define MMNPU_APERTURE3_BASE           0x3810000
#define MMNPU_APERTURE4_BASE           0x3B10000

/*
 * PCIe BAR Index for NPU4.
 *
 * REG and MBOX share BAR 0. Every NPU4_<name>_BAR_INDEX has a matching
 * NPU4_<name>_BAR_BASE below; the bare NPU4_<name> prefix is what the
 * DEFINE_BAR_OFFSET() users in this file pass as their second argument
 * (presumably token-pasted by that macro - confirm in aie2_pci.h).
 */
#define NPU4_REG_BAR_INDEX	0
#define NPU4_MBOX_BAR_INDEX	0
#define NPU4_PSP_BAR_INDEX	4
#define NPU4_SMU_BAR_INDEX	5
#define NPU4_SRAM_BAR_INDEX	2
/* Associated BARs and Apertures */
#define NPU4_REG_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU4_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU4_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
#define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
#define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
65 
66 const struct rt_config npu4_default_rt_cfg[] = {
67 	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
68 	{ 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
69 	{ 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
70 	{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
71 	{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
72 	{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
73 	{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
74 	{ 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
75 	{ 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
76 	{ 0 },
77 };
78 
79 const struct dpm_clk_freq npu4_dpm_clk_table[] = {
80 	{396, 792},
81 	{600, 1056},
82 	{792, 1152},
83 	{975, 1267},
84 	{975, 1267},
85 	{1056, 1408},
86 	{1152, 1584},
87 	{1267, 1800},
88 	{ 0 }
89 };
90 
91 const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
92 	{ .major = 6, .min_minor = 12 },
93 	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
94 	{ .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 },
95 	{ .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 },
96 	{ .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor = 18 },
97 	{ .features = AIE2_ALL_FEATURES, .major = 7 },
98 	{ 0 }
99 };
100 
101 static const struct amdxdna_dev_priv npu4_dev_priv = {
102 	.fw_path        = "amdnpu/17f0_10/",
103 	.rt_config	= npu4_default_rt_cfg,
104 	.dpm_clk_tbl	= npu4_dpm_clk_table,
105 	.fw_feature_tbl = npu4_fw_feature_table,
106 	.col_align	= COL_ALIGN_NATURE,
107 	.mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
108 	.mbox_size      = 0, /* Use BAR size */
109 	.sram_dev_addr  = NPU4_SRAM_BAR_BASE,
110 	.hwctx_limit    = 16,
111 	.sram_offs      = {
112 		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
113 		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
114 	},
115 	.psp_regs_off   = {
116 		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU4_PSP, MP0_C2PMSG_123),
117 		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
118 		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU4_REG, MPNPU_PUB_SCRATCH4),
119 		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU4_REG, MPNPU_PUB_SCRATCH9),
120 		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU4_PSP, MP0_C2PMSG_73),
121 		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
122 		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
123 		DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE),
124 	},
125 	.smu_regs_off   = {
126 		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU4_SMU, MP1_C2PMSG_0),
127 		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU4_SMU, MP1_C2PMSG_60),
128 		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
129 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
130 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
131 	},
132 	.hw_ops		= {
133 		.set_dpm = npu4_set_dpm,
134 	},
135 };
136 
137 const struct amdxdna_dev_info dev_npu4_info = {
138 	.reg_bar           = NPU4_REG_BAR_INDEX,
139 	.mbox_bar          = NPU4_MBOX_BAR_INDEX,
140 	.sram_bar          = NPU4_SRAM_BAR_INDEX,
141 	.psp_bar           = NPU4_PSP_BAR_INDEX,
142 	.smu_bar           = NPU4_SMU_BAR_INDEX,
143 	.first_col         = 0,
144 	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
145 	.dev_mem_base      = AIE2_DEVM_BASE,
146 	.dev_mem_size      = AIE2_DEVM_SIZE,
147 	.vbnv              = "RyzenAI-npu4",
148 	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
149 	.dev_priv          = &npu4_dev_priv,
150 	.ops               = &aie2_ops, /* NPU4 can share NPU1's callback */
151 };
152