xref: /linux/drivers/gpu/drm/xe/xe_tuning.c (revision 75f65f1a4c06da1d87f28570a9d4cdad28f13360)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_tuning.h"
7 
8 #include <kunit/visibility.h>
9 
10 #include <drm/drm_managed.h>
11 #include <drm/drm_print.h>
12 
13 #include "regs/xe_engine_regs.h"
14 #include "regs/xe_gt_regs.h"
15 #include "xe_gt_types.h"
16 #include "xe_platform_types.h"
17 #include "xe_rtp.h"
18 #include "xe_sriov.h"
19 
/*
 * Override XE_REG_MCR for the rest of this file: it now expands to XE_REG()
 * with the .mcr member set to 1.
 * NOTE(review): presumably needed so MCR registers can be used inside the RTP
 * action macros of the tables below - confirm against the register headers.
 */
#undef XE_REG_MCR
#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
22 
23 static const struct xe_rtp_entry_sr gt_tunings[] = {
24 	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
25 	  XE_RTP_RULES(PLATFORM(DG2)),
26 	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
27 	},
28 	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
29 	  XE_RTP_RULES(PLATFORM(DG2)),
30 	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
31 	},
32 
33 	/* Xe2 */
34 
35 	{ XE_RTP_NAME("Tuning: L3 cache"),
36 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)),
37 	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
38 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
39 	},
40 	{ XE_RTP_NAME("Tuning: L3 cache - media"),
41 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, 3499)),
42 	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
43 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
44 	},
45 	{ XE_RTP_NAME("Tuning: Compression Overfetch"),
46 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499),
47 		       FUNC(xe_rtp_match_has_flat_ccs)),
48 	  XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
49 			 SET(CCCHKNREG1, L3CMPCTRL))
50 	},
51 	{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
52 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
53 	  XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
54 			 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
55 	},
56 	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
57 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)),
58 	  XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
59 	},
60 	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
61 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
62 	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
63 	},
64 	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
65 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
66 		       FUNC(xe_rtp_match_has_flat_ccs)),
67 	  XE_RTP_ACTIONS(SET(L3SQCREG2,
68 			     COMPMEMRD256BOVRFETCHEN))
69 	},
70 	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
71 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
72 		       FUNC(xe_rtp_match_has_flat_ccs)),
73 	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
74 			     COMPMEMRD256BOVRFETCHEN))
75 	},
76 	{ XE_RTP_NAME("Tuning: Stateless compression control"),
77 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
78 	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
79 				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
80 	},
81 	{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
82 	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
83 	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
84 				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
85 	},
86 	{ XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
87 	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
88 	  XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
89 	},
90 	{ XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
91 	  XE_RTP_RULES(MEDIA_VERSION(2000)),
92 	  XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
93 	},
94 
95 	/* Xe3p */
96 
97 	{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
98 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
99 		       IS_INTEGRATED),
100 	  XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
101 				   BANK_HASH_4KB_MODE))
102 	},
103 };
104 
105 static const struct xe_rtp_entry_sr engine_tunings[] = {
106 	{ XE_RTP_NAME("Tuning: L3 Hashing Mask"),
107 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
108 		       FUNC(xe_rtp_match_first_render_or_compute)),
109 	  XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC))
110 	},
111 	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
112 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
113 		       ENGINE_CLASS(RENDER)),
114 	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
115 	},
116 	{ XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"),
117 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
118 		       FUNC(xe_rtp_match_first_render_or_compute)),
119 	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
120 	},
121 	{ XE_RTP_NAME("Tuning: disable HW reporting of ctx switch to GHWSP"),
122 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, XE_RTP_END_VERSION_UNDEFINED)),
123 	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
124 			     GHWSP_CSB_REPORT_DIS,
125 			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
126 	},
127 	{ XE_RTP_NAME("Tuning: TileY 2x2 Walk"),
128 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
129 		       FUNC(xe_rtp_match_first_render_or_compute)),
130 	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN2, TILEY_LOCALID))
131 	},
132 };
133 
134 static const struct xe_rtp_entry_sr lrc_tunings[] = {
135 	{ XE_RTP_NAME("Tuning: Windower HW Filtering"),
136 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)),
137 	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, HW_FILTERING))
138 	},
139 
140 	/* DG2 */
141 
142 	{ XE_RTP_NAME("Tuning: L3 cache"),
143 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
144 	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
145 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
146 	},
147 	{ XE_RTP_NAME("Tuning: TDS gang timer"),
148 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
149 	  /* read verification is ignored as in i915 - need to check enabling */
150 	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
151 						FF_MODE2_TDS_TIMER_MASK,
152 						FF_MODE2_TDS_TIMER_128))
153 	},
154 	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
155 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
156 	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
157 	},
158 
159 	/* Xe_LPG */
160 
161 	{ XE_RTP_NAME("Tuning: L3 cache"),
162 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
163 	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
164 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
165 	},
166 
167 	/* Xe2_HPG */
168 
169 	{ XE_RTP_NAME("Tuning: vs hit max value"),
170 	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
171 	  XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
172 				   REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
173 	},
174 };
175 
176 /**
177  * xe_tuning_init - initialize gt with tunings bookkeeping
178  * @gt: GT instance to initialize
179  *
180  * Returns 0 for success, negative error code otherwise.
181  */
182 int xe_tuning_init(struct xe_gt *gt)
183 {
184 	struct xe_device *xe = gt_to_xe(gt);
185 	size_t n_lrc, n_engine, n_gt, total;
186 	unsigned long *p;
187 
188 	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings));
189 	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings));
190 	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings));
191 	total = n_gt + n_engine + n_lrc;
192 
193 	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
194 	if (!p)
195 		return -ENOMEM;
196 
197 	gt->tuning_active.gt = p;
198 	p += n_gt;
199 	gt->tuning_active.engine = p;
200 	p += n_engine;
201 	gt->tuning_active.lrc = p;
202 
203 	return 0;
204 }
205 ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */
206 
207 void xe_tuning_process_gt(struct xe_gt *gt)
208 {
209 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
210 
211 	xe_rtp_process_ctx_enable_active_tracking(&ctx,
212 						  gt->tuning_active.gt,
213 						  ARRAY_SIZE(gt_tunings));
214 	xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings),
215 			     &gt->reg_sr, false);
216 }
217 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
218 
219 void xe_tuning_process_engine(struct xe_hw_engine *hwe)
220 {
221 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
222 
223 	xe_rtp_process_ctx_enable_active_tracking(&ctx,
224 						  hwe->gt->tuning_active.engine,
225 						  ARRAY_SIZE(engine_tunings));
226 	xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
227 			     &hwe->reg_sr, false);
228 }
229 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
230 
231 /**
232  * xe_tuning_process_lrc - process lrc tunings
233  * @hwe: engine instance to process tunings for
234  *
235  * Process LRC table for this platform, saving in @hwe all the tunings that need
236  * to be applied on context restore. These are tunings touching registers that
237  * are part of the HW context image.
238  */
239 void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
240 {
241 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
242 
243 	xe_rtp_process_ctx_enable_active_tracking(&ctx,
244 						  hwe->gt->tuning_active.lrc,
245 						  ARRAY_SIZE(lrc_tunings));
246 	xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings),
247 			     &hwe->reg_lrc, true);
248 }
249 
250 /**
251  * xe_tuning_dump() - Dump GT tuning info into a drm printer.
252  * @gt: the &xe_gt
253  * @p: the &drm_printer
254  *
255  * Return: always 0.
256  */
257 int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
258 {
259 	size_t idx;
260 
261 	drm_printf(p, "GT Tunings\n");
262 	for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
263 		drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);
264 
265 	drm_puts(p, "\n");
266 	drm_printf(p, "Engine Tunings\n");
267 	for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
268 		drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);
269 
270 	drm_puts(p, "\n");
271 	drm_printf(p, "LRC Tunings\n");
272 	for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
273 		drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);
274 
275 	return 0;
276 }
277