/* xref: /linux/drivers/gpu/drm/xe/xe_tuning.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100) */
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_tuning.h"

#include <kunit/visibility.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "regs/xe_gt_regs.h"
#include "xe_gt_types.h"
#include "xe_platform_types.h"
#include "xe_rtp.h"

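/*
 * Editorial note: tunings are built on the same xe_rtp infrastructure used
 * for workarounds (see xe_rtp.h).  Each table entry pairs match rules
 * (platform, graphics/media IP version range, engine class, or a match
 * function) with register actions to apply when the rules match.  Unlike
 * workarounds, tunings are not fixes for hardware issues but settings
 * recommended for better performance or power.
 *
 * A minimal sketch of an entry, with hypothetical register/field names used
 * purely for illustration:
 *
 *	{ XE_RTP_NAME("Tuning: example"),
 *	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
 *	  XE_RTP_ACTIONS(SET(EXAMPLE_REG, EXAMPLE_BIT))
 *	},
 *
 * XE_REG_MCR() is redefined below so registers declared with it in the
 * tables are flagged as multicast/replicated (MCR).
 */
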
#undef XE_REG_MCR
#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)

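/*
 * GT-wide register tunings: matching entries are saved into the GT's
 * register save/restore list (gt->reg_sr) by xe_tuning_process_gt().
 */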
static const struct xe_rtp_entry_sr gt_tunings[] = {
	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
	  XE_RTP_RULES(PLATFORM(DG2)),
	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
	},
	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
	  XE_RTP_RULES(PLATFORM(DG2)),
	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
	},

	/* Xe2 */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: L3 cache - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: Compression Overfetch"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_has_flat_ccs)),
	  XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
			 SET(CCCHKNREG1, L3CMPCTRL))
	},
	{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
			 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
	},
	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_has_flat_ccs)),
	  XE_RTP_ACTIONS(SET(L3SQCREG2,
			     COMPMEMRD256BOVRFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_has_flat_ccs)),
	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
			     COMPMEMRD256BOVRFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: Stateless compression control"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
	},
	{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
	},
	{ XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
	  XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
	},
	{ XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
	  XE_RTP_RULES(MEDIA_VERSION(2000)),
	  XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
	},
};

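/*
 * Per-engine register tunings: matching entries are saved into the engine's
 * hwe->reg_sr by xe_tuning_process_engine().
 */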
static const struct xe_rtp_entry_sr engine_tunings[] = {
	{ XE_RTP_NAME("Tuning: L3 Hashing Mask"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
		       FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC))
	},
	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
		       ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
	},
	{ XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
	},
};

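/*
 * LRC tunings touch registers that are part of the HW context image:
 * matching entries are saved into hwe->reg_lrc by xe_tuning_process_lrc()
 * and applied on context restore.
 */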
static const struct xe_rtp_entry_sr lrc_tunings[] = {
	/* DG2 */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: TDS gang timer"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  /* read verification is ignored as in i915 - need to check enabling */
	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
						FF_MODE2_TDS_TIMER_MASK,
						FF_MODE2_TDS_TIMER_128))
	},
	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
	},

	/* Xe_LPG */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},

	/* Xe2_HPG */

	{ XE_RTP_NAME("Tuning: vs hit max value"),
	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
				   REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
	},
};

/**
 * xe_tuning_init - initialize GT with tunings bookkeeping
 * @gt: GT instance to initialize
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_tuning_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t n_lrc, n_engine, n_gt, total;
	unsigned long *p;

	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings));
	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings));
	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings));
	total = n_gt + n_engine + n_lrc;

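	/*
	 * A single drm-managed allocation holds the three active-tuning
	 * bitmaps back to back; it is carved up below so each table gets
	 * BITS_TO_LONGS() worth of storage and everything is freed
	 * automatically with the device.
	 */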
	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	gt->tuning_active.gt = p;
	p += n_gt;
	gt->tuning_active.engine = p;
	p += n_engine;
	gt->tuning_active.lrc = p;

	return 0;
}
ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */

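/**
 * xe_tuning_process_gt - process GT tunings
 * @gt: GT instance to process tunings for
 *
 * Process the GT tuning table for this platform, saving in @gt->reg_sr all
 * the tunings that need to be applied to the GT's registers, and recording
 * the matching entries in @gt->tuning_active.gt for later dumping.
 */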
void xe_tuning_process_gt(struct xe_gt *gt)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  gt->tuning_active.gt,
						  ARRAY_SIZE(gt_tunings));
	xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), &gt->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);

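/**
 * xe_tuning_process_engine - process engine tunings
 * @hwe: engine instance to process tunings for
 *
 * Process the engine tuning table for this platform, saving in @hwe->reg_sr
 * all the tunings that need to be applied to the engine's registers, and
 * recording the matching entries in the GT's tuning_active.engine bitmap.
 */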
void xe_tuning_process_engine(struct xe_hw_engine *hwe)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  hwe->gt->tuning_active.engine,
						  ARRAY_SIZE(engine_tunings));
	xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
			     &hwe->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);

/**
 * xe_tuning_process_lrc - process lrc tunings
 * @hwe: engine instance to process tunings for
 *
 * Process LRC table for this platform, saving in @hwe all the tunings that need
 * to be applied on context restore. These are tunings touching registers that
 * are part of the HW context image.
 */
void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  hwe->gt->tuning_active.lrc,
						  ARRAY_SIZE(lrc_tunings));
	xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
}

/**
 * xe_tuning_dump() - Dump GT tuning info into a drm printer.
 * @gt: the &xe_gt
 * @p: the &drm_printer
 *
 * Return: always 0.
 */
int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
{
	size_t idx;

	drm_printf(p, "GT Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
		drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);

	drm_puts(p, "\n");
	drm_printf(p, "Engine Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
		drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);

	drm_puts(p, "\n");
	drm_printf(p, "LRC Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
		drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);

	return 0;
}