// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_tuning.h"

#include <kunit/visibility.h>

#include <drm/drm_managed.h>

#include "regs/xe_gt_regs.h"
#include "xe_gt_types.h"
#include "xe_platform_types.h"
#include "xe_rtp.h"

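/*
 * Make the MCR register macros from the regs headers expand to a plain
 * struct xe_reg with the .mcr flag set, which is the form the RTP entries
 * below operate on.
 */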
#undef XE_REG_MCR
#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)

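/* GT-level tunings, processed into gt->reg_sr by xe_tuning_process_gt() */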
static const struct xe_rtp_entry_sr gt_tunings[] = {
	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
	  XE_RTP_RULES(PLATFORM(DG2)),
	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
	},
	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
	  XE_RTP_RULES(PLATFORM(DG2)),
	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
	},

	/* Xe2 */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: L3 cache - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: Compression Overfetch"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_has_flat_ccs)),
	  XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
			 SET(CCCHKNREG1, L3CMPCTRL))
	},
	{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
			 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
	},
	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_has_flat_ccs)),
	  XE_RTP_ACTIONS(SET(L3SQCREG2,
			     COMPMEMRD256BOVRFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_has_flat_ccs)),
	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
			     COMPMEMRD256BOVRFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: Stateless compression control"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
	},
	{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
	},
	{ XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
	  XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
	},
	{ XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
	  XE_RTP_RULES(MEDIA_VERSION(2000)),
	  XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
	},
};

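/* Per-engine tunings, processed into hwe->reg_sr by xe_tuning_process_engine() */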
static const struct xe_rtp_entry_sr engine_tunings[] = {
	{ XE_RTP_NAME("Tuning: L3 Hashing Mask"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
		       FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC))
	},
	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
		       ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
	},
	{ XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
	},
};

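/*
 * Tunings for registers that are part of the HW context image, processed
 * into hwe->reg_lrc by xe_tuning_process_lrc() and applied on context restore.
 */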
static const struct xe_rtp_entry_sr lrc_tunings[] = {
	/* DG2 */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: TDS gang timer"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  /* Read verification is skipped here, as in i915 - enabling it still needs to be investigated */
	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
						FF_MODE2_TDS_TIMER_MASK,
						FF_MODE2_TDS_TIMER_128))
	},
	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
	},

	/* Xe_LPG */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},

	/* Xe2_HPG */

	{ XE_RTP_NAME("Tuning: vs hit max value"),
	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
				   REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
	},
};

/**
 * xe_tuning_init - initialize tuning bookkeeping for a GT
 * @gt: GT instance to initialize
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_tuning_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t n_lrc, n_engine, n_gt, total;
	unsigned long *p;

	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings));
	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings));
	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings));
	total = n_gt + n_engine + n_lrc;

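	/*
	 * Single drm-managed allocation, carved up into one active-tuning
	 * bitmap per table.
	 */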
	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	gt->tuning_active.gt = p;
	p += n_gt;
	gt->tuning_active.engine = p;
	p += n_engine;
	gt->tuning_active.lrc = p;

	return 0;
}
ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */

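/**
 * xe_tuning_process_gt - process GT tunings
 * @gt: GT instance to process tunings for
 *
 * Process the GT tuning table for this platform, saving in @gt all the
 * tunings that need to be applied to GT-level registers and marking the
 * matching entries as active.
 */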
void xe_tuning_process_gt(struct xe_gt *gt)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  gt->tuning_active.gt,
						  ARRAY_SIZE(gt_tunings));
	xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), &gt->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);

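/**
 * xe_tuning_process_engine - process engine tunings
 * @hwe: engine instance to process tunings for
 *
 * Process the engine tuning table for this platform, saving in @hwe all the
 * tunings that need to be applied to the engine's registers and marking the
 * matching entries as active.
 */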
void xe_tuning_process_engine(struct xe_hw_engine *hwe)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  hwe->gt->tuning_active.engine,
						  ARRAY_SIZE(engine_tunings));
	xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
			     &hwe->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);

/**
 * xe_tuning_process_lrc - process lrc tunings
 * @hwe: engine instance to process tunings for
 *
 * Process LRC table for this platform, saving in @hwe all the tunings that need
 * to be applied on context restore. These are tunings touching registers that
 * are part of the HW context image.
 */
void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  hwe->gt->tuning_active.lrc,
						  ARRAY_SIZE(lrc_tunings));
	xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
}

/**
 * xe_tuning_dump() - Dump GT tuning info into a drm printer.
 * @gt: the &xe_gt
 * @p: the &drm_printer
 *
 * Return: always 0.
 */
int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
{
	size_t idx;

	drm_printf(p, "GT Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
		drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);

	drm_puts(p, "\n");
	drm_printf(p, "Engine Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
		drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);

	drm_puts(p, "\n");
	drm_printf(p, "LRC Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
		drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);

	return 0;
}
247