// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_tuning.h"

#include <kunit/visibility.h>

#include <drm/drm_managed.h>

#include "regs/xe_gt_regs.h"
#include "xe_gt_types.h"
#include "xe_platform_types.h"
#include "xe_rtp.h"

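/*
 * Redefine XE_REG_MCR() so that register entries in the tables below are
 * declared as plain registers with the multicast/replicated (.mcr) flag
 * set, as expected by the RTP actions.
 */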
#undef XE_REG_MCR
#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)

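/*
 * The tuning tables below are processed with the RTP infrastructure:
 * entries whose rules match the device have their register changes recorded
 * in the relevant save/restore or LRC list, and are tracked in the
 * gt->tuning_active bitmaps so xe_tuning_dump() can report them later.
 */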
static const struct xe_rtp_entry_sr gt_tunings[] = {
	{ XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"),
	  XE_RTP_RULES(PLATFORM(DG2)),
	  XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS))
	},
	{ XE_RTP_NAME("Tuning: 32B Access Enable"),
	  XE_RTP_RULES(PLATFORM(DG2)),
	  XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
	},

	/* Xe2 */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: L3 cache - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: Compression Overfetch"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
			 SET(CCCHKNREG1, L3CMPCTRL))
	},
	{ XE_RTP_NAME("Tuning: Compression Overfetch - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX),
			 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL))
	},
	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(L3SQCREG2,
			     COMPMEMRD256BOVRFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
			     COMPMEMRD256BOVRFETCHEN))
	},
	{ XE_RTP_NAME("Tuning: Stateless compression control"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
	},
	{ XE_RTP_NAME("Tuning: Stateless compression control - media"),
	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)),
	  XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT,
				   REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0)))
	},
	{ XE_RTP_NAME("Tuning: L3 RW flush all Cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
	  XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN))
	},
	{ XE_RTP_NAME("Tuning: L3 RW flush all cache - media"),
	  XE_RTP_RULES(MEDIA_VERSION(2000)),
	  XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
	},
};

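/* Tunings for registers specific to an engine (saved in hwe->reg_sr) */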
static const struct xe_rtp_entry_sr engine_tunings[] = {
	{ XE_RTP_NAME("Tuning: L3 Hashing Mask"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210),
		       FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC))
	},
	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
		       ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
	},
	{ XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, XE_RTP_END_VERSION_UNDEFINED),
		       FUNC(xe_rtp_match_first_render_or_compute)),
	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
	},
};

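/* Tunings for registers that are part of the HW context image (saved in hwe->reg_lrc) */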
static const struct xe_rtp_entry_sr lrc_tunings[] = {
	/* DG2 */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},
	{ XE_RTP_NAME("Tuning: TDS gang timer"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  /* read verification is ignored as in i915 - need to check enabling */
	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2,
						FF_MODE2_TDS_TIMER_MASK,
						FF_MODE2_TDS_TIMER_128))
	},
	{ XE_RTP_NAME("Tuning: TBIMR fast clip"),
	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
	},

	/* Xe_LPG */

	{ XE_RTP_NAME("Tuning: L3 cache"),
	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
	},

	/* Xe2_HPG */

	{ XE_RTP_NAME("Tuning: vs hit max value"),
	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
	  XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
				   REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
	},
};

/**
 * xe_tuning_init - initialize tuning bookkeeping for a GT
 * @gt: GT instance to initialize
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_tuning_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t n_lrc, n_engine, n_gt, total;
	unsigned long *p;

	n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings));
	n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings));
	n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings));
	total = n_gt + n_engine + n_lrc;

	p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

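	/* Carve the single allocation into the three active-tuning bitmaps */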
	gt->tuning_active.gt = p;
	p += n_gt;
	gt->tuning_active.engine = p;
	p += n_engine;
	gt->tuning_active.lrc = p;

	return 0;
}
ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */

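/**
 * xe_tuning_process_gt - process GT tunings
 * @gt: GT instance to process tunings for
 *
 * Process the GT tuning table for this platform, saving in @gt all the
 * tunings that need to be applied to GT-wide registers through the
 * save/restore infrastructure.
 */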
void xe_tuning_process_gt(struct xe_gt *gt)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  gt->tuning_active.gt,
						  ARRAY_SIZE(gt_tunings));
	xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), &gt->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);

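/**
 * xe_tuning_process_engine - process engine tunings
 * @hwe: engine instance to process tunings for
 *
 * Process the engine tuning table for this platform, saving in @hwe all the
 * tunings that need to be applied to engine-specific registers through the
 * save/restore infrastructure.
 */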
void xe_tuning_process_engine(struct xe_hw_engine *hwe)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  hwe->gt->tuning_active.engine,
						  ARRAY_SIZE(engine_tunings));
	xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
			     &hwe->reg_sr);
}
EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);

/**
 * xe_tuning_process_lrc - process lrc tunings
 * @hwe: engine instance to process tunings for
 *
 * Process the LRC table for this platform, saving in @hwe all the tunings
 * that need to be applied on context restore. These are tunings touching
 * registers that are part of the HW context image.
 */
void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
{
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);

	xe_rtp_process_ctx_enable_active_tracking(&ctx,
						  hwe->gt->tuning_active.lrc,
						  ARRAY_SIZE(lrc_tunings));
	xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
}

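/**
 * xe_tuning_dump - print the tunings currently active on a GT
 * @gt: GT instance whose active tunings are printed
 * @p: drm_printer to print to
 *
 * Print the names of the GT, engine and LRC tunings that matched this
 * platform and were recorded as active.
 */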
void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
{
	size_t idx;

	drm_printf(p, "GT Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
		drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);

	drm_printf(p, "\nEngine Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
		drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);

	drm_printf(p, "\nLRC Tunings\n");
	for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
		drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);
}
233