1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_tuning.h" 7 8 #include <kunit/visibility.h> 9 10 #include <drm/drm_managed.h> 11 #include <drm/drm_print.h> 12 13 #include "regs/xe_engine_regs.h" 14 #include "regs/xe_gt_regs.h" 15 #include "xe_gt_types.h" 16 #include "xe_platform_types.h" 17 #include "xe_rtp.h" 18 #include "xe_sriov.h" 19 20 #undef XE_REG_MCR 21 #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) 22 23 static const struct xe_rtp_entry_sr gt_tunings[] = { 24 { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), 25 XE_RTP_RULES(PLATFORM(DG2)), 26 XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) 27 }, 28 { XE_RTP_NAME("Tuning: 32B Access Enable"), 29 XE_RTP_RULES(PLATFORM(DG2)), 30 XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) 31 }, 32 33 /* Xe2 */ 34 35 { XE_RTP_NAME("Tuning: L3 cache"), 36 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)), 37 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 38 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 39 }, 40 { XE_RTP_NAME("Tuning: L3 cache - media"), 41 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, 3499)), 42 XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 43 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 44 }, 45 { XE_RTP_NAME("Tuning: Compression Overfetch"), 46 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 47 FUNC(xe_rtp_match_has_flat_ccs)), 48 XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), 49 SET(CCCHKNREG1, L3CMPCTRL)) 50 }, 51 { XE_RTP_NAME("Tuning: Compression Overfetch - media"), 52 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 53 XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), 54 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) 55 }, 56 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), 57 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)), 58 XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) 59 }, 60 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), 61 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 62 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) 63 }, 64 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), 65 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 66 FUNC(xe_rtp_match_has_flat_ccs)), 67 XE_RTP_ACTIONS(SET(L3SQCREG2, 68 COMPMEMRD256BOVRFETCHEN)) 69 }, 70 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), 71 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), 72 FUNC(xe_rtp_match_has_flat_ccs)), 73 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, 74 COMPMEMRD256BOVRFETCHEN)) 75 }, 76 { XE_RTP_NAME("Tuning: Stateless compression control"), 77 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 78 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 79 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 80 }, 81 { XE_RTP_NAME("Tuning: Stateless compression control - media"), 82 XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)), 83 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 84 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 85 }, 86 { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), 87 XE_RTP_RULES(GRAPHICS_VERSION(2004)), 88 XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) 89 }, 90 { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), 91 XE_RTP_RULES(MEDIA_VERSION(2000)), 92 XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) 93 }, 94 95 /* Xe3p */ 96 97 { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"), 98 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED), 99 IS_INTEGRATED), 100 XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE, 101 BANK_HASH_4KB_MODE)) 102 }, 103 }; 104 105 static const struct xe_rtp_entry_sr engine_tunings[] = { 106 { XE_RTP_NAME("Tuning: L3 Hashing Mask"), 107 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), 108 FUNC(xe_rtp_match_first_render_or_compute)), 109 XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC)) 110 }, 111 { XE_RTP_NAME("Tuning: Set Indirect State Override"), 112 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), 113 ENGINE_CLASS(RENDER)), 114 XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) 115 }, 116 { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), 117 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), 118 FUNC(xe_rtp_match_first_render_or_compute)), 119 XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) 120 }, 121 { XE_RTP_NAME("Tuning: disable HW reporting of ctx switch to GHWSP"), 122 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, XE_RTP_END_VERSION_UNDEFINED)), 123 XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), 124 GHWSP_CSB_REPORT_DIS, 125 XE_RTP_ACTION_FLAG(ENGINE_BASE))) 126 }, 127 }; 128 129 static const struct xe_rtp_entry_sr lrc_tunings[] = { 130 { XE_RTP_NAME("Tuning: Windower HW Filtering"), 131 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)), 132 XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING)) 133 }, 134 135 /* DG2 */ 136 137 { XE_RTP_NAME("Tuning: L3 cache"), 138 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 139 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 140 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 141 }, 142 { XE_RTP_NAME("Tuning: TDS gang timer"), 143 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 144 /* read verification is ignored as in i915 - need to check enabling */ 145 XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, 146 FF_MODE2_TDS_TIMER_MASK, 147 FF_MODE2_TDS_TIMER_128)) 148 }, 149 { XE_RTP_NAME("Tuning: TBIMR fast clip"), 150 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 151 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) 152 }, 153 154 /* Xe_LPG */ 155 156 { XE_RTP_NAME("Tuning: L3 cache"), 157 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), 158 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 159 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 160 }, 161 162 /* Xe2_HPG */ 163 164 { XE_RTP_NAME("Tuning: vs hit max value"), 165 XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 166 XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, 167 REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) 168 }, 169 }; 170 171 /** 172 * xe_tuning_init - initialize gt with tunings bookkeeping 173 * @gt: GT instance to initialize 174 * 175 * Returns 0 for success, negative error code otherwise. 176 */ 177 int xe_tuning_init(struct xe_gt *gt) 178 { 179 struct xe_device *xe = gt_to_xe(gt); 180 size_t n_lrc, n_engine, n_gt, total; 181 unsigned long *p; 182 183 n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); 184 n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); 185 n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); 186 total = n_gt + n_engine + n_lrc; 187 188 p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); 189 if (!p) 190 return -ENOMEM; 191 192 gt->tuning_active.gt = p; 193 p += n_gt; 194 gt->tuning_active.engine = p; 195 p += n_engine; 196 gt->tuning_active.lrc = p; 197 198 return 0; 199 } 200 ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */ 201 202 void xe_tuning_process_gt(struct xe_gt *gt) 203 { 204 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); 205 206 xe_rtp_process_ctx_enable_active_tracking(&ctx, 207 gt->tuning_active.gt, 208 ARRAY_SIZE(gt_tunings)); 209 xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), 210 >->reg_sr, false); 211 } 212 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); 213 214 void xe_tuning_process_engine(struct xe_hw_engine *hwe) 215 { 216 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 217 218 xe_rtp_process_ctx_enable_active_tracking(&ctx, 219 hwe->gt->tuning_active.engine, 220 ARRAY_SIZE(engine_tunings)); 221 xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), 222 &hwe->reg_sr, false); 223 } 224 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); 225 226 /** 227 * xe_tuning_process_lrc - process lrc tunings 228 * @hwe: engine instance to process tunings for 229 * 230 * Process LRC table for this platform, saving in @hwe all the tunings that need 231 * to be applied on context restore. These are tunings touching registers that 232 * are part of the HW context image. 233 */ 234 void xe_tuning_process_lrc(struct xe_hw_engine *hwe) 235 { 236 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 237 238 xe_rtp_process_ctx_enable_active_tracking(&ctx, 239 hwe->gt->tuning_active.lrc, 240 ARRAY_SIZE(lrc_tunings)); 241 xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), 242 &hwe->reg_lrc, true); 243 } 244 245 /** 246 * xe_tuning_dump() - Dump GT tuning info into a drm printer. 247 * @gt: the &xe_gt 248 * @p: the &drm_printer 249 * 250 * Return: always 0. 251 */ 252 int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) 253 { 254 size_t idx; 255 256 drm_printf(p, "GT Tunings\n"); 257 for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) 258 drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); 259 260 drm_puts(p, "\n"); 261 drm_printf(p, "Engine Tunings\n"); 262 for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) 263 drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); 264 265 drm_puts(p, "\n"); 266 drm_printf(p, "LRC Tunings\n"); 267 for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) 268 drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); 269 270 return 0; 271 } 272