1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_tuning.h" 7 8 #include <kunit/visibility.h> 9 10 #include <drm/drm_managed.h> 11 #include <drm/drm_print.h> 12 13 #include "regs/xe_gt_regs.h" 14 #include "xe_gt_types.h" 15 #include "xe_platform_types.h" 16 #include "xe_rtp.h" 17 18 #undef XE_REG_MCR 19 #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) 20 21 static const struct xe_rtp_entry_sr gt_tunings[] = { 22 { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), 23 XE_RTP_RULES(PLATFORM(DG2)), 24 XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) 25 }, 26 { XE_RTP_NAME("Tuning: 32B Access Enable"), 27 XE_RTP_RULES(PLATFORM(DG2)), 28 XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) 29 }, 30 31 /* Xe2 */ 32 33 { XE_RTP_NAME("Tuning: L3 cache"), 34 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 35 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 36 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 37 }, 38 { XE_RTP_NAME("Tuning: L3 cache - media"), 39 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 40 XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 41 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 42 }, 43 { XE_RTP_NAME("Tuning: Compression Overfetch"), 44 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 45 FUNC(xe_rtp_match_has_flat_ccs)), 46 XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), 47 SET(CCCHKNREG1, L3CMPCTRL)) 48 }, 49 { XE_RTP_NAME("Tuning: Compression Overfetch - media"), 50 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 51 XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), 52 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) 53 }, 54 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), 55 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 56 XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) 57 }, 58 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), 59 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 60 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) 61 }, 62 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), 63 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 64 FUNC(xe_rtp_match_has_flat_ccs)), 65 XE_RTP_ACTIONS(SET(L3SQCREG2, 66 COMPMEMRD256BOVRFETCHEN)) 67 }, 68 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), 69 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), 70 FUNC(xe_rtp_match_has_flat_ccs)), 71 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, 72 COMPMEMRD256BOVRFETCHEN)) 73 }, 74 { XE_RTP_NAME("Tuning: Stateless compression control"), 75 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 76 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 77 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 78 }, 79 { XE_RTP_NAME("Tuning: Stateless compression control - media"), 80 XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)), 81 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 82 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 83 }, 84 { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), 85 XE_RTP_RULES(GRAPHICS_VERSION(2004)), 86 XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) 87 }, 88 { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), 89 XE_RTP_RULES(MEDIA_VERSION(2000)), 90 XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) 91 }, 92 }; 93 94 static const struct xe_rtp_entry_sr engine_tunings[] = { 95 { XE_RTP_NAME("Tuning: L3 Hashing Mask"), 96 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), 97 FUNC(xe_rtp_match_first_render_or_compute)), 98 XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC)) 99 }, 100 { XE_RTP_NAME("Tuning: Set Indirect State Override"), 101 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), 102 ENGINE_CLASS(RENDER)), 103 XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) 104 }, 105 { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), 106 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), 107 FUNC(xe_rtp_match_first_render_or_compute)), 108 XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) 109 }, 110 }; 111 112 static const struct xe_rtp_entry_sr lrc_tunings[] = { 113 /* DG2 */ 114 115 { XE_RTP_NAME("Tuning: L3 cache"), 116 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 117 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 118 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 119 }, 120 { XE_RTP_NAME("Tuning: TDS gang timer"), 121 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 122 /* read verification is ignored as in i915 - need to check enabling */ 123 XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, 124 FF_MODE2_TDS_TIMER_MASK, 125 FF_MODE2_TDS_TIMER_128)) 126 }, 127 { XE_RTP_NAME("Tuning: TBIMR fast clip"), 128 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 129 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) 130 }, 131 132 /* Xe_LPG */ 133 134 { XE_RTP_NAME("Tuning: L3 cache"), 135 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), 136 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 137 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 138 }, 139 140 /* Xe2_HPG */ 141 142 { XE_RTP_NAME("Tuning: vs hit max value"), 143 XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 144 XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, 145 REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) 146 }, 147 }; 148 149 /** 150 * xe_tuning_init - initialize gt with tunings bookkeeping 151 * @gt: GT instance to initialize 152 * 153 * Returns 0 for success, negative error code otherwise. 154 */ 155 int xe_tuning_init(struct xe_gt *gt) 156 { 157 struct xe_device *xe = gt_to_xe(gt); 158 size_t n_lrc, n_engine, n_gt, total; 159 unsigned long *p; 160 161 n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); 162 n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); 163 n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); 164 total = n_gt + n_engine + n_lrc; 165 166 p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); 167 if (!p) 168 return -ENOMEM; 169 170 gt->tuning_active.gt = p; 171 p += n_gt; 172 gt->tuning_active.engine = p; 173 p += n_engine; 174 gt->tuning_active.lrc = p; 175 176 return 0; 177 } 178 ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */ 179 180 void xe_tuning_process_gt(struct xe_gt *gt) 181 { 182 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); 183 184 xe_rtp_process_ctx_enable_active_tracking(&ctx, 185 gt->tuning_active.gt, 186 ARRAY_SIZE(gt_tunings)); 187 xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); 188 } 189 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); 190 191 void xe_tuning_process_engine(struct xe_hw_engine *hwe) 192 { 193 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 194 195 xe_rtp_process_ctx_enable_active_tracking(&ctx, 196 hwe->gt->tuning_active.engine, 197 ARRAY_SIZE(engine_tunings)); 198 xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), 199 &hwe->reg_sr); 200 } 201 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); 202 203 /** 204 * xe_tuning_process_lrc - process lrc tunings 205 * @hwe: engine instance to process tunings for 206 * 207 * Process LRC table for this platform, saving in @hwe all the tunings that need 208 * to be applied on context restore. These are tunings touching registers that 209 * are part of the HW context image. 210 */ 211 void xe_tuning_process_lrc(struct xe_hw_engine *hwe) 212 { 213 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 214 215 xe_rtp_process_ctx_enable_active_tracking(&ctx, 216 hwe->gt->tuning_active.lrc, 217 ARRAY_SIZE(lrc_tunings)); 218 xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); 219 } 220 221 /** 222 * xe_tuning_dump() - Dump GT tuning info into a drm printer. 223 * @gt: the &xe_gt 224 * @p: the &drm_printer 225 * 226 * Return: always 0. 227 */ 228 int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) 229 { 230 size_t idx; 231 232 drm_printf(p, "GT Tunings\n"); 233 for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) 234 drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); 235 236 drm_puts(p, "\n"); 237 drm_printf(p, "Engine Tunings\n"); 238 for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) 239 drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); 240 241 drm_puts(p, "\n"); 242 drm_printf(p, "LRC Tunings\n"); 243 for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) 244 drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); 245 246 return 0; 247 } 248