1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_tuning.h" 7 8 #include <kunit/visibility.h> 9 10 #include <drm/drm_managed.h> 11 12 #include "regs/xe_gt_regs.h" 13 #include "xe_gt_types.h" 14 #include "xe_platform_types.h" 15 #include "xe_rtp.h" 16 17 #undef XE_REG_MCR 18 #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) 19 20 static const struct xe_rtp_entry_sr gt_tunings[] = { 21 { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable"), 22 XE_RTP_RULES(PLATFORM(DG2)), 23 XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) 24 }, 25 { XE_RTP_NAME("Tuning: 32B Access Enable"), 26 XE_RTP_RULES(PLATFORM(DG2)), 27 XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) 28 }, 29 30 /* Xe2 */ 31 32 { XE_RTP_NAME("Tuning: L3 cache"), 33 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 34 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 35 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 36 }, 37 { XE_RTP_NAME("Tuning: L3 cache - media"), 38 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 39 XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 40 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 41 }, 42 { XE_RTP_NAME("Tuning: Compression Overfetch"), 43 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 44 FUNC(xe_rtp_match_has_flat_ccs)), 45 XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), 46 SET(CCCHKNREG1, L3CMPCTRL)) 47 }, 48 { XE_RTP_NAME("Tuning: Compression Overfetch - media"), 49 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 50 XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), 51 SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) 52 }, 53 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), 54 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 55 XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) 56 }, 57 { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), 58 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), 59 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) 60 }, 61 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), 62 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), 63 FUNC(xe_rtp_match_has_flat_ccs)), 64 XE_RTP_ACTIONS(SET(L3SQCREG2, 65 COMPMEMRD256BOVRFETCHEN)) 66 }, 67 { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), 68 XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), 69 FUNC(xe_rtp_match_has_flat_ccs)), 70 XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, 71 COMPMEMRD256BOVRFETCHEN)) 72 }, 73 { XE_RTP_NAME("Tuning: Stateless compression control"), 74 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), 75 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 76 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 77 }, 78 { XE_RTP_NAME("Tuning: Stateless compression control - media"), 79 XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)), 80 XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, 81 REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) 82 }, 83 { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), 84 XE_RTP_RULES(GRAPHICS_VERSION(2004)), 85 XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) 86 }, 87 { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), 88 XE_RTP_RULES(MEDIA_VERSION(2000)), 89 XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) 90 }, 91 }; 92 93 static const struct xe_rtp_entry_sr engine_tunings[] = { 94 { XE_RTP_NAME("Tuning: L3 Hashing Mask"), 95 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), 96 FUNC(xe_rtp_match_first_render_or_compute)), 97 XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC)) 98 }, 99 { XE_RTP_NAME("Tuning: Set Indirect State Override"), 100 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), 101 ENGINE_CLASS(RENDER)), 102 XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) 103 }, 104 { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), 105 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), 106 FUNC(xe_rtp_match_first_render_or_compute)), 107 XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) 108 }, 109 }; 110 111 static const struct xe_rtp_entry_sr lrc_tunings[] = { 112 /* DG2 */ 113 114 { XE_RTP_NAME("Tuning: L3 cache"), 115 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 116 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 117 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 118 }, 119 { XE_RTP_NAME("Tuning: TDS gang timer"), 120 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 121 /* read verification is ignored as in i915 - need to check enabling */ 122 XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, 123 FF_MODE2_TDS_TIMER_MASK, 124 FF_MODE2_TDS_TIMER_128)) 125 }, 126 { XE_RTP_NAME("Tuning: TBIMR fast clip"), 127 XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), 128 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) 129 }, 130 131 /* Xe_LPG */ 132 133 { XE_RTP_NAME("Tuning: L3 cache"), 134 XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), 135 XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 136 REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) 137 }, 138 139 /* Xe2_HPG */ 140 141 { XE_RTP_NAME("Tuning: vs hit max value"), 142 XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), 143 XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, 144 REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) 145 }, 146 }; 147 148 /** 149 * xe_tuning_init - initialize gt with tunings bookkeeping 150 * @gt: GT instance to initialize 151 * 152 * Returns 0 for success, negative error code otherwise. 153 */ 154 int xe_tuning_init(struct xe_gt *gt) 155 { 156 struct xe_device *xe = gt_to_xe(gt); 157 size_t n_lrc, n_engine, n_gt, total; 158 unsigned long *p; 159 160 n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); 161 n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); 162 n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); 163 total = n_gt + n_engine + n_lrc; 164 165 p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); 166 if (!p) 167 return -ENOMEM; 168 169 gt->tuning_active.gt = p; 170 p += n_gt; 171 gt->tuning_active.engine = p; 172 p += n_engine; 173 gt->tuning_active.lrc = p; 174 175 return 0; 176 } 177 ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */ 178 179 void xe_tuning_process_gt(struct xe_gt *gt) 180 { 181 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); 182 183 xe_rtp_process_ctx_enable_active_tracking(&ctx, 184 gt->tuning_active.gt, 185 ARRAY_SIZE(gt_tunings)); 186 xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); 187 } 188 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); 189 190 void xe_tuning_process_engine(struct xe_hw_engine *hwe) 191 { 192 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 193 194 xe_rtp_process_ctx_enable_active_tracking(&ctx, 195 hwe->gt->tuning_active.engine, 196 ARRAY_SIZE(engine_tunings)); 197 xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), 198 &hwe->reg_sr); 199 } 200 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); 201 202 /** 203 * xe_tuning_process_lrc - process lrc tunings 204 * @hwe: engine instance to process tunings for 205 * 206 * Process LRC table for this platform, saving in @hwe all the tunings that need 207 * to be applied on context restore. These are tunings touching registers that 208 * are part of the HW context image. 209 */ 210 void xe_tuning_process_lrc(struct xe_hw_engine *hwe) 211 { 212 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 213 214 xe_rtp_process_ctx_enable_active_tracking(&ctx, 215 hwe->gt->tuning_active.lrc, 216 ARRAY_SIZE(lrc_tunings)); 217 xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); 218 } 219 220 /** 221 * xe_tuning_dump() - Dump GT tuning info into a drm printer. 222 * @gt: the &xe_gt 223 * @p: the &drm_printer 224 * 225 * Return: always 0. 226 */ 227 int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) 228 { 229 size_t idx; 230 231 drm_printf(p, "GT Tunings\n"); 232 for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) 233 drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); 234 235 drm_puts(p, "\n"); 236 drm_printf(p, "Engine Tunings\n"); 237 for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) 238 drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); 239 240 drm_puts(p, "\n"); 241 drm_printf(p, "LRC Tunings\n"); 242 for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) 243 drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); 244 245 return 0; 246 } 247