1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Copyright 2023 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: AMD 24 * 25 */ 26 27 #include "dml2_policy.h" 28 29 static void get_optimal_ntuple( 30 const struct soc_bounding_box_st *socbb, 31 struct soc_state_bounding_box_st *entry) 32 { 33 if (entry->dcfclk_mhz > 0) { 34 float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); 35 36 entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); 37 entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans * 38 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); 39 } else if (entry->fabricclk_mhz > 0) { 40 float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); 41 42 entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); 43 entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans * 44 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); 45 } else if (entry->dram_speed_mts > 0) { 46 float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans * 47 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); 48 49 entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); 50 entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); 51 } 52 } 53 54 static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb, 55 struct soc_state_bounding_box_st *entry) 56 { 57 float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans * 58 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); 59 60 float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); 61 62 float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); 63 64 float limiting_bw_mbytes_sec = memory_bw_mbytes_sec; 65 66 if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec) 67 limiting_bw_mbytes_sec = fabric_bw_mbytes_sec; 68 69 if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec) 70 limiting_bw_mbytes_sec = sdp_bw_mbytes_sec; 71 72 return limiting_bw_mbytes_sec; 73 } 74 75 static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb, 76 struct soc_states_st *table, 77 struct soc_state_bounding_box_st *entry) 78 { 79 int index = 0; 80 int i = 0; 81 float net_bw_of_new_state = 0; 82 83 get_optimal_ntuple(socbb, entry); 84 85 if (table->num_states == 0) { 86 index = 0; 87 } else { 88 net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry); 89 while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) { 90 index++; 91 if (index >= (int) table->num_states) 92 break; 93 } 94 95 for (i = table->num_states; i > index; i--) { 96 table->state_array[i] = table->state_array[i - 1]; 97 } 98 //ASSERT(index < MAX_CLK_TABLE_SIZE); 99 } 100 101 table->state_array[index] = *entry; 102 table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz; 103 table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz; 104 table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts; 105 table->num_states++; 106 } 107 108 static void remove_entry_from_table_at_index(struct soc_states_st *table, 109 unsigned int index) 110 { 111 int i; 112 113 if (table->num_states == 0) 114 return; 115 116 for (i = index; i < (int) table->num_states - 1; i++) { 117 table->state_array[i] = table->state_array[i + 1]; 118 } 119 memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st)); 120 } 121 122 int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, 123 struct dml2_policy_build_synthetic_soc_states_params *p) 124 { 125 int i, j; 126 unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz; 127 unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz; 128 unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz; 129 130 int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, 131 max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, 132 max_uclk_mhz = 0, max_socclk_mhz = 0; 133 134 int num_uclk_dpms = 0, num_fclk_dpms = 0; 135 136 for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) { 137 if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) 138 max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz; 139 if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) 140 max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz; 141 if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) 142 max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz; 143 if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) 144 max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts; 145 if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) 146 max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz; 147 if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) 148 max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz; 149 if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) 150 max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; 151 if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) 152 max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; 153 154 if (p->in_states->state_array[i].fabricclk_mhz > 0) 155 num_fclk_dpms++; 156 if (p->in_states->state_array[i].dram_speed_mts > 0) 157 num_uclk_dpms++; 158 } 159 160 if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz) 161 return -1; 162 163 p->out_states->num_states = 0; 164 165 s->entry = p->in_states->state_array[0]; 166 167 s->entry.dispclk_mhz = max_dispclk_mhz; 168 s->entry.dppclk_mhz = max_dppclk_mhz; 169 s->entry.dtbclk_mhz = max_dtbclk_mhz; 170 s->entry.phyclk_mhz = max_phyclk_mhz; 171 172 s->entry.dscclk_mhz = max_dispclk_mhz / 3; 173 s->entry.phyclk_mhz = max_phyclk_mhz; 174 s->entry.dtbclk_mhz = max_dtbclk_mhz; 175 176 // Insert all the DCFCLK STAs first 177 for (i = 0; i < p->num_dcfclk_stas; i++) { 178 s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i]; 179 s->entry.fabricclk_mhz = 0; 180 s->entry.dram_speed_mts = 0; 181 if (i > 0) 182 s->entry.socclk_mhz = max_socclk_mhz; 183 184 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); 185 } 186 187 // Insert the UCLK DPMS 188 for (i = 0; i < num_uclk_dpms; i++) { 189 s->entry.dcfclk_mhz = 0; 190 s->entry.fabricclk_mhz = 0; 191 s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; 192 if (i == 0) { 193 s->entry.socclk_mhz = min_socclk_mhz; 194 } else { 195 s->entry.socclk_mhz = max_socclk_mhz; 196 } 197 198 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); 199 } 200 201 // Insert FCLK DPMs (if present) 202 if (num_fclk_dpms > 2) { 203 for (i = 0; i < num_fclk_dpms; i++) { 204 s->entry.dcfclk_mhz = 0; 205 s->entry.fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; 206 s->entry.dram_speed_mts = 0; 207 208 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); 209 } 210 } 211 // Add max FCLK 212 else { 213 s->entry.dcfclk_mhz = 0; 214 s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz; 215 s->entry.dram_speed_mts = 0; 216 217 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry); 218 } 219 220 // Remove states that require higher clocks than are supported 221 for (i = p->out_states->num_states - 1; i >= 0; i--) { 222 if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz || 223 p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz || 224 p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz) 225 remove_entry_from_table_at_index(p->out_states, i); 226 } 227 228 // At this point, the table contains all "points of interest" based on 229 // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock 230 // ratios (by derate, are exact). 231 232 // Round up UCLK to DPMs 233 for (i = p->out_states->num_states - 1; i >= 0; i--) { 234 for (j = 0; j < num_uclk_dpms; j++) { 235 if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) { 236 p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts; 237 break; 238 } 239 } 240 } 241 242 // If FCLK is coarse grained, round up to next DPMs 243 if (num_fclk_dpms > 2) { 244 for (i = p->out_states->num_states - 1; i >= 0; i--) { 245 for (j = 0; j < num_fclk_dpms; j++) { 246 if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) { 247 p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz; 248 break; 249 } 250 } 251 } 252 } 253 254 // Clamp to min FCLK/DCFCLK 255 for (i = p->out_states->num_states - 1; i >= 0; i--) { 256 if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) { 257 p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz; 258 } 259 if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) { 260 p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz; 261 } 262 } 263 264 // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 265 i = 0; 266 while (i < (int) p->out_states->num_states - 1) { 267 if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz && 268 p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz && 269 p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts) 270 remove_entry_from_table_at_index(p->out_states, i); 271 else 272 i++; 273 } 274 275 return 0; 276 } 277 278 void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy) 279 { 280 for (int i = 0; i < __DML_NUM_PLANES__; i++) { 281 policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB? 282 policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed; 283 policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required; 284 policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; 285 } 286 287 /* Change the default policy initializations as per spreadsheet. We might need to 288 * review and change them later on as per Jun's earlier comments. 289 */ 290 policy->UseUnboundedRequesting = dml_unbounded_requesting_enable; 291 policy->UseMinimumRequiredDCFCLK = false; 292 policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean? 293 policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean? 294 policy->USRRetrainingRequiredFinal = true; 295 policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean? 296 policy->NomDETInKByteOverrideEnable = false; 297 policy->NomDETInKByteOverrideValue = 0; 298 policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true; 299 policy->SynchronizeTimingsFinal = true; 300 policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true; 301 policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean? 302 policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean? 303 if (project == dml_project_dcn35 || 304 project == dml_project_dcn351) { 305 policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; 306 policy->EnhancedPrefetchScheduleAccelerationFinal = 0; 307 policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ 308 policy->UseOnlyMaxPrefetchModes = 1; 309 } 310 } 311