1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 4 */ 5 6 #include <linux/dim.h> 7 #include <linux/rtnetlink.h> 8 9 /* 10 * Net DIM profiles: 11 * There are different set of profiles for each CQ period mode. 12 * There are different set of profiles for RX/TX CQs. 13 * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES 14 */ 15 #define NET_DIM_RX_EQE_PROFILES { \ 16 {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ 17 {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ 18 {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ 19 {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ 20 {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \ 21 } 22 23 #define NET_DIM_RX_CQE_PROFILES { \ 24 {.usec = 2, .pkts = 256,}, \ 25 {.usec = 8, .pkts = 128,}, \ 26 {.usec = 16, .pkts = 64,}, \ 27 {.usec = 32, .pkts = 64,}, \ 28 {.usec = 64, .pkts = 64,} \ 29 } 30 31 #define NET_DIM_TX_EQE_PROFILES { \ 32 {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ 33 {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ 34 {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ 35 {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ 36 {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \ 37 } 38 39 #define NET_DIM_TX_CQE_PROFILES { \ 40 {.usec = 5, .pkts = 128,}, \ 41 {.usec = 8, .pkts = 64,}, \ 42 {.usec = 16, .pkts = 32,}, \ 43 {.usec = 32, .pkts = 32,}, \ 44 {.usec = 64, .pkts = 32,} \ 45 } 46 47 static const struct dim_cq_moder 48 rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { 49 NET_DIM_RX_EQE_PROFILES, 50 NET_DIM_RX_CQE_PROFILES, 51 }; 52 53 static const struct dim_cq_moder 54 tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { 55 NET_DIM_TX_EQE_PROFILES, 56 NET_DIM_TX_CQE_PROFILES, 57 }; 58 59 struct dim_cq_moder 60 net_dim_get_rx_moderation(u8 cq_period_mode, int ix) 61 { 62 struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; 63 64 cq_moder.cq_period_mode = cq_period_mode; 65 return cq_moder; 66 } 67 EXPORT_SYMBOL(net_dim_get_rx_moderation); 68 69 struct dim_cq_moder 70 net_dim_get_def_rx_moderation(u8 cq_period_mode) 71 { 72 u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 73 NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; 74 75 return net_dim_get_rx_moderation(cq_period_mode, profile_ix); 76 } 77 EXPORT_SYMBOL(net_dim_get_def_rx_moderation); 78 79 struct dim_cq_moder 80 net_dim_get_tx_moderation(u8 cq_period_mode, int ix) 81 { 82 struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; 83 84 cq_moder.cq_period_mode = cq_period_mode; 85 return cq_moder; 86 } 87 EXPORT_SYMBOL(net_dim_get_tx_moderation); 88 89 struct dim_cq_moder 90 net_dim_get_def_tx_moderation(u8 cq_period_mode) 91 { 92 u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 93 NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; 94 95 return net_dim_get_tx_moderation(cq_period_mode, profile_ix); 96 } 97 EXPORT_SYMBOL(net_dim_get_def_tx_moderation); 98 99 int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, 100 u8 coal_flags, u8 rx_mode, u8 tx_mode, 101 void (*rx_dim_work)(struct work_struct *work), 102 void (*tx_dim_work)(struct work_struct *work)) 103 { 104 struct dim_cq_moder *rxp = NULL, *txp; 105 struct dim_irq_moder *moder; 106 int len; 107 108 dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL); 109 if (!dev->irq_moder) 110 return -ENOMEM; 111 112 moder = dev->irq_moder; 113 len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile); 114 115 moder->coal_flags = coal_flags; 116 moder->profile_flags = profile_flags; 117 118 if (profile_flags & DIM_PROFILE_RX) { 119 moder->rx_dim_work = rx_dim_work; 120 moder->dim_rx_mode = rx_mode; 121 rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL); 122 if (!rxp) 123 goto free_moder; 124 125 rcu_assign_pointer(moder->rx_profile, rxp); 126 } 127 128 if (profile_flags & DIM_PROFILE_TX) { 129 moder->tx_dim_work = tx_dim_work; 130 moder->dim_tx_mode = tx_mode; 131 txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL); 132 if (!txp) 133 goto free_rxp; 134 135 rcu_assign_pointer(moder->tx_profile, txp); 136 } 137 138 return 0; 139 140 free_rxp: 141 kfree(rxp); 142 free_moder: 143 kfree(moder); 144 return -ENOMEM; 145 } 146 EXPORT_SYMBOL(net_dim_init_irq_moder); 147 148 /* RTNL lock is held. */ 149 void net_dim_free_irq_moder(struct net_device *dev) 150 { 151 struct dim_cq_moder *rxp, *txp; 152 153 if (!dev->irq_moder) 154 return; 155 156 rxp = rtnl_dereference(dev->irq_moder->rx_profile); 157 txp = rtnl_dereference(dev->irq_moder->tx_profile); 158 159 rcu_assign_pointer(dev->irq_moder->rx_profile, NULL); 160 rcu_assign_pointer(dev->irq_moder->tx_profile, NULL); 161 162 kfree_rcu(rxp, rcu); 163 kfree_rcu(txp, rcu); 164 kfree(dev->irq_moder); 165 } 166 EXPORT_SYMBOL(net_dim_free_irq_moder); 167 168 void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx) 169 { 170 struct dim_irq_moder *irq_moder = dev->irq_moder; 171 172 if (!irq_moder) 173 return; 174 175 if (is_tx) { 176 INIT_WORK(&dim->work, irq_moder->tx_dim_work); 177 dim->mode = READ_ONCE(irq_moder->dim_tx_mode); 178 return; 179 } 180 181 INIT_WORK(&dim->work, irq_moder->rx_dim_work); 182 dim->mode = READ_ONCE(irq_moder->dim_rx_mode); 183 } 184 EXPORT_SYMBOL(net_dim_setting); 185 186 void net_dim_work_cancel(struct dim *dim) 187 { 188 cancel_work_sync(&dim->work); 189 } 190 EXPORT_SYMBOL(net_dim_work_cancel); 191 192 struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev, 193 struct dim *dim) 194 { 195 struct dim_cq_moder res, *profile; 196 197 rcu_read_lock(); 198 profile = rcu_dereference(dev->irq_moder->rx_profile); 199 res = profile[dim->profile_ix]; 200 rcu_read_unlock(); 201 202 res.cq_period_mode = dim->mode; 203 204 return res; 205 } 206 EXPORT_SYMBOL(net_dim_get_rx_irq_moder); 207 208 struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev, 209 struct dim *dim) 210 { 211 struct dim_cq_moder res, *profile; 212 213 rcu_read_lock(); 214 profile = rcu_dereference(dev->irq_moder->tx_profile); 215 res = profile[dim->profile_ix]; 216 rcu_read_unlock(); 217 218 res.cq_period_mode = dim->mode; 219 220 return res; 221 } 222 EXPORT_SYMBOL(net_dim_get_tx_irq_moder); 223 224 void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode) 225 { 226 WRITE_ONCE(dev->irq_moder->dim_rx_mode, rx_mode); 227 } 228 EXPORT_SYMBOL(net_dim_set_rx_mode); 229 230 void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode) 231 { 232 WRITE_ONCE(dev->irq_moder->dim_tx_mode, tx_mode); 233 } 234 EXPORT_SYMBOL(net_dim_set_tx_mode); 235 236 static int net_dim_step(struct dim *dim) 237 { 238 if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) 239 return DIM_TOO_TIRED; 240 241 switch (dim->tune_state) { 242 case DIM_PARKING_ON_TOP: 243 case DIM_PARKING_TIRED: 244 break; 245 case DIM_GOING_RIGHT: 246 if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) 247 return DIM_ON_EDGE; 248 dim->profile_ix++; 249 dim->steps_right++; 250 break; 251 case DIM_GOING_LEFT: 252 if (dim->profile_ix == 0) 253 return DIM_ON_EDGE; 254 dim->profile_ix--; 255 dim->steps_left++; 256 break; 257 } 258 259 dim->tired++; 260 return DIM_STEPPED; 261 } 262 263 static void net_dim_exit_parking(struct dim *dim) 264 { 265 dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; 266 net_dim_step(dim); 267 } 268 269 static int net_dim_stats_compare(struct dim_stats *curr, 270 struct dim_stats *prev) 271 { 272 if (!prev->bpms) 273 return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME; 274 275 if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) 276 return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : 277 DIM_STATS_WORSE; 278 279 if (!prev->ppms) 280 return curr->ppms ? DIM_STATS_BETTER : 281 DIM_STATS_SAME; 282 283 if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) 284 return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : 285 DIM_STATS_WORSE; 286 287 if (!prev->epms) 288 return DIM_STATS_SAME; 289 290 if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) 291 return (curr->epms < prev->epms) ? DIM_STATS_BETTER : 292 DIM_STATS_WORSE; 293 294 return DIM_STATS_SAME; 295 } 296 297 static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) 298 { 299 int prev_state = dim->tune_state; 300 int prev_ix = dim->profile_ix; 301 int stats_res; 302 int step_res; 303 304 switch (dim->tune_state) { 305 case DIM_PARKING_ON_TOP: 306 stats_res = net_dim_stats_compare(curr_stats, 307 &dim->prev_stats); 308 if (stats_res != DIM_STATS_SAME) 309 net_dim_exit_parking(dim); 310 break; 311 312 case DIM_PARKING_TIRED: 313 dim->tired--; 314 if (!dim->tired) 315 net_dim_exit_parking(dim); 316 break; 317 318 case DIM_GOING_RIGHT: 319 case DIM_GOING_LEFT: 320 stats_res = net_dim_stats_compare(curr_stats, 321 &dim->prev_stats); 322 if (stats_res != DIM_STATS_BETTER) 323 dim_turn(dim); 324 325 if (dim_on_top(dim)) { 326 dim_park_on_top(dim); 327 break; 328 } 329 330 step_res = net_dim_step(dim); 331 switch (step_res) { 332 case DIM_ON_EDGE: 333 dim_park_on_top(dim); 334 break; 335 case DIM_TOO_TIRED: 336 dim_park_tired(dim); 337 break; 338 } 339 340 break; 341 } 342 343 if (prev_state != DIM_PARKING_ON_TOP || 344 dim->tune_state != DIM_PARKING_ON_TOP) 345 dim->prev_stats = *curr_stats; 346 347 return dim->profile_ix != prev_ix; 348 } 349 350 void net_dim(struct dim *dim, const struct dim_sample *end_sample) 351 { 352 struct dim_stats curr_stats; 353 u16 nevents; 354 355 switch (dim->state) { 356 case DIM_MEASURE_IN_PROGRESS: 357 nevents = BIT_GAP(BITS_PER_TYPE(u16), 358 end_sample->event_ctr, 359 dim->start_sample.event_ctr); 360 if (nevents < DIM_NEVENTS) 361 break; 362 if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats)) 363 break; 364 if (net_dim_decision(&curr_stats, dim)) { 365 dim->state = DIM_APPLY_NEW_PROFILE; 366 schedule_work(&dim->work); 367 break; 368 } 369 fallthrough; 370 case DIM_START_MEASURE: 371 dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr, 372 end_sample->byte_ctr, &dim->start_sample); 373 dim->state = DIM_MEASURE_IN_PROGRESS; 374 break; 375 case DIM_APPLY_NEW_PROFILE: 376 break; 377 } 378 } 379 EXPORT_SYMBOL(net_dim); 380