xref: /linux/drivers/net/ethernet/intel/ice/ice_dcb_lib.c (revision add452d09a38c7a7c44aea55c1015392cebf9fa7)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Intel Corporation. */
3 
4 #include "ice_dcb_lib.h"
5 #include "ice_dcb_nl.h"
6 #include "devlink/devlink.h"
7 
8 /**
9  * ice_dcb_get_ena_tc - return bitmap of enabled TCs
10  * @dcbcfg: DCB config to evaluate for enabled TCs
11  */
12 static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
13 {
14 	u8 i, num_tc, ena_tc = 1;
15 
16 	num_tc = ice_dcb_get_num_tc(dcbcfg);
17 
18 	for (i = 0; i < num_tc; i++)
19 		ena_tc |= BIT(i);
20 
21 	return ena_tc;
22 }
23 
24 /**
25  * ice_is_pfc_causing_hung_q
26  * @pf: pointer to PF structure
27  * @txqueue: Tx queue which is supposedly hung queue
28  *
29  * find if PFC is causing the hung queue, if yes return true else false
30  */
31 bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue)
32 {
33 	u8 num_tcs = 0, i, tc, up_mapped_tc, up_in_tc = 0;
34 	u64 ref_prio_xoff[ICE_MAX_UP];
35 	struct ice_vsi *vsi;
36 	u32 up2tc;
37 
38 	vsi = ice_get_main_vsi(pf);
39 	if (!vsi)
40 		return false;
41 
42 	ice_for_each_traffic_class(i)
43 		if (vsi->tc_cfg.ena_tc & BIT(i))
44 			num_tcs++;
45 
46 	/* first find out the TC to which the hung queue belongs to */
47 	for (tc = 0; tc < num_tcs - 1; tc++)
48 		if (ice_find_q_in_range(vsi->tc_cfg.tc_info[tc].qoffset,
49 					vsi->tc_cfg.tc_info[tc + 1].qoffset,
50 					txqueue))
51 			break;
52 
53 	/* Build a bit map of all UPs associated to the suspect hung queue TC,
54 	 * so that we check for its counter increment.
55 	 */
56 	up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
57 	for (i = 0; i < ICE_MAX_UP; i++) {
58 		up_mapped_tc = (up2tc >> (i * 3)) & 0x7;
59 		if (up_mapped_tc == tc)
60 			up_in_tc |= BIT(i);
61 	}
62 
63 	/* Now that we figured out that hung queue is PFC enabled, still the
64 	 * Tx timeout can be legitimate. So to make sure Tx timeout is
65 	 * absolutely caused by PFC storm, check if the counters are
66 	 * incrementing.
67 	 */
68 	for (i = 0; i < ICE_MAX_UP; i++)
69 		if (up_in_tc & BIT(i))
70 			ref_prio_xoff[i] = pf->stats.priority_xoff_rx[i];
71 
72 	ice_update_dcb_stats(pf);
73 
74 	for (i = 0; i < ICE_MAX_UP; i++)
75 		if (up_in_tc & BIT(i))
76 			if (pf->stats.priority_xoff_rx[i] > ref_prio_xoff[i])
77 				return true;
78 
79 	return false;
80 }
81 
82 /**
83  * ice_dcb_get_mode - gets the DCB mode
84  * @port_info: pointer to port info structure
85  * @host: if set it's HOST if not it's MANAGED
86  */
87 static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host)
88 {
89 	u8 mode;
90 
91 	if (host)
92 		mode = DCB_CAP_DCBX_HOST;
93 	else
94 		mode = DCB_CAP_DCBX_LLD_MANAGED;
95 
96 	if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
97 		return mode | DCB_CAP_DCBX_VER_CEE;
98 	else
99 		return mode | DCB_CAP_DCBX_VER_IEEE;
100 }
101 
102 /**
103  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
104  * @dcbcfg: config to retrieve number of TCs from
105  */
106 u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
107 {
108 	bool tc_unused = false;
109 	u8 num_tc = 0;
110 	u8 ret = 0;
111 	int i;
112 
113 	/* Scan the ETS Config Priority Table to find traffic classes
114 	 * enabled and create a bitmask of enabled TCs
115 	 */
116 	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
117 		num_tc |= BIT(dcbcfg->etscfg.prio_table[i]);
118 
119 	/* Scan bitmask for contiguous TCs starting with TC0 */
120 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
121 		if (num_tc & BIT(i)) {
122 			if (!tc_unused) {
123 				ret++;
124 			} else {
125 				pr_err("Non-contiguous TCs - Disabling DCB\n");
126 				return 1;
127 			}
128 		} else {
129 			tc_unused = true;
130 		}
131 	}
132 
133 	/* There is always at least 1 TC */
134 	if (!ret)
135 		ret = 1;
136 
137 	return ret;
138 }
139 
140 /**
141  * ice_get_first_droptc - returns number of first droptc
142  * @vsi: used to find the first droptc
143  *
144  * This function returns the value of first_droptc.
145  * When DCB is enabled, first droptc information is derived from enabled_tc
146  * and PFC enabled bits. otherwise this function returns 0 as there is one
147  * TC without DCB (tc0)
148  */
149 static u8 ice_get_first_droptc(struct ice_vsi *vsi)
150 {
151 	struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
152 	struct device *dev = ice_pf_to_dev(vsi->back);
153 	u8 num_tc, ena_tc_map, pfc_ena_map;
154 	u8 i;
155 
156 	num_tc = ice_dcb_get_num_tc(cfg);
157 
158 	/* get bitmap of enabled TCs */
159 	ena_tc_map = ice_dcb_get_ena_tc(cfg);
160 
161 	/* get bitmap of PFC enabled TCs */
162 	pfc_ena_map = cfg->pfc.pfcena;
163 
164 	/* get first TC that is not PFC enabled */
165 	for (i = 0; i < num_tc; i++) {
166 		if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) {
167 			dev_dbg(dev, "first drop tc = %d\n", i);
168 			return i;
169 		}
170 	}
171 
172 	dev_dbg(dev, "first drop tc = 0\n");
173 	return 0;
174 }
175 
176 /**
177  * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration
178  * @vsi: pointer to the VSI instance
179  */
180 void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
181 {
182 	struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
183 
184 	switch (vsi->type) {
185 	case ICE_VSI_PF:
186 		vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
187 		vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
188 		break;
189 	case ICE_VSI_CHNL:
190 	case ICE_VSI_SF:
191 		vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi));
192 		vsi->tc_cfg.numtc = 1;
193 		break;
194 	case ICE_VSI_CTRL:
195 	case ICE_VSI_LB:
196 	default:
197 		vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
198 		vsi->tc_cfg.numtc = 1;
199 	}
200 }
201 
202 /**
203  * ice_dcb_get_tc - Get the TC associated with the queue
204  * @vsi: ptr to the VSI
205  * @queue_index: queue number associated with VSI
206  */
207 u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
208 {
209 	return vsi->tx_rings[queue_index]->dcb_tc;
210 }
211 
212 /**
213  * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
214  * @vsi: VSI owner of rings being updated
215  */
216 void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
217 {
218 	struct ice_tx_ring *tx_ring;
219 	struct ice_rx_ring *rx_ring;
220 	u16 qoffset, qcount;
221 	int i, n;
222 
223 	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
224 		/* Reset the TC information */
225 		ice_for_each_txq(vsi, i) {
226 			tx_ring = vsi->tx_rings[i];
227 			tx_ring->dcb_tc = 0;
228 		}
229 		ice_for_each_rxq(vsi, i) {
230 			rx_ring = vsi->rx_rings[i];
231 			rx_ring->dcb_tc = 0;
232 		}
233 		return;
234 	}
235 
236 	ice_for_each_traffic_class(n) {
237 		if (!(vsi->tc_cfg.ena_tc & BIT(n)))
238 			break;
239 
240 		qoffset = vsi->tc_cfg.tc_info[n].qoffset;
241 		qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
242 		for (i = qoffset; i < (qoffset + qcount); i++)
243 			vsi->tx_rings[i]->dcb_tc = n;
244 
245 		qcount = vsi->tc_cfg.tc_info[n].qcount_rx;
246 		for (i = qoffset; i < (qoffset + qcount); i++)
247 			vsi->rx_rings[i]->dcb_tc = n;
248 	}
249 	/* applicable only if "all_enatc" is set, which will be set from
250 	 * setup_tc method as part of configuring channels
251 	 */
252 	if (vsi->all_enatc) {
253 		u8 first_droptc = ice_get_first_droptc(vsi);
254 
255 		/* When DCB is configured, TC for ADQ queues (which are really
256 		 * PF queues) should be the first drop TC of the main VSI
257 		 */
258 		ice_for_each_chnl_tc(n) {
259 			if (!(vsi->all_enatc & BIT(n)))
260 				break;
261 
262 			qoffset = vsi->mqprio_qopt.qopt.offset[n];
263 			qcount = vsi->mqprio_qopt.qopt.count[n];
264 			for (i = qoffset; i < (qoffset + qcount); i++) {
265 				vsi->tx_rings[i]->dcb_tc = first_droptc;
266 				vsi->rx_rings[i]->dcb_tc = first_droptc;
267 			}
268 		}
269 	}
270 }
271 
272 /**
273  * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig
274  * @pf: pointer to the PF instance
275  * @ena: true to enable VSIs, false to disable
276  * @locked: true if caller holds RTNL lock, false otherwise
277  *
278  * Before a new DCB configuration can be applied, VSIs of type PF, SWITCHDEV
279  * and CHNL need to be brought down. Following completion of DCB configuration
280  * the VSIs that were downed need to be brought up again. This helper function
281  * does both.
282  */
283 static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked)
284 {
285 	int i;
286 
287 	ice_for_each_vsi(pf, i) {
288 		struct ice_vsi *vsi = pf->vsi[i];
289 
290 		if (!vsi)
291 			continue;
292 
293 		switch (vsi->type) {
294 		case ICE_VSI_CHNL:
295 		case ICE_VSI_PF:
296 			if (ena)
297 				ice_ena_vsi(vsi, locked);
298 			else
299 				ice_dis_vsi(vsi, locked);
300 			break;
301 		default:
302 			continue;
303 		}
304 	}
305 }
306 
307 /**
308  * ice_dcb_bwchk - check if ETS bandwidth input parameters are correct
309  * @pf: pointer to the PF struct
310  * @dcbcfg: pointer to DCB config structure
311  */
312 int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg)
313 {
314 	struct ice_dcb_ets_cfg *etscfg = &dcbcfg->etscfg;
315 	u8 num_tc, total_bw = 0;
316 	int i;
317 
318 	/* returns number of contigous TCs and 1 TC for non-contigous TCs,
319 	 * since at least 1 TC has to be configured
320 	 */
321 	num_tc = ice_dcb_get_num_tc(dcbcfg);
322 
323 	/* no bandwidth checks required if there's only one TC, so assign
324 	 * all bandwidth to TC0 and return
325 	 */
326 	if (num_tc == 1) {
327 		etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
328 		return 0;
329 	}
330 
331 	for (i = 0; i < num_tc; i++)
332 		total_bw += etscfg->tcbwtable[i];
333 
334 	if (!total_bw) {
335 		etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
336 	} else if (total_bw != ICE_TC_MAX_BW) {
337 		dev_err(ice_pf_to_dev(pf), "Invalid config, total bandwidth must equal 100\n");
338 		return -EINVAL;
339 	}
340 
341 	return 0;
342 }
343 
344 /**
345  * ice_pf_dcb_cfg - Apply new DCB configuration
346  * @pf: pointer to the PF struct
347  * @new_cfg: DCBX config to apply
348  * @locked: is the RTNL held
349  */
350 int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
351 {
352 	struct ice_aqc_port_ets_elem buf = { 0 };
353 	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
354 	struct device *dev = ice_pf_to_dev(pf);
355 	int ret = ICE_DCB_NO_HW_CHG;
356 	struct iidc_event *event;
357 	struct ice_vsi *pf_vsi;
358 
359 	curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
360 
361 	/* FW does not care if change happened */
362 	if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
363 		ret = ICE_DCB_HW_CHG_RST;
364 
365 	/* Enable DCB tagging only when more than one TC */
366 	if (ice_dcb_get_num_tc(new_cfg) > 1) {
367 		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
368 		if (pf->hw.port_info->is_custom_tx_enabled) {
369 			dev_err(dev, "Custom Tx scheduler feature enabled, can't configure DCB\n");
370 			return -EBUSY;
371 		}
372 		ice_tear_down_devlink_rate_tree(pf);
373 
374 		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
375 	} else {
376 		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
377 		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
378 	}
379 
380 	if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
381 		dev_dbg(dev, "No change in DCB config required\n");
382 		return ret;
383 	}
384 
385 	if (ice_dcb_bwchk(pf, new_cfg))
386 		return -EINVAL;
387 
388 	/* Store old config in case FW config fails */
389 	old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
390 	if (!old_cfg)
391 		return -ENOMEM;
392 
393 	dev_info(dev, "Commit DCB Configuration to the hardware\n");
394 	pf_vsi = ice_get_main_vsi(pf);
395 	if (!pf_vsi) {
396 		dev_dbg(dev, "PF VSI doesn't exist\n");
397 		ret = -EINVAL;
398 		goto free_cfg;
399 	}
400 
401 	/* Notify AUX drivers about impending change to TCs */
402 	event = kzalloc(sizeof(*event), GFP_KERNEL);
403 	if (!event) {
404 		ret = -ENOMEM;
405 		goto free_cfg;
406 	}
407 
408 	set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type);
409 	ice_send_event_to_aux(pf, event);
410 	kfree(event);
411 
412 	/* avoid race conditions by holding the lock while disabling and
413 	 * re-enabling the VSI
414 	 */
415 	if (!locked)
416 		rtnl_lock();
417 
418 	/* disable VSIs affected by DCB changes */
419 	ice_dcb_ena_dis_vsi(pf, false, true);
420 
421 	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
422 	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
423 	memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
424 
425 	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
426 	 * the new config came from the HW in the first place.
427 	 */
428 	if (pf->hw.port_info->qos_cfg.is_sw_lldp) {
429 		ret = ice_set_dcb_cfg(pf->hw.port_info);
430 		if (ret) {
431 			dev_err(dev, "Set DCB Config failed\n");
432 			/* Restore previous settings to local config */
433 			memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
434 			goto out;
435 		}
436 	}
437 
438 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
439 	if (ret) {
440 		dev_err(dev, "Query Port ETS failed\n");
441 		goto out;
442 	}
443 
444 	ice_pf_dcb_recfg(pf, false);
445 
446 out:
447 	/* enable previously downed VSIs */
448 	ice_dcb_ena_dis_vsi(pf, true, true);
449 	if (!locked)
450 		rtnl_unlock();
451 free_cfg:
452 	kfree(old_cfg);
453 	return ret;
454 }
455 
456 /**
457  * ice_cfg_etsrec_defaults - Set default ETS recommended DCB config
458  * @pi: port information structure
459  */
460 static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
461 {
462 	struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
463 	u8 i;
464 
465 	/* Ensure ETS recommended DCB configuration is not already set */
466 	if (dcbcfg->etsrec.maxtcs)
467 		return;
468 
469 	/* In CEE mode, set the default to 1 TC */
470 	dcbcfg->etsrec.maxtcs = 1;
471 	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
472 		dcbcfg->etsrec.tcbwtable[i] = i ? 0 : 100;
473 		dcbcfg->etsrec.tsatable[i] = i ? ICE_IEEE_TSA_STRICT :
474 						 ICE_IEEE_TSA_ETS;
475 	}
476 }
477 
478 /**
479  * ice_dcb_need_recfg - Check if DCB needs reconfig
480  * @pf: board private structure
481  * @old_cfg: current DCB config
482  * @new_cfg: new DCB config
483  */
484 static bool
485 ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
486 		   struct ice_dcbx_cfg *new_cfg)
487 {
488 	struct device *dev = ice_pf_to_dev(pf);
489 	bool need_reconfig = false;
490 
491 	/* Check if ETS configuration has changed */
492 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
493 		   sizeof(new_cfg->etscfg))) {
494 		/* If Priority Table has changed reconfig is needed */
495 		if (memcmp(&new_cfg->etscfg.prio_table,
496 			   &old_cfg->etscfg.prio_table,
497 			   sizeof(new_cfg->etscfg.prio_table))) {
498 			need_reconfig = true;
499 			dev_dbg(dev, "ETS UP2TC changed.\n");
500 		}
501 
502 		if (memcmp(&new_cfg->etscfg.tcbwtable,
503 			   &old_cfg->etscfg.tcbwtable,
504 			   sizeof(new_cfg->etscfg.tcbwtable)))
505 			dev_dbg(dev, "ETS TC BW Table changed.\n");
506 
507 		if (memcmp(&new_cfg->etscfg.tsatable,
508 			   &old_cfg->etscfg.tsatable,
509 			   sizeof(new_cfg->etscfg.tsatable)))
510 			dev_dbg(dev, "ETS TSA Table changed.\n");
511 	}
512 
513 	/* Check if PFC configuration has changed */
514 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
515 		need_reconfig = true;
516 		dev_dbg(dev, "PFC config change detected.\n");
517 	}
518 
519 	/* Check if APP Table has changed */
520 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
521 		need_reconfig = true;
522 		dev_dbg(dev, "APP Table change detected.\n");
523 	}
524 
525 	dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig);
526 	return need_reconfig;
527 }
528 
529 /**
530  * ice_dcb_rebuild - rebuild DCB post reset
531  * @pf: physical function instance
532  */
533 void ice_dcb_rebuild(struct ice_pf *pf)
534 {
535 	struct ice_aqc_port_ets_elem buf = { 0 };
536 	struct device *dev = ice_pf_to_dev(pf);
537 	struct ice_dcbx_cfg *err_cfg;
538 	int ret;
539 
540 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
541 	if (ret) {
542 		dev_err(dev, "Query Port ETS failed\n");
543 		goto dcb_error;
544 	}
545 
546 	mutex_lock(&pf->tc_mutex);
547 
548 	if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
549 		ice_cfg_etsrec_defaults(pf->hw.port_info);
550 
551 	ret = ice_set_dcb_cfg(pf->hw.port_info);
552 	if (ret) {
553 		dev_err(dev, "Failed to set DCB config in rebuild\n");
554 		goto dcb_error;
555 	}
556 
557 	if (!pf->hw.port_info->qos_cfg.is_sw_lldp) {
558 		ret = ice_cfg_lldp_mib_change(&pf->hw, true);
559 		if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) {
560 			dev_err(dev, "Failed to register for MIB changes\n");
561 			goto dcb_error;
562 		}
563 	}
564 
565 	dev_info(dev, "DCB info restored\n");
566 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
567 	if (ret) {
568 		dev_err(dev, "Query Port ETS failed\n");
569 		goto dcb_error;
570 	}
571 
572 	mutex_unlock(&pf->tc_mutex);
573 
574 	return;
575 
576 dcb_error:
577 	dev_err(dev, "Disabling DCB until new settings occur\n");
578 	err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL);
579 	if (!err_cfg) {
580 		mutex_unlock(&pf->tc_mutex);
581 		return;
582 	}
583 
584 	err_cfg->etscfg.willing = true;
585 	err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
586 	err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
587 	memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec));
588 	/* Coverity warns the return code of ice_pf_dcb_cfg() is not checked
589 	 * here as is done for other calls to that function. That check is
590 	 * not necessary since this is in this function's error cleanup path.
591 	 * Suppress the Coverity warning with the following comment...
592 	 */
593 	/* coverity[check_return] */
594 	ice_pf_dcb_cfg(pf, err_cfg, false);
595 	kfree(err_cfg);
596 
597 	mutex_unlock(&pf->tc_mutex);
598 }
599 
600 /**
601  * ice_dcb_init_cfg - set the initial DCB config in SW
602  * @pf: PF to apply config to
603  * @locked: Is the RTNL held
604  */
605 static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
606 {
607 	struct ice_dcbx_cfg *newcfg;
608 	struct ice_port_info *pi;
609 	int ret = 0;
610 
611 	pi = pf->hw.port_info;
612 	newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg),
613 			 GFP_KERNEL);
614 	if (!newcfg)
615 		return -ENOMEM;
616 
617 	memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg));
618 
619 	dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
620 	if (ice_pf_dcb_cfg(pf, newcfg, locked))
621 		ret = -EINVAL;
622 
623 	kfree(newcfg);
624 
625 	return ret;
626 }
627 
628 /**
629  * ice_dcb_sw_dflt_cfg - Apply a default DCB config
630  * @pf: PF to apply config to
631  * @ets_willing: configure ETS willing
632  * @locked: was this function called with RTNL held
633  */
634 int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
635 {
636 	struct ice_aqc_port_ets_elem buf = { 0 };
637 	struct ice_dcbx_cfg *dcbcfg;
638 	struct ice_port_info *pi;
639 	struct ice_hw *hw;
640 	int ret;
641 
642 	hw = &pf->hw;
643 	pi = hw->port_info;
644 	dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL);
645 	if (!dcbcfg)
646 		return -ENOMEM;
647 
648 	memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg));
649 
650 	dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
651 	dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
652 	dcbcfg->etscfg.tcbwtable[0] = 100;
653 	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
654 
655 	memcpy(&dcbcfg->etsrec, &dcbcfg->etscfg,
656 	       sizeof(dcbcfg->etsrec));
657 	dcbcfg->etsrec.willing = 0;
658 
659 	dcbcfg->pfc.willing = 1;
660 	dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc;
661 
662 	dcbcfg->numapps = 1;
663 	dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
664 	dcbcfg->app[0].priority = 3;
665 	dcbcfg->app[0].prot_id = ETH_P_FCOE;
666 
667 	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
668 	kfree(dcbcfg);
669 	if (ret)
670 		return ret;
671 
672 	return ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
673 }
674 
675 /**
676  * ice_dcb_tc_contig - Check that TCs are contiguous
677  * @prio_table: pointer to priority table
678  *
679  * Check if TCs begin with TC0 and are contiguous
680  */
681 static bool ice_dcb_tc_contig(u8 *prio_table)
682 {
683 	bool found_empty = false;
684 	u8 used_tc = 0;
685 	int i;
686 
687 	/* Create a bitmap of used TCs */
688 	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
689 		used_tc |= BIT(prio_table[i]);
690 
691 	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
692 		if (used_tc & BIT(i)) {
693 			if (found_empty)
694 				return false;
695 		} else {
696 			found_empty = true;
697 		}
698 	}
699 
700 	return true;
701 }
702 
703 /**
704  * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
705  * @pf: pointer to the PF struct
706  *
707  * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
708  */
709 static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
710 {
711 	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
712 	struct device *dev = ice_pf_to_dev(pf);
713 	int ret;
714 
715 	/* Configure SW DCB default with ETS non-willing */
716 	ret = ice_dcb_sw_dflt_cfg(pf, false, true);
717 	if (ret) {
718 		dev_err(dev, "Failed to set local DCB config %d\n", ret);
719 		return ret;
720 	}
721 
722 	/* Reconfigure with ETS willing so that FW will send LLDP MIB event */
723 	dcbcfg->etscfg.willing = 1;
724 	ret = ice_set_dcb_cfg(pf->hw.port_info);
725 	if (ret)
726 		dev_err(dev, "Failed to set DCB to unwilling\n");
727 
728 	return ret;
729 }
730 
731 /**
732  * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
733  * @pf: pointer to the PF struct
734  * @locked: is adev device lock held
735  *
736  * Assumed caller has already disabled all VSIs before
737  * calling this function. Reconfiguring DCB based on
738  * local_dcbx_cfg.
739  */
740 void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked)
741 {
742 	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
743 	struct iidc_event *event;
744 	u8 tc_map = 0;
745 	int v, ret;
746 
747 	/* Update each VSI */
748 	ice_for_each_vsi(pf, v) {
749 		struct ice_vsi *vsi = pf->vsi[v];
750 
751 		if (!vsi)
752 			continue;
753 
754 		if (vsi->type == ICE_VSI_PF) {
755 			tc_map = ice_dcb_get_ena_tc(dcbcfg);
756 
757 			/* If DCBX request non-contiguous TC, then configure
758 			 * default TC
759 			 */
760 			if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
761 				tc_map = ICE_DFLT_TRAFFIC_CLASS;
762 				ice_dcb_noncontig_cfg(pf);
763 			}
764 		} else if (vsi->type == ICE_VSI_CHNL) {
765 			tc_map = BIT(ice_get_first_droptc(vsi));
766 		} else {
767 			tc_map = ICE_DFLT_TRAFFIC_CLASS;
768 		}
769 
770 		ret = ice_vsi_cfg_tc(vsi, tc_map);
771 		if (ret) {
772 			dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n",
773 				vsi->idx);
774 			continue;
775 		}
776 		/* no need to proceed with remaining cfg if it is CHNL
777 		 * or switchdev VSI
778 		 */
779 		if (vsi->type == ICE_VSI_CHNL)
780 			continue;
781 
782 		ice_vsi_map_rings_to_vectors(vsi);
783 		if (vsi->type == ICE_VSI_PF)
784 			ice_dcbnl_set_all(vsi);
785 	}
786 	if (!locked) {
787 		/* Notify the AUX drivers that TC change is finished */
788 		event = kzalloc(sizeof(*event), GFP_KERNEL);
789 		if (!event)
790 			return;
791 
792 		set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
793 		ice_send_event_to_aux(pf, event);
794 		kfree(event);
795 	}
796 }
797 
798 /**
799  * ice_init_pf_dcb - initialize DCB for a PF
800  * @pf: PF to initialize DCB for
801  * @locked: Was function called with RTNL held
802  */
803 int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
804 {
805 	struct device *dev = ice_pf_to_dev(pf);
806 	struct ice_port_info *port_info;
807 	struct ice_hw *hw = &pf->hw;
808 	int err;
809 
810 	port_info = hw->port_info;
811 
812 	err = ice_init_dcb(hw, false);
813 	if (err && !port_info->qos_cfg.is_sw_lldp) {
814 		dev_err(dev, "Error initializing DCB %d\n", err);
815 		goto dcb_init_err;
816 	}
817 
818 	dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
819 		 pf->hw.func_caps.common_cap.maxtc);
820 	if (err) {
821 		struct ice_vsi *pf_vsi;
822 
823 		/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
824 		dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
825 		clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
826 		err = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_VLAN_BASED_PFC,
827 					  NULL);
828 		if (err)
829 			dev_info(dev, "Failed to set VLAN PFC mode\n");
830 
831 		err = ice_dcb_sw_dflt_cfg(pf, true, locked);
832 		if (err) {
833 			dev_err(dev, "Failed to set local DCB config %d\n",
834 				err);
835 			err = -EIO;
836 			goto dcb_init_err;
837 		}
838 
839 		/* If the FW DCBX engine is not running then Rx LLDP packets
840 		 * need to be redirected up the stack.
841 		 */
842 		pf_vsi = ice_get_main_vsi(pf);
843 		if (!pf_vsi) {
844 			dev_err(dev, "Failed to set local DCB config\n");
845 			err = -EIO;
846 			goto dcb_init_err;
847 		}
848 
849 		ice_cfg_sw_lldp(pf_vsi, false, true);
850 
851 		pf->dcbx_cap = ice_dcb_get_mode(port_info, true);
852 		return 0;
853 	}
854 
855 	set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
856 
857 	/* DCBX/LLDP enabled in FW, set DCBNL mode advertisement */
858 	pf->dcbx_cap = ice_dcb_get_mode(port_info, false);
859 
860 	err = ice_dcb_init_cfg(pf, locked);
861 	if (err)
862 		goto dcb_init_err;
863 
864 	return 0;
865 
866 dcb_init_err:
867 	dev_err(dev, "DCB init failed\n");
868 	return err;
869 }
870 
871 /**
872  * ice_update_dcb_stats - Update DCB stats counters
873  * @pf: PF whose stats needs to be updated
874  */
875 void ice_update_dcb_stats(struct ice_pf *pf)
876 {
877 	struct ice_hw_port_stats *prev_ps, *cur_ps;
878 	struct ice_hw *hw = &pf->hw;
879 	u8 port;
880 	int i;
881 
882 	port = hw->port_info->lport;
883 	prev_ps = &pf->stats_prev;
884 	cur_ps = &pf->stats;
885 
886 	if (ice_is_reset_in_progress(pf->state))
887 		pf->stat_prev_loaded = false;
888 
889 	for (i = 0; i < 8; i++) {
890 		ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i),
891 				  pf->stat_prev_loaded,
892 				  &prev_ps->priority_xoff_rx[i],
893 				  &cur_ps->priority_xoff_rx[i]);
894 		ice_stat_update32(hw, GLPRT_PXONRXC(port, i),
895 				  pf->stat_prev_loaded,
896 				  &prev_ps->priority_xon_rx[i],
897 				  &cur_ps->priority_xon_rx[i]);
898 		ice_stat_update32(hw, GLPRT_PXONTXC(port, i),
899 				  pf->stat_prev_loaded,
900 				  &prev_ps->priority_xon_tx[i],
901 				  &cur_ps->priority_xon_tx[i]);
902 		ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i),
903 				  pf->stat_prev_loaded,
904 				  &prev_ps->priority_xoff_tx[i],
905 				  &cur_ps->priority_xoff_tx[i]);
906 		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i),
907 				  pf->stat_prev_loaded,
908 				  &prev_ps->priority_xon_2_xoff[i],
909 				  &cur_ps->priority_xon_2_xoff[i]);
910 	}
911 }
912 
913 /**
914  * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
915  * @tx_ring: ring to send buffer on
916  * @first: pointer to struct ice_tx_buf
917  *
918  * This should not be called if the outer VLAN is software offloaded as the VLAN
919  * tag will already be configured with the correct ID and priority bits
920  */
921 void
922 ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
923 			      struct ice_tx_buf *first)
924 {
925 	struct sk_buff *skb = first->skb;
926 
927 	if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
928 		return;
929 
930 	/* Insert 802.1p priority into VLAN header */
931 	if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN ||
932 	     first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) ||
933 	    skb->priority != TC_PRIO_CONTROL) {
934 		first->vid &= ~VLAN_PRIO_MASK;
935 		/* Mask the lower 3 bits to set the 802.1p priority */
936 		first->vid |= FIELD_PREP(VLAN_PRIO_MASK, skb->priority);
937 		/* if this is not already set it means a VLAN 0 + priority needs
938 		 * to be offloaded
939 		 */
940 		if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
941 			first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
942 		else
943 			first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
944 	}
945 }
946 
947 /**
948  * ice_dcb_is_mib_change_pending - Check if MIB change is pending
949  * @state: MIB change state
950  */
951 static bool ice_dcb_is_mib_change_pending(u8 state)
952 {
953 	return ICE_AQ_LLDP_MIB_CHANGE_PENDING ==
954 		FIELD_GET(ICE_AQ_LLDP_MIB_CHANGE_STATE_M, state);
955 }
956 
957 /**
958  * ice_dcb_process_lldp_set_mib_change - Process MIB change
959  * @pf: ptr to ice_pf
960  * @event: pointer to the admin queue receive event
961  */
962 void
963 ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
964 				    struct ice_rq_event_info *event)
965 {
966 	struct ice_aqc_port_ets_elem buf = { 0 };
967 	struct device *dev = ice_pf_to_dev(pf);
968 	struct ice_aqc_lldp_get_mib *mib;
969 	struct ice_dcbx_cfg tmp_dcbx_cfg;
970 	bool pending_handled = true;
971 	bool need_reconfig = false;
972 	struct ice_port_info *pi;
973 	u8 mib_type;
974 	int ret;
975 
976 	/* Not DCB capable or capability disabled */
977 	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
978 		return;
979 
980 	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
981 		dev_dbg(dev, "MIB Change Event in HOST mode\n");
982 		return;
983 	}
984 
985 	pi = pf->hw.port_info;
986 	mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
987 
988 	/* Ignore if event is not for Nearest Bridge */
989 	mib_type = FIELD_GET(ICE_AQ_LLDP_BRID_TYPE_M, mib->type);
990 	dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", mib_type);
991 	if (mib_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
992 		return;
993 
994 	/* A pending change event contains accurate config information, and
995 	 * the FW setting has not been updaed yet, so detect if change is
996 	 * pending to determine where to pull config information from
997 	 * (FW vs event)
998 	 */
999 	if (ice_dcb_is_mib_change_pending(mib->state))
1000 		pending_handled = false;
1001 
1002 	/* Check MIB Type and return if event for Remote MIB update */
1003 	mib_type = FIELD_GET(ICE_AQ_LLDP_MIB_TYPE_M, mib->type);
1004 	dev_dbg(dev, "LLDP event mib type %s\n", mib_type ? "remote" : "local");
1005 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
1006 		/* Update the remote cached instance and return */
1007 		if (!pending_handled) {
1008 			ice_get_dcb_cfg_from_mib_change(pi, event);
1009 		} else {
1010 			ret =
1011 			  ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
1012 					     ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
1013 					     &pi->qos_cfg.remote_dcbx_cfg);
1014 			if (ret)
1015 				dev_dbg(dev, "Failed to get remote DCB config\n");
1016 		}
1017 		return;
1018 	}
1019 
1020 	/* That a DCB change has happened is now determined */
1021 	mutex_lock(&pf->tc_mutex);
1022 
1023 	/* store the old configuration */
1024 	tmp_dcbx_cfg = pi->qos_cfg.local_dcbx_cfg;
1025 
1026 	/* Reset the old DCBX configuration data */
1027 	memset(&pi->qos_cfg.local_dcbx_cfg, 0,
1028 	       sizeof(pi->qos_cfg.local_dcbx_cfg));
1029 
1030 	/* Get updated DCBX data from firmware */
1031 	if (!pending_handled) {
1032 		ice_get_dcb_cfg_from_mib_change(pi, event);
1033 	} else {
1034 		ret = ice_get_dcb_cfg(pi);
1035 		if (ret) {
1036 			dev_err(dev, "Failed to get DCB config\n");
1037 			goto out;
1038 		}
1039 	}
1040 
1041 	/* No change detected in DCBX configs */
1042 	if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg,
1043 		    sizeof(tmp_dcbx_cfg))) {
1044 		dev_dbg(dev, "No change detected in DCBX configuration.\n");
1045 		goto out;
1046 	}
1047 
1048 	pf->dcbx_cap = ice_dcb_get_mode(pi, false);
1049 
1050 	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
1051 					   &pi->qos_cfg.local_dcbx_cfg);
1052 	ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg);
1053 	if (!need_reconfig)
1054 		goto out;
1055 
1056 	/* Enable DCB tagging only when more than one TC */
1057 	if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) {
1058 		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
1059 		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
1060 	} else {
1061 		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
1062 		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
1063 	}
1064 
1065 	/* Send Execute Pending MIB Change event if it is a Pending event */
1066 	if (!pending_handled) {
1067 		ice_lldp_execute_pending_mib(&pf->hw);
1068 		pending_handled = true;
1069 	}
1070 
1071 	rtnl_lock();
1072 	/* disable VSIs affected by DCB changes */
1073 	ice_dcb_ena_dis_vsi(pf, false, true);
1074 
1075 	ret = ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
1076 	if (ret) {
1077 		dev_err(dev, "Query Port ETS failed\n");
1078 		goto unlock_rtnl;
1079 	}
1080 
1081 	/* changes in configuration update VSI */
1082 	ice_pf_dcb_recfg(pf, false);
1083 
1084 	/* enable previously downed VSIs */
1085 	ice_dcb_ena_dis_vsi(pf, true, true);
1086 unlock_rtnl:
1087 	rtnl_unlock();
1088 out:
1089 	mutex_unlock(&pf->tc_mutex);
1090 
1091 	/* Send Execute Pending MIB Change event if it is a Pending event */
1092 	if (!pending_handled)
1093 		ice_lldp_execute_pending_mib(&pf->hw);
1094 }
1095