xref: /linux/drivers/net/ethernet/intel/ice/ice_lag.c (revision 34138ea02a608dc39f04e364f3249e12f6002bad)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (C) 2018-2021, Intel Corporation. */
3 
4 /* Link Aggregation code */
5 
6 #include "ice.h"
7 #include "ice_lib.h"
8 #include "ice_lag.h"
9 
10 #define ICE_LAG_RES_SHARED	BIT(14)
11 #define ICE_LAG_RES_VALID	BIT(15)
12 
13 #define ICE_TRAIN_PKT_LEN		16
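/* Dummy packet headers for the control/training switch rules added below.
 * Bytes 12-13 of lacp_train_pkt hold the Slow Protocols EtherType (0x8809)
 * that LACP frames carry; the active-active training header is left blank.
 */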
14 static const u8 lacp_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
15 						      0, 0, 0, 0, 0, 0,
16 						      0x88, 0x09, 0, 0 };
17 static const u8 act_act_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
18 							 0, 0, 0, 0, 0, 0,
19 							 0, 0, 0, 0 };
20 
21 #define ICE_RECIPE_LEN			64
22 #define ICE_LAG_SRIOV_CP_RECIPE		10
23 
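/* Raw recipe contents for the default-VSI and lport recipes that back the
 * LAG filter rules in this file (programmed into the switch elsewhere).
 */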
24 static const u8 ice_dflt_vsi_rcp[ICE_RECIPE_LEN] = {
25 	0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26 	0x85, 0, 0x01, 0, 0, 0, 0xff, 0xff, 0x08, 0, 0, 0, 0, 0, 0, 0,
27 	0, 0, 0, 0, 0, 0, 0x30 };
28 static const u8 ice_lport_rcp[ICE_RECIPE_LEN] = {
29 	0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 	0x85, 0, 0x16, 0, 0, 0, 0xff, 0xff, 0x07, 0, 0, 0, 0, 0, 0, 0,
31 	0, 0, 0, 0, 0, 0, 0x30 };
32 
33 /**
34  * ice_lag_set_primary - set PF LAG state as Primary
35  * @lag: LAG info struct
36  */
37 static void ice_lag_set_primary(struct ice_lag *lag)
38 {
39 	struct ice_pf *pf = lag->pf;
40 
41 	if (!pf)
42 		return;
43 
44 	if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) {
45 		dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n",
46 			 netdev_name(lag->netdev));
47 		return;
48 	}
49 
50 	lag->role = ICE_LAG_PRIMARY;
51 }
52 
53 /**
54  * ice_lag_set_bkup - set PF LAG state to Backup
55  * @lag: LAG info struct
56  */
57 static void ice_lag_set_bkup(struct ice_lag *lag)
58 {
59 	struct ice_pf *pf = lag->pf;
60 
61 	if (!pf)
62 		return;
63 
64 	if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) {
65 		dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n",
66 			netdev_name(lag->netdev));
67 		return;
68 	}
69 
70 	lag->role = ICE_LAG_BACKUP;
71 }
72 
73 /**
74  * netif_is_same_ice - determine if netdev is on the same ice NIC as local PF
75  * @pf: local PF struct
76  * @netdev: netdev we are evaluating
77  */
78 static bool netif_is_same_ice(struct ice_pf *pf, struct net_device *netdev)
79 {
80 	struct ice_netdev_priv *np;
81 	struct ice_pf *test_pf;
82 	struct ice_vsi *vsi;
83 
84 	if (!netif_is_ice(netdev))
85 		return false;
86 
87 	np = netdev_priv(netdev);
88 	if (!np)
89 		return false;
90 
91 	vsi = np->vsi;
92 	if (!vsi)
93 		return false;
94 
95 	test_pf = vsi->back;
96 	if (!test_pf)
97 		return false;
98 
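	/* PFs on the same PCI bus and slot are functions of the same NIC */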
99 	if (pf->pdev->bus != test_pf->pdev->bus ||
100 	    pf->pdev->slot != test_pf->pdev->slot)
101 		return false;
102 
103 	return true;
104 }
105 
106 /**
107  * ice_lag_config_eswitch - configure eswitch to work with LAG
108  * @lag: lag info struct
109  * @netdev: active network interface device struct
110  *
111  * Updates all port representors in eswitch to use @netdev for Tx.
112  *
113  * Configures the netdev to keep dst metadata (also used in representor Tx).
114  * This is required for an uplink without switchdev mode configured.
115  */
116 static void ice_lag_config_eswitch(struct ice_lag *lag,
117 				   struct net_device *netdev)
118 {
119 	struct ice_repr *repr;
120 	unsigned long id;
121 
122 	xa_for_each(&lag->pf->eswitch.reprs, id, repr)
123 		repr->dst->u.port_info.lower_dev = netdev;
124 
125 	netif_keep_dst(netdev);
126 }
127 
128 /**
129  * ice_netdev_to_lag - return pointer to associated lag struct from netdev
130  * @netdev: pointer to net_device struct to query
131  */
132 static struct ice_lag *ice_netdev_to_lag(struct net_device *netdev)
133 {
134 	struct ice_netdev_priv *np;
135 	struct ice_vsi *vsi;
136 
137 	if (!netif_is_ice(netdev))
138 		return NULL;
139 
140 	np = netdev_priv(netdev);
141 	if (!np)
142 		return NULL;
143 
144 	vsi = np->vsi;
145 	if (!vsi)
146 		return NULL;
147 
148 	return vsi->back->lag;
149 }
150 
151 /**
152  * ice_lag_find_hw_by_lport - return the hw struct for a bond member's lport
153  * @lag: lag struct
154  * @lport: lport value to search for
155  */
156 static struct ice_hw *
157 ice_lag_find_hw_by_lport(struct ice_lag *lag, u8 lport)
158 {
159 	struct ice_lag_netdev_list *entry;
160 	struct net_device *tmp_netdev;
161 	struct ice_netdev_priv *np;
162 	struct ice_hw *hw;
163 
164 	list_for_each_entry(entry, lag->netdev_head, node) {
165 		tmp_netdev = entry->netdev;
166 		if (!tmp_netdev || !netif_is_ice(tmp_netdev))
167 			continue;
168 
169 		np = netdev_priv(tmp_netdev);
170 		if (!np || !np->vsi)
171 			continue;
172 
173 		hw = &np->vsi->back->hw;
174 		if (hw->port_info->lport == lport)
175 			return hw;
176 	}
177 
178 	return NULL;
179 }
180 
181 /**
182  * ice_pkg_has_lport_extract - check if lport extraction supported
183  * @hw: HW struct
184  */
185 static bool ice_pkg_has_lport_extract(struct ice_hw *hw)
186 {
187 	int i;
188 
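	/* Scan the switch block's default profile field vectors for an entry
	 * that extracts lport metadata; the SRIOV LAG recipes depend on this
	 * extraction being available in the loaded DDP package.
	 */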
189 	for (i = 0; i < hw->blk[ICE_BLK_SW].es.count; i++) {
190 		u16 offset;
191 		u8 fv_prot;
192 
193 		ice_find_prot_off(hw, ICE_BLK_SW, ICE_SW_DEFAULT_PROFILE, i,
194 				  &fv_prot, &offset);
195 		if (fv_prot == ICE_FV_PROT_MDID &&
196 		    offset == ICE_LP_EXT_BUF_OFFSET)
197 			return true;
198 	}
199 	return false;
200 }
201 
202 /**
203  * ice_lag_find_primary - returns pointer to the primary interface's lag struct
204  * @lag: local interface's lag struct
205  */
206 static struct ice_lag *ice_lag_find_primary(struct ice_lag *lag)
207 {
208 	struct ice_lag *primary_lag = NULL;
209 	struct list_head *tmp;
210 
211 	list_for_each(tmp, lag->netdev_head) {
212 		struct ice_lag_netdev_list *entry;
213 		struct ice_lag *tmp_lag;
214 
215 		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
216 		tmp_lag = ice_netdev_to_lag(entry->netdev);
217 		if (tmp_lag && tmp_lag->primary) {
218 			primary_lag = tmp_lag;
219 			break;
220 		}
221 	}
222 
223 	return primary_lag;
224 }
225 
226 /**
227  * ice_lag_cfg_fltr - Add/Remove rule for LAG
228  * @lag: lag struct for local interface
229  * @act: rule action
230  * @recipe_id: recipe id for the new rule
231  * @rule_idx: pointer to rule index
232  * @direction: ICE_FLTR_RX or ICE_FLTR_TX
233  * @add: boolean on whether we are adding filters
234  */
235 static int
236 ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx,
237 		 u8 direction, bool add)
238 {
239 	struct ice_sw_rule_lkup_rx_tx *s_rule;
240 	struct ice_hw *hw = &lag->pf->hw;
241 	u16 s_rule_sz, vsi_num;
242 	u8 *eth_hdr;
243 	u32 opc;
244 	int err;
245 
246 	vsi_num = ice_get_hw_vsi_num(hw, 0);
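	/* The LAG rules below always target the PF's main VSI (SW index 0) */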
247 
248 	s_rule_sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule);
249 	s_rule = kzalloc(s_rule_sz, GFP_KERNEL);
250 	if (!s_rule) {
251 		dev_err(ice_pf_to_dev(lag->pf), "error allocating rule for LAG\n");
252 		return -ENOMEM;
253 	}
254 
255 	if (add) {
256 		eth_hdr = s_rule->hdr_data;
257 		ice_fill_eth_hdr(eth_hdr);
258 
259 		act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi_num);
260 
261 		s_rule->recipe_id = cpu_to_le16(recipe_id);
262 		if (direction == ICE_FLTR_RX) {
263 			s_rule->hdr.type =
264 				cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
265 			s_rule->src = cpu_to_le16(hw->port_info->lport);
266 		} else {
267 			s_rule->hdr.type =
268 				cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
269 			s_rule->src = cpu_to_le16(vsi_num);
270 		}
271 		s_rule->act = cpu_to_le32(act);
272 		s_rule->hdr_len = cpu_to_le16(DUMMY_ETH_HDR_LEN);
273 		opc = ice_aqc_opc_add_sw_rules;
274 	} else {
275 		s_rule->index = cpu_to_le16(*rule_idx);
276 		opc = ice_aqc_opc_remove_sw_rules;
277 	}
278 
279 	err = ice_aq_sw_rules(&lag->pf->hw, s_rule, s_rule_sz, 1, opc, NULL);
280 	if (err)
281 		goto dflt_fltr_free;
282 
283 	if (add)
284 		*rule_idx = le16_to_cpu(s_rule->index);
285 	else
286 		*rule_idx = 0;
287 
288 dflt_fltr_free:
289 	kfree(s_rule);
290 	return err;
291 }
292 
293 /**
294  * ice_lag_cfg_dflt_fltr - Add/Remove default VSI rule for LAG
295  * @lag: lag struct for local interface
296  * @add: boolean on whether to add filter
297  */
298 static int
299 ice_lag_cfg_dflt_fltr(struct ice_lag *lag, bool add)
300 {
301 	u32 act = ICE_SINGLE_ACT_VSI_FORWARDING |
302 		ICE_SINGLE_ACT_VALID_BIT | ICE_SINGLE_ACT_LAN_ENABLE;
303 	int err;
304 
305 	err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id,
306 			       ICE_FLTR_RX, add);
307 	if (err)
308 		goto err_rx;
309 
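	/* Tx-direction rule enables loopback so that matching traffic can be
	 * switched back to the local VSI instead of going out the wire.
	 */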
310 	act = ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT |
311 	      ICE_SINGLE_ACT_LB_ENABLE;
312 	err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_tx_rule_id,
313 			       ICE_FLTR_TX, add);
314 	if (err)
315 		goto err_tx;
316 
317 	return 0;
318 
319 err_tx:
320 	ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id,
321 			 ICE_FLTR_RX, !add);
322 err_rx:
323 	return err;
324 }
325 
326 /**
327  * ice_lag_cfg_drop_fltr - Add/Remove lport drop rule
328  * @lag: lag struct for local interface
329  * @add: boolean on whether to add filter
330  */
331 static int
332 ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add)
333 {
334 	u32 act = ICE_SINGLE_ACT_VSI_FORWARDING |
335 		  ICE_SINGLE_ACT_VALID_BIT |
336 		  ICE_SINGLE_ACT_DROP;
337 
338 	return ice_lag_cfg_fltr(lag, act, lag->lport_recipe,
339 				&lag->lport_rule_idx, ICE_FLTR_RX, add);
340 }
341 
342 /**
343  * ice_lag_cfg_pf_fltrs_act_bkup - set filters up for new active port
344  * @lag: local interface's lag struct
345  * @bonding_info: netdev event bonding info
346  */
347 static void
348 ice_lag_cfg_pf_fltrs_act_bkup(struct ice_lag *lag,
349 			      struct netdev_bonding_info *bonding_info)
350 {
351 	struct device *dev = ice_pf_to_dev(lag->pf);
352 
353 	/* interface not active - remove old default VSI rule */
354 	if (bonding_info->slave.state && lag->pf_rx_rule_id) {
355 		if (ice_lag_cfg_dflt_fltr(lag, false))
356 			dev_err(dev, "Error removing old default VSI filter\n");
357 		if (ice_lag_cfg_drop_fltr(lag, true))
358 			dev_err(dev, "Error adding new drop filter\n");
359 		return;
360 	}
361 
362 	/* interface becoming active - add new default VSI rule */
363 	if (!bonding_info->slave.state && !lag->pf_rx_rule_id) {
364 		if (ice_lag_cfg_dflt_fltr(lag, true))
365 			dev_err(dev, "Error adding new default VSI filter\n");
366 		if (lag->lport_rule_idx && ice_lag_cfg_drop_fltr(lag, false))
367 			dev_err(dev, "Error removing old drop filter\n");
368 	}
369 }
370 
371 /**
372  * ice_lag_cfg_lp_fltr - configure lport filters
373  * @lag: local interface's lag struct
374  * @add: add or remove rule
375  * @cp: control packet only or general PF lport rule
376  */
377 static void
378 ice_lag_cfg_lp_fltr(struct ice_lag *lag, bool add, bool cp)
379 {
380 	struct ice_sw_rule_lkup_rx_tx *s_rule;
381 	struct ice_vsi *vsi = lag->pf->vsi[0];
382 	u16 buf_len, opc;
383 
384 	buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, ICE_TRAIN_PKT_LEN);
385 	s_rule = kzalloc(buf_len, GFP_KERNEL);
386 	if (!s_rule) {
387 		netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n");
388 		return;
389 	}
390 
391 	if (add) {
392 		if (cp) {
393 			s_rule->recipe_id =
394 				cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE);
395 			memcpy(s_rule->hdr_data, lacp_train_pkt,
396 			       ICE_TRAIN_PKT_LEN);
397 		} else {
398 			s_rule->recipe_id = cpu_to_le16(lag->act_act_recipe);
399 			memcpy(s_rule->hdr_data, act_act_train_pkt,
400 			       ICE_TRAIN_PKT_LEN);
401 		}
402 
403 		s_rule->src = cpu_to_le16(vsi->port_info->lport);
404 		s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI |
405 					  ICE_SINGLE_ACT_LAN_ENABLE |
406 					  ICE_SINGLE_ACT_VALID_BIT |
407 					  FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M,
408 						     vsi->vsi_num));
409 		s_rule->hdr_len = cpu_to_le16(ICE_TRAIN_PKT_LEN);
410 		s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
411 		opc = ice_aqc_opc_add_sw_rules;
412 	} else {
413 		opc = ice_aqc_opc_remove_sw_rules;
414 		if (cp)
415 			s_rule->index = cpu_to_le16(lag->cp_rule_idx);
416 		else
417 			s_rule->index = cpu_to_le16(lag->act_act_rule_idx);
418 	}
419 	if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) {
420 		netdev_warn(lag->netdev, "Error %s %s rule for aggregate\n",
421 			    add ? "ADDING" : "REMOVING",
422 			    cp ? "CONTROL PACKET" : "LPORT");
423 		goto err_cp_free;
424 	}
425 
426 	if (add) {
427 		if (cp)
428 			lag->cp_rule_idx = le16_to_cpu(s_rule->index);
429 		else
430 			lag->act_act_rule_idx = le16_to_cpu(s_rule->index);
431 	} else {
432 		if (cp)
433 			lag->cp_rule_idx = 0;
434 		else
435 			lag->act_act_rule_idx = 0;
436 	}
437 
438 err_cp_free:
439 	kfree(s_rule);
440 }
441 
442 /**
443  * ice_lag_cfg_pf_fltrs - set filters up for PF traffic
444  * @lag: local interface's lag struct
445  * @ptr: opaque data containing notifier event
446  */
447 static void
448 ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr)
449 {
450 	struct netdev_notifier_bonding_info *info = ptr;
451 	struct netdev_bonding_info *bonding_info;
452 	struct net_device *event_netdev;
453 
454 	event_netdev = netdev_notifier_info_to_dev(ptr);
455 	if (event_netdev != lag->netdev)
456 		return;
457 
458 	bonding_info = &info->bonding_info;
459 
460 	if (lag->bond_aa) {
461 		if (lag->need_fltr_cfg) {
462 			ice_lag_cfg_lp_fltr(lag, true, false);
463 			lag->need_fltr_cfg = false;
464 		}
465 	} else {
466 		ice_lag_cfg_pf_fltrs_act_bkup(lag, bonding_info);
467 	}
468 }
469 
470 /**
471  * ice_display_lag_info - print LAG info
472  * @lag: LAG info struct
473  */
474 static void ice_display_lag_info(struct ice_lag *lag)
475 {
476 	const char *name, *upper, *role, *bonded, *primary;
477 	struct device *dev = &lag->pf->pdev->dev;
478 
479 	name = lag->netdev ? netdev_name(lag->netdev) : "unset";
480 	upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
481 	primary = lag->primary ? "TRUE" : "FALSE";
482 	bonded = lag->bonded ? "BONDED" : "UNBONDED";
483 
484 	switch (lag->role) {
485 	case ICE_LAG_NONE:
486 		role = "NONE";
487 		break;
488 	case ICE_LAG_PRIMARY:
489 		role = "PRIMARY";
490 		break;
491 	case ICE_LAG_BACKUP:
492 		role = "BACKUP";
493 		break;
494 	case ICE_LAG_UNSET:
495 		role = "UNSET";
496 		break;
497 	default:
498 		role = "ERROR";
499 	}
500 
501 	dev_dbg(dev, "%s %s, upper:%s, role:%s, primary:%s\n", name, bonded,
502 		upper, role, primary);
503 }
504 
505 /**
506  * ice_lag_qbuf_recfg - generate a buffer of queues for a reconfigure command
507  * @hw: HW struct that contains the queue contexts
508  * @qbuf: pointer to buffer to populate
509  * @vsi_num: index of the VSI in PF space
510  * @numq: number of queues to search for
511  * @tc: traffic class that contains the queues
512  *
513  * Return: the number of valid queues in the buffer
514  */
515 static u16
516 ice_lag_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
517 		   u16 vsi_num, u16 numq, u8 tc)
518 {
519 	struct ice_pf *pf = hw->back;
520 	struct ice_q_ctx *q_ctx;
521 	u16 qid, count = 0;
522 	int i;
523 
524 	for (i = 0; i < numq; i++) {
525 		q_ctx = ice_get_lan_q_ctx(hw, vsi_num, tc, i);
526 		if (!q_ctx) {
527 			dev_dbg(ice_hw_to_dev(hw), "%s queue %d NO Q CONTEXT\n",
528 				__func__, i);
529 			continue;
530 		}
531 		if (q_ctx->q_teid == ICE_INVAL_TEID) {
532 			dev_dbg(ice_hw_to_dev(hw), "%s queue %d INVAL TEID\n",
533 				__func__, i);
534 			continue;
535 		}
536 		if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) {
537 			dev_dbg(ice_hw_to_dev(hw), "%s queue %d INVAL Q HANDLE\n",
538 				__func__, i);
539 			continue;
540 		}
541 
542 		qid = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle];
543 		qbuf->queue_info[count].q_handle = cpu_to_le16(qid);
544 		qbuf->queue_info[count].tc = tc;
545 		qbuf->queue_info[count].q_teid = cpu_to_le32(q_ctx->q_teid);
546 		count++;
547 	}
548 
549 	return count;
550 }
551 
552 /**
553  * ice_lag_get_sched_parent - locate or create a sched node parent
554  * @hw: HW struct for getting parent in
555  * @tc: traffic class on parent/node
556  */
557 static struct ice_sched_node *
558 ice_lag_get_sched_parent(struct ice_hw *hw, u8 tc)
559 {
560 	struct ice_sched_node *tc_node, *aggnode, *parent = NULL;
561 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
562 	struct ice_port_info *pi = hw->port_info;
563 	struct device *dev;
564 	u8 aggl, vsil;
565 	int n;
566 
567 	dev = ice_hw_to_dev(hw);
568 
569 	tc_node = ice_sched_get_tc_node(pi, tc);
570 	if (!tc_node) {
571 		dev_warn(dev, "Failure to find TC node for LAG move\n");
572 		return parent;
573 	}
574 
575 	aggnode = ice_sched_get_agg_node(pi, tc_node, ICE_DFLT_AGG_ID);
576 	if (!aggnode) {
577 		dev_warn(dev, "Failure to find aggregate node for LAG move\n");
578 		return parent;
579 	}
580 
581 	aggl = ice_sched_get_agg_layer(hw);
582 	vsil = ice_sched_get_vsi_layer(hw);
583 
584 	for (n = aggl + 1; n < vsil; n++)
585 		num_nodes[n] = 1;
586 
587 	for (n = 0; n < aggnode->num_children; n++) {
588 		parent = ice_sched_get_free_vsi_parent(hw, aggnode->children[n],
589 						       num_nodes);
590 		if (parent)
591 			return parent;
592 	}
593 
594 	/* if free parent not found - add one */
595 	parent = aggnode;
596 	for (n = aggl + 1; n < vsil; n++) {
597 		u16 num_nodes_added;
598 		u32 first_teid;
599 		int err;
600 
601 		err = ice_sched_add_nodes_to_layer(pi, tc_node, parent, n,
602 						   num_nodes[n], &first_teid,
603 						   &num_nodes_added);
604 		if (err || num_nodes[n] != num_nodes_added)
605 			return NULL;
606 
607 		if (num_nodes_added)
608 			parent = ice_sched_find_node_by_teid(tc_node,
609 							     first_teid);
610 		else
611 			parent = parent->children[0];
612 		if (!parent) {
613 			dev_warn(dev, "Failure to add new parent for LAG move\n");
614 			return parent;
615 		}
616 	}
617 
618 	return parent;
619 }
620 
621 /**
622  * ice_lag_move_vf_node_tc - move scheduling nodes for one VF on one TC
623  * @lag: lag info struct
624  * @oldport: lport of previous nodes location
625  * @newport: lport of destination nodes location
626  * @vsi_num: array index of VSI in PF space
627  * @tc: traffic class to move
628  */
629 static void
630 ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport,
631 			u16 vsi_num, u8 tc)
632 {
633 	DEFINE_RAW_FLEX(struct ice_aqc_move_elem, buf, teid, 1);
634 	struct device *dev = ice_pf_to_dev(lag->pf);
635 	u16 numq, valq, num_moved, qbuf_size;
636 	u16 buf_size = __struct_size(buf);
637 	struct ice_aqc_cfg_txqs_buf *qbuf;
638 	struct ice_sched_node *n_prt;
639 	struct ice_hw *new_hw = NULL;
640 	__le32 teid, parent_teid;
641 	struct ice_vsi_ctx *ctx;
642 	u32 tmp_teid;
643 
644 	ctx = ice_get_vsi_ctx(&lag->pf->hw, vsi_num);
645 	if (!ctx) {
646 		dev_warn(dev, "Unable to locate VSI context for LAG failover\n");
647 		return;
648 	}
649 
650 	/* check to see if this VF is enabled on this TC */
651 	if (!ctx->sched.vsi_node[tc])
652 		return;
653 
654 	/* locate HW struct for destination port */
655 	new_hw = ice_lag_find_hw_by_lport(lag, newport);
656 	if (!new_hw) {
657 		dev_warn(dev, "Unable to locate HW struct for LAG node destination\n");
658 		return;
659 	}
660 
661 	numq = ctx->num_lan_q_entries[tc];
662 	teid = ctx->sched.vsi_node[tc]->info.node_teid;
663 	tmp_teid = le32_to_cpu(teid);
664 	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
665 	/* if no teid assigned or numq == 0, then this TC is not active */
666 	if (!tmp_teid || !numq)
667 		return;
668 
669 	/* suspend VSI subtree for Traffic Class "tc" on
670 	 * this VF's VSI
671 	 */
672 	if (ice_sched_suspend_resume_elems(&lag->pf->hw, 1, &tmp_teid, true))
673 		dev_dbg(dev, "Problem suspending traffic for LAG node move\n");
674 
675 	/* reconfigure all VF's queues on this Traffic Class
676 	 * to new port
677 	 */
678 	qbuf_size = struct_size(qbuf, queue_info, numq);
679 	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
680 	if (!qbuf) {
681 		dev_warn(dev, "Failure allocating memory for VF queue recfg buffer\n");
682 		goto resume_traffic;
683 	}
684 
685 	/* add the per queue info for the reconfigure command buffer */
686 	valq = ice_lag_qbuf_recfg(&lag->pf->hw, qbuf, vsi_num, numq, tc);
687 	if (!valq) {
688 		dev_dbg(dev, "No valid queues found for LAG failover\n");
689 		goto qbuf_none;
690 	}
691 
692 	if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport,
693 			       newport, ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
694 		dev_warn(dev, "Failure to configure queues for LAG failover\n");
695 		goto qbuf_err;
696 	}
697 
698 qbuf_none:
699 	kfree(qbuf);
700 
701 	/* find new parent in destination port's tree for VF VSI node on this
702 	 * Traffic Class
703 	 */
704 	n_prt = ice_lag_get_sched_parent(new_hw, tc);
705 	if (!n_prt)
706 		goto resume_traffic;
707 
708 	/* Move VF's VSI node for this TC to newport's scheduler tree */
709 	buf->hdr.src_parent_teid = parent_teid;
710 	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
711 	buf->hdr.num_elems = cpu_to_le16(1);
712 	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
713 	buf->teid[0] = teid;
714 
715 	if (ice_aq_move_sched_elems(&lag->pf->hw, buf, buf_size, &num_moved))
716 		dev_warn(dev, "Failure to move VF nodes for failover\n");
717 	else
718 		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
719 
720 	goto resume_traffic;
721 
722 qbuf_err:
723 	kfree(qbuf);
724 
725 resume_traffic:
726 	/* restart traffic for VSI node */
727 	if (ice_sched_suspend_resume_elems(&lag->pf->hw, 1, &tmp_teid, false))
728 		dev_dbg(dev, "Problem restarting traffic for LAG node move\n");
729 }
730 
731 /**
732  * ice_lag_build_netdev_list - populate the lag struct's netdev list
733  * @lag: local lag struct
734  * @ndlist: pointer to netdev list to populate
735  */
736 static void ice_lag_build_netdev_list(struct ice_lag *lag,
737 				      struct ice_lag_netdev_list *ndlist)
738 {
739 	struct ice_lag_netdev_list *nl;
740 	struct net_device *tmp_nd;
741 
742 	INIT_LIST_HEAD(&ndlist->node);
743 	rcu_read_lock();
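	/* GFP_ATOMIC: allocation must not sleep inside the RCU read section */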
744 	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
745 		nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
746 		if (!nl)
747 			break;
748 
749 		nl->netdev = tmp_nd;
750 		list_add(&nl->node, &ndlist->node);
751 	}
752 	rcu_read_unlock();
753 	lag->netdev_head = &ndlist->node;
754 }
755 
756 /**
757  * ice_lag_destroy_netdev_list - free lag struct's netdev list
758  * @lag: pointer to local lag struct
759  * @ndlist: pointer to lag struct netdev list
760  */
761 static void ice_lag_destroy_netdev_list(struct ice_lag *lag,
762 					struct ice_lag_netdev_list *ndlist)
763 {
764 	struct ice_lag_netdev_list *entry, *n;
765 
766 	rcu_read_lock();
767 	list_for_each_entry_safe(entry, n, &ndlist->node, node) {
768 		list_del(&entry->node);
769 		kfree(entry);
770 	}
771 	rcu_read_unlock();
772 	lag->netdev_head = NULL;
773 }
774 
775 /**
776  * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF
777  * @lag: primary interface LAG struct
778  * @oldport: lport of previous interface
779  * @newport: lport of destination interface
780  * @vsi_num: SW index of VF's VSI
781  */
782 static void
783 ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport,
784 			     u16 vsi_num)
785 {
786 	u8 tc;
787 
788 	ice_for_each_traffic_class(tc)
789 		ice_lag_move_vf_node_tc(lag, oldport, newport, vsi_num, tc);
790 }
791 
792 /**
793  * ice_lag_move_vf_nodes - move Tx scheduling nodes for all VFs to new port
794  * @lag: lag info struct
795  * @oldport: lport of previous interface
796  * @newport: lport of destination interface
797  */
798 static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport)
799 {
800 	struct ice_pf *pf;
801 	int i;
802 
803 	if (!lag->primary)
804 		return;
805 
806 	pf = lag->pf;
807 	ice_for_each_vsi(pf, i)
808 		if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF)
809 			ice_lag_move_single_vf_nodes(lag, oldport, newport, i);
810 }
811 
812 /**
813  * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context
814  * @lag: local lag struct
815  * @src_prt: lport value for source port
816  * @dst_prt: lport value for destination port
817  *
818  * This function is used to move nodes during an out-of-netdev-event situation,
819  * primarily when the driver needs to reconfigure or recreate resources.
820  *
821  * Must be called while holding the lag_mutex to prevent lag events from
822  * being processed while out-of-sync moves are happening.  Also, paired moves,
823  * such as used in a reset flow, should both be called under the same mutex
824  * lock to avoid changes between start of reset and end of reset.
825  */
826 void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt)
827 {
828 	struct ice_lag_netdev_list ndlist;
829 
830 	ice_lag_build_netdev_list(lag, &ndlist);
831 	ice_lag_move_vf_nodes(lag, src_prt, dst_prt);
832 	ice_lag_destroy_netdev_list(lag, &ndlist);
833 }
834 
835 /**
836  * ice_lag_prepare_vf_reset - helper to adjust vf lag for reset
837  * @lag: lag struct for interface that owns VF
838  *
839  * Context: must be called with the lag_mutex lock held.
840  *
841  * Return: active lport value or ICE_LAG_INVALID_PORT if nothing moved.
842  */
843 u8 ice_lag_prepare_vf_reset(struct ice_lag *lag)
844 {
845 	u8 pri_prt, act_prt;
846 
847 	if (lag && lag->bonded && lag->primary && lag->upper_netdev) {
848 		if (!lag->bond_aa) {
849 			pri_prt = lag->pf->hw.port_info->lport;
850 			act_prt = lag->active_port;
851 			if (act_prt != pri_prt &&
852 			    act_prt != ICE_LAG_INVALID_PORT) {
853 				ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
854 				return act_prt;
855 			}
856 		} else {
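			/* Briefly mark the secondary port inactive so the
			 * failover below pulls all VF queues back to the
			 * primary, then restore the bit for post-reset state.
			 */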
857 			if (lag->port_bitmap & ICE_LAGS_M) {
858 				lag->port_bitmap &= ~ICE_LAGS_M;
859 				ice_lag_aa_failover(lag, ICE_LAGP_IDX, NULL);
860 				lag->port_bitmap |= ICE_LAGS_M;
861 			}
862 		}
863 	}
864 
865 	return ICE_LAG_INVALID_PORT;
866 }
867 
868 /**
869  * ice_lag_complete_vf_reset - helper for lag after reset
870  * @lag: lag struct for primary interface
871  * @act_prt: which port should be active for lag
872  *
873  * Context: must be called while holding the lag_mutex.
874  */
875 void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt)
876 {
877 	u8 pri_prt;
878 
879 	if (lag && lag->bonded && lag->primary) {
880 		if (!lag->bond_aa) {
881 			pri_prt = lag->pf->hw.port_info->lport;
882 			if (act_prt != ICE_LAG_INVALID_PORT)
883 				ice_lag_move_vf_nodes_cfg(lag, pri_prt,
884 							  act_prt);
885 		} else {
886 			ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL);
887 		}
888 	}
889 }
890 
891 /**
892  * ice_lag_info_event - handle NETDEV_BONDING_INFO event
893  * @lag: LAG info struct
894  * @ptr: opaque data pointer
895  *
896  * ptr is to be cast to (netdev_notifier_bonding_info *)
897  */
898 static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
899 {
900 	struct netdev_notifier_bonding_info *info = ptr;
901 	struct netdev_bonding_info *bonding_info;
902 	struct net_device *event_netdev;
903 	const char *lag_netdev_name;
904 
905 	event_netdev = netdev_notifier_info_to_dev(ptr);
906 	lag_netdev_name = netdev_name(lag->netdev);
907 	bonding_info = &info->bonding_info;
908 
909 	if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev)
910 		return;
911 
912 	if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) {
913 		netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n");
914 		goto lag_out;
915 	}
916 
917 	if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
918 		netdev_dbg(lag->netdev, "Bonding event recv, but secondary info not for us\n");
919 		goto lag_out;
920 	}
921 
922 	if (bonding_info->slave.state)
923 		ice_lag_set_bkup(lag);
924 	else
925 		ice_lag_set_primary(lag);
926 
927 lag_out:
928 	ice_display_lag_info(lag);
929 }
930 
931 /**
932  * ice_lag_aa_qbuf_recfg - fill a single queue buffer for recfg cmd
933  * @hw: HW struct that contains the queue context
934  * @qbuf: pointer to single queue buffer
935  * @vsi_num: index of the VF VSI in PF space
936  * @qnum: queue index
937  *
938  * Return: Zero on success, error code on failure.
939  */
940 static int
941 ice_lag_aa_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
942 		      u16 vsi_num, int qnum)
943 {
944 	struct ice_pf *pf = hw->back;
945 	struct ice_q_ctx *q_ctx;
946 	u16 q_id;
947 
948 	q_ctx = ice_get_lan_q_ctx(hw, vsi_num, 0, qnum);
949 	if (!q_ctx) {
950 		dev_dbg(ice_hw_to_dev(hw), "LAG queue %d no Q context\n", qnum);
951 		return -ENOENT;
952 	}
953 
954 	if (q_ctx->q_teid == ICE_INVAL_TEID) {
955 		dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL TEID\n", qnum);
956 		return -EINVAL;
957 	}
958 
959 	if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) {
960 		dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL Q HANDLE\n", qnum);
961 		return -EINVAL;
962 	}
963 
964 	q_id = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle];
965 	qbuf->queue_info[0].q_handle = cpu_to_le16(q_id);
966 	qbuf->queue_info[0].tc = 0;
967 	qbuf->queue_info[0].q_teid = cpu_to_le32(q_ctx->q_teid);
968 
969 	return 0;
970 }
971 
972 /**
973  * ice_lag_aa_move_vf_qs - Move some/all VF queues to destination
974  * @lag: primary interface's lag struct
975  * @dest: index of destination port
976  * @vsi_num: index of VF VSI in PF space
977  * @all: if true move all queues to destination
978  * @odd: VF wide q indicator for odd/even
979  * @e_pf: PF struct for the event interface
980  *
981  * the parameter "all" is to control whether we are splitting the queues
982  * between two interfaces or moving them all to the destination interface
983  */
984 static void ice_lag_aa_move_vf_qs(struct ice_lag *lag, u8 dest, u16 vsi_num,
985 				  bool all, bool *odd, struct ice_pf *e_pf)
986 {
987 	DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1);
988 	struct ice_hw *old_hw, *new_hw, *pri_hw, *sec_hw;
989 	struct device *dev = ice_pf_to_dev(lag->pf);
990 	struct ice_vsi_ctx *pv_ctx, *sv_ctx;
991 	struct ice_lag_netdev_list ndlist;
992 	u16 num_q, qbuf_size, sec_vsi_num;
993 	u8 pri_lport, sec_lport;
994 	u32 pvf_teid, svf_teid;
995 	u16 vf_id;
996 
997 	vf_id = lag->pf->vsi[vsi_num]->vf->vf_id;
998 	/* If sec_vf[] not defined, then no second interface to share with */
999 	if (lag->sec_vf[vf_id])
1000 		sec_vsi_num = lag->sec_vf[vf_id]->idx;
1001 	else
1002 		return;
1003 
1004 	pri_lport = lag->bond_lport_pri;
1005 	sec_lport = lag->bond_lport_sec;
1006 
1007 	if (pri_lport == ICE_LAG_INVALID_PORT ||
1008 	    sec_lport == ICE_LAG_INVALID_PORT)
1009 		return;
1010 
1011 	if (!e_pf)
1012 		ice_lag_build_netdev_list(lag, &ndlist);
1013 
1014 	pri_hw = &lag->pf->hw;
1015 	if (e_pf && lag->pf != e_pf)
1016 		sec_hw = &e_pf->hw;
1017 	else
1018 		sec_hw = ice_lag_find_hw_by_lport(lag, sec_lport);
1019 
1020 	if (!pri_hw || !sec_hw)
1021 		return;
1022 
1023 	if (dest == ICE_LAGP_IDX) {
1024 		struct ice_vsi *vsi;
1025 
1026 		vsi = ice_get_main_vsi(lag->pf);
1027 		if (!vsi)
1028 			return;
1029 
1030 		old_hw = sec_hw;
1031 		new_hw = pri_hw;
1032 		ice_lag_config_eswitch(lag, vsi->netdev);
1033 	} else {
1034 		struct ice_pf *sec_pf = sec_hw->back;
1035 		struct ice_vsi *vsi;
1036 
1037 		vsi = ice_get_main_vsi(sec_pf);
1038 		if (!vsi)
1039 			return;
1040 
1041 		old_hw = pri_hw;
1042 		new_hw = sec_hw;
1043 		ice_lag_config_eswitch(lag, vsi->netdev);
1044 	}
1045 
1046 	pv_ctx = ice_get_vsi_ctx(pri_hw, vsi_num);
1047 	if (!pv_ctx) {
1048 		dev_warn(dev, "Unable to locate primary VSI %d context for LAG failover\n",
1049 			 vsi_num);
1050 		return;
1051 	}
1052 
1053 	sv_ctx = ice_get_vsi_ctx(sec_hw, sec_vsi_num);
1054 	if (!sv_ctx) {
1055 		dev_warn(dev, "Unable to locate secondary VSI %d context for LAG failover\n",
1056 			 vsi_num);
1057 		return;
1058 	}
1059 
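	/* Active-active queue sharing operates on traffic class 0 only */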
1060 	num_q = pv_ctx->num_lan_q_entries[0];
1061 	qbuf_size = __struct_size(qbuf);
1062 
1063 	/* Suspend traffic for primary VSI VF */
1064 	pvf_teid = le32_to_cpu(pv_ctx->sched.vsi_node[0]->info.node_teid);
1065 	ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, true);
1066 
1067 	/* Suspend traffic for secondary VSI VF */
1068 	svf_teid = le32_to_cpu(sv_ctx->sched.vsi_node[0]->info.node_teid);
1069 	ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, true);
1070 
1071 	for (int i = 0; i < num_q; i++) {
1072 		struct ice_sched_node *n_prt, *q_node, *parent;
1073 		struct ice_port_info *pi, *new_pi;
1074 		struct ice_vsi_ctx *src_ctx;
1075 		struct ice_sched_node *p;
1076 		struct ice_q_ctx *q_ctx;
1077 		u16 dst_vsi_num;
1078 
1079 		pi = old_hw->port_info;
1080 		new_pi = new_hw->port_info;
1081 
1082 		*odd = !(*odd);
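		/* When splitting (!all), alternate queues between the two
		 * ports using the running parity flag, and skip queues that
		 * already live on the destination port.
		 */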
1083 		if ((dest == ICE_LAGP_IDX && *odd && !all) ||
1084 		    (dest == ICE_LAGS_IDX && !(*odd) && !all) ||
1085 		    lag->q_home[vf_id][i] == dest)
1086 			continue;
1087 
1088 		if (dest == ICE_LAGP_IDX)
1089 			dst_vsi_num = vsi_num;
1090 		else
1091 			dst_vsi_num = sec_vsi_num;
1092 
1093 		n_prt = ice_sched_get_free_qparent(new_hw->port_info,
1094 						   dst_vsi_num, 0,
1095 						   ICE_SCHED_NODE_OWNER_LAN);
1096 		if (!n_prt)
1097 			continue;
1098 
1099 		q_ctx = ice_get_lan_q_ctx(pri_hw, vsi_num, 0, i);
1100 		if (!q_ctx)
1101 			continue;
1102 
1103 		if (dest == ICE_LAGP_IDX)
1104 			src_ctx = sv_ctx;
1105 		else
1106 			src_ctx = pv_ctx;
1107 
1108 		q_node = ice_sched_find_node_by_teid(src_ctx->sched.vsi_node[0],
1109 						     q_ctx->q_teid);
1110 		if (!q_node)
1111 			continue;
1112 
1113 		qbuf->src_parent_teid = q_node->info.parent_teid;
1114 		qbuf->dst_parent_teid = n_prt->info.node_teid;
1115 
1116 		/* Move the node in the HW/FW */
1117 		if (ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i))
1118 			continue;
1119 
1120 		if (dest == ICE_LAGP_IDX)
1121 			ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1,
1122 					   sec_lport, pri_lport,
1123 					   ICE_AQC_Q_CFG_MOVE_TC_CHNG,
1124 					   NULL);
1125 		else
1126 			ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1,
1127 					   pri_lport, sec_lport,
1128 					   ICE_AQC_Q_CFG_MOVE_TC_CHNG,
1129 					   NULL);
1130 
1131 		/* Move the node in the SW */
1132 		parent = q_node->parent;
1133 		if (!parent)
1134 			continue;
1135 
1136 		for (int n = 0; n < parent->num_children; n++) {
1137 			int j;
1138 
1139 			if (parent->children[n] != q_node)
1140 				continue;
1141 
1142 			for (j = n + 1; j < parent->num_children;
1143 			     j++) {
1144 				parent->children[j - 1] =
1145 					parent->children[j];
1146 			}
1147 			parent->children[j] = NULL;
1148 			parent->num_children--;
1149 			break;
1150 		}
1151 
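		/* Unlink q_node from the source port's sibling chain for its
		 * scheduler layer, fixing up the chain head if needed.
		 */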
1152 		p = pi->sib_head[0][q_node->tx_sched_layer];
1153 		while (p) {
1154 			if (p->sibling == q_node) {
1155 				p->sibling = q_node->sibling;
1156 				break;
1157 			}
1158 			p = p->sibling;
1159 		}
1160 
1161 		if (pi->sib_head[0][q_node->tx_sched_layer] == q_node)
1162 			pi->sib_head[0][q_node->tx_sched_layer] =
1163 				q_node->sibling;
1164 
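		/* Attach q_node under its new parent on the destination port
		 * and append it to that port's sibling chain for the layer.
		 */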
1165 		q_node->parent = n_prt;
1166 		q_node->info.parent_teid = n_prt->info.node_teid;
1167 		q_node->sibling = NULL;
1168 		p = new_pi->sib_head[0][q_node->tx_sched_layer];
1169 		if (p) {
1170 			while (p) {
1171 				if (!p->sibling) {
1172 					p->sibling = q_node;
1173 					break;
1174 				}
1175 				p = p->sibling;
1176 			}
1177 		} else {
1178 			new_pi->sib_head[0][q_node->tx_sched_layer] =
1179 				q_node;
1180 		}
1181 
1182 		n_prt->children[n_prt->num_children++] = q_node;
1183 		lag->q_home[vf_id][i] = dest;
1184 	}
1185 
1186 	ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, false);
1187 	ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, false);
1188 
1189 	if (!e_pf)
1190 		ice_lag_destroy_netdev_list(lag, &ndlist);
1191 }
1192 
1193 /**
1194  * ice_lag_aa_failover - move VF queues in A/A mode
1195  * @lag: primary lag struct
1196  * @dest: index of destination port
1197  * @e_pf: PF struct for event port
1198  */
1199 void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf)
1200 {
1201 	bool odd = true, all = false;
1202 	int i;
1203 
1204 	/* Primary can be a target if down (cleanup), but secondary can't */
1205 	if (dest == ICE_LAGS_IDX && !(lag->port_bitmap & ICE_LAGS_M))
1206 		return;
1207 
1208 	/* Move all queues to a destination if only one port is active,
1209 	 * or no ports are active and dest is primary.
1210 	 */
1211 	if ((lag->port_bitmap ^ (ICE_LAGP_M | ICE_LAGS_M)) ||
1212 	    (!lag->port_bitmap && dest == ICE_LAGP_IDX))
1213 		all = true;
1214 
1215 	ice_for_each_vsi(lag->pf, i)
1216 		if (lag->pf->vsi[i] && lag->pf->vsi[i]->type == ICE_VSI_VF)
1217 			ice_lag_aa_move_vf_qs(lag, dest, i, all, &odd, e_pf);
1218 }
1219 
1220 /**
1221  * ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface
1222  * @lag: primary interface lag struct
1223  * @src_hw: HW struct current node location
1224  * @vsi_num: VSI index in PF space
1225  * @tc: traffic class to move
1226  */
1227 static void
1228 ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num,
1229 		      u8 tc)
1230 {
1231 	DEFINE_RAW_FLEX(struct ice_aqc_move_elem, buf, teid, 1);
1232 	struct device *dev = ice_pf_to_dev(lag->pf);
1233 	u16 numq, valq, num_moved, qbuf_size;
1234 	u16 buf_size = __struct_size(buf);
1235 	struct ice_aqc_cfg_txqs_buf *qbuf;
1236 	struct ice_hw *hw = &lag->pf->hw;
1237 	struct ice_sched_node *n_prt;
1238 	__le32 teid, parent_teid;
1239 	struct ice_vsi_ctx *ctx;
1240 	u32 tmp_teid;
1241 
1242 	ctx = ice_get_vsi_ctx(hw, vsi_num);
1243 	if (!ctx) {
1244 		dev_warn(dev, "Unable to locate VSI context for LAG reclaim\n");
1245 		return;
1246 	}
1247 
1248 	/* check to see if this VF is enabled on this TC */
1249 	if (!ctx->sched.vsi_node[tc])
1250 		return;
1251 
1252 	numq = ctx->num_lan_q_entries[tc];
1253 	teid = ctx->sched.vsi_node[tc]->info.node_teid;
1254 	tmp_teid = le32_to_cpu(teid);
1255 	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
1256 
1257 	/* if !teid or !numq, then this TC is not active */
1258 	if (!tmp_teid || !numq)
1259 		return;
1260 
1261 	/* suspend traffic */
1262 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, true))
1263 		dev_dbg(dev, "Problem suspending traffic for LAG node move\n");
1264 
1265 	/* reconfig queues for new port */
1266 	qbuf_size = struct_size(qbuf, queue_info, numq);
1267 	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
1268 	if (!qbuf) {
1269 		dev_warn(dev, "Failure allocating memory for VF queue recfg buffer\n");
1270 		goto resume_reclaim;
1271 	}
1272 
1273 	/* add the per queue info for the reconfigure command buffer */
1274 	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
1275 	if (!valq) {
1276 		dev_dbg(dev, "No valid queues found for LAG reclaim\n");
1277 		goto reclaim_none;
1278 	}
1279 
1280 	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq,
1281 			       src_hw->port_info->lport, hw->port_info->lport,
1282 			       ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
1283 		dev_warn(dev, "Failure to configure queues for LAG failover\n");
1284 		goto reclaim_qerr;
1285 	}
1286 
1287 reclaim_none:
1288 	kfree(qbuf);
1289 
1290 	/* find parent in primary tree */
1291 	n_prt = ice_lag_get_sched_parent(hw, tc);
1292 	if (!n_prt)
1293 		goto resume_reclaim;
1294 
1295 	/* Move node to new parent */
1296 	buf->hdr.src_parent_teid = parent_teid;
1297 	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
1298 	buf->hdr.num_elems = cpu_to_le16(1);
1299 	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
1300 	buf->teid[0] = teid;
1301 
1302 	if (ice_aq_move_sched_elems(&lag->pf->hw, buf, buf_size, &num_moved))
1303 		dev_warn(dev, "Failure to move VF nodes for LAG reclaim\n");
1304 	else
1305 		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
1306 
1307 	goto resume_reclaim;
1308 
1309 reclaim_qerr:
1310 	kfree(qbuf);
1311 
1312 resume_reclaim:
1313 	/* restart traffic */
1314 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, false))
1315 		dev_warn(dev, "Problem restarting traffic for LAG node reclaim\n");
1316 }
1317 
1318 /**
1319  * ice_lag_reclaim_vf_nodes - primary reclaims nodes when interface leaves bond
1320  * @lag: primary interface lag struct
1321  * @src_hw: HW struct for current node location
1322  */
1323 static void
1324 ice_lag_reclaim_vf_nodes(struct ice_lag *lag, struct ice_hw *src_hw)
1325 {
1326 	struct ice_pf *pf;
1327 	int i, tc;
1328 
1329 	if (!lag->primary || !src_hw)
1330 		return;
1331 
1332 	pf = lag->pf;
1333 	ice_for_each_vsi(pf, i)
1334 		if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF)
1335 			ice_for_each_traffic_class(tc)
1336 				ice_lag_reclaim_vf_tc(lag, src_hw, i, tc);
1337 }
1338 
1339 /**
1340  * ice_lag_link - handle LAG link event
1341  * @lag: LAG info struct
1342  */
1343 static void ice_lag_link(struct ice_lag *lag)
1344 {
1345 	struct ice_pf *pf = lag->pf;
1346 
1347 	if (lag->bonded)
1348 		dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n",
1349 			 netdev_name(lag->netdev));
1350 
1351 	lag->bonded = true;
1352 	lag->role = ICE_LAG_UNSET;
1353 	lag->need_fltr_cfg = true;
1354 	netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n");
1355 }
1356 
1357 /**
1358  * ice_lag_act_bkup_unlink - handle unlink event for A/B bond
1359  * @lag: LAG info struct
1360  */
1361 static void ice_lag_act_bkup_unlink(struct ice_lag *lag)
1362 {
1363 	u8 pri_port, act_port, loc_port;
1364 	struct ice_pf *pf = lag->pf;
1365 
1366 	if (!lag->bonded) {
1367 		netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n");
1368 		return;
1369 	}
1370 
1371 	if (lag->primary) {
1372 		act_port = lag->active_port;
1373 		pri_port = lag->pf->hw.port_info->lport;
1374 		if (act_port != pri_port && act_port != ICE_LAG_INVALID_PORT)
1375 			ice_lag_move_vf_nodes(lag, act_port, pri_port);
1376 		lag->primary = false;
1377 		lag->active_port = ICE_LAG_INVALID_PORT;
1378 
1379 		/* Config primary's eswitch back to normal operation. */
1380 		ice_lag_config_eswitch(lag, lag->netdev);
1381 	} else {
1382 		struct ice_lag *primary_lag;
1383 
1384 		primary_lag = ice_lag_find_primary(lag);
1385 		if (primary_lag) {
1386 			act_port = primary_lag->active_port;
1387 			pri_port = primary_lag->pf->hw.port_info->lport;
1388 			loc_port = pf->hw.port_info->lport;
1389 			if (act_port == loc_port &&
1390 			    act_port != ICE_LAG_INVALID_PORT) {
1391 				ice_lag_reclaim_vf_nodes(primary_lag,
1392 							 &lag->pf->hw);
1393 				primary_lag->active_port = ICE_LAG_INVALID_PORT;
1394 			}
1395 		}
1396 	}
1397 }
1398 
1399 /**
1400  * ice_lag_aa_unlink - handle unlink event for Active-Active bond
1401  * @lag: LAG info struct
1402  */
1403 static void ice_lag_aa_unlink(struct ice_lag *lag)
1404 {
1405 	struct ice_lag *pri_lag;
1406 
1407 	if (lag->primary) {
1408 		pri_lag = lag;
1409 		lag->port_bitmap &= ~ICE_LAGP_M;
1410 	} else {
1411 		pri_lag = ice_lag_find_primary(lag);
1412 		if (pri_lag)
1413 			pri_lag->port_bitmap &= ~ICE_LAGS_M;
1414 	}
1415 
1416 	if (pri_lag) {
1417 		ice_lag_aa_failover(pri_lag, ICE_LAGP_IDX, lag->pf);
1418 		if (lag->primary)
1419 			pri_lag->bond_lport_pri = ICE_LAG_INVALID_PORT;
1420 		else
1421 			pri_lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
1422 	}
1423 }
1424 
1425 /**
1426  * ice_lag_link_unlink - helper function to call lag_link/unlink
1427  * @lag: lag info struct
1428  * @ptr: opaque pointer data
1429  */
1430 static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr)
1431 {
1432 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
1433 	struct netdev_notifier_changeupper_info *info = ptr;
1434 
1435 	if (netdev != lag->netdev)
1436 		return;
1437 
1438 	if (info->linking) {
1439 		ice_lag_link(lag);
1440 	} else {
1441 		if (lag->bond_aa)
1442 			ice_lag_aa_unlink(lag);
1443 		else
1444 			ice_lag_act_bkup_unlink(lag);
1445 
1446 		lag->bonded = false;
1447 		lag->role = ICE_LAG_NONE;
1448 		lag->upper_netdev = NULL;
1449 		lag->bond_aa = false;
1450 		lag->need_fltr_cfg = false;
1451 	}
1452 }
1453 
1454 /**
1455  * ice_lag_set_swid - set the SWID on secondary interface
1456  * @primary_swid: primary interface's SWID
1457  * @local_lag: local interface's LAG struct
1458  * @link: Is this a linking activity
1459  *
1460  * If link is false, then primary_swid is not expected to be valid.
1461  * This function should never be called in interrupt context.
1462  */
1463 static void
1464 ice_lag_set_swid(u16 primary_swid, struct ice_lag *local_lag,
1465 		 bool link)
1466 {
1467 	struct ice_aqc_alloc_free_res_elem *buf;
1468 	struct ice_aqc_set_port_params *cmd;
1469 	struct libie_aq_desc desc;
1470 	u16 buf_len, swid;
1471 	int status, i;
1472 
1473 	buf_len = struct_size(buf, elem, 1);
1474 	buf = kzalloc(buf_len, GFP_KERNEL);
1475 	if (!buf) {
1476 		dev_err(ice_pf_to_dev(local_lag->pf), "-ENOMEM error setting SWID\n");
1477 		return;
1478 	}
1479 
1480 	buf->num_elems = cpu_to_le16(1);
1481 	buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_SWID);
1482 	/* if unlinking, we need to free the shared resource */
1483 	if (!link && local_lag->bond_swid) {
1484 		buf->elem[0].e.sw_resp = cpu_to_le16(local_lag->bond_swid);
1485 		status = ice_aq_alloc_free_res(&local_lag->pf->hw, buf,
1486 					       buf_len, ice_aqc_opc_free_res);
1487 		if (status)
1488 			dev_err(ice_pf_to_dev(local_lag->pf), "Error freeing SWID during LAG unlink\n");
1489 		local_lag->bond_swid = 0;
1490 	}
1491 
1492 	if (link) {
1493 		buf->res_type |= cpu_to_le16(ICE_LAG_RES_SHARED |
1494 					      ICE_LAG_RES_VALID);
1495 		/* store the primary's SWID in case it leaves bond first */
1496 		local_lag->bond_swid = primary_swid;
1497 		buf->elem[0].e.sw_resp = cpu_to_le16(local_lag->bond_swid);
1498 	} else {
1499 		buf->elem[0].e.sw_resp =
1500 			cpu_to_le16(local_lag->pf->hw.port_info->sw_id);
1501 	}
1502 
1503 	status = ice_aq_alloc_free_res(&local_lag->pf->hw, buf, buf_len,
1504 				       ice_aqc_opc_alloc_res);
1505 	if (status)
1506 		dev_err(ice_pf_to_dev(local_lag->pf), "Error subscribing to SWID 0x%04X\n",
1507 			local_lag->bond_swid);
1508 
1509 	kfree(buf);
1510 
1511 	/* Configure port param SWID to correct value */
1512 	if (link)
1513 		swid = primary_swid;
1514 	else
1515 		swid = local_lag->pf->hw.port_info->sw_id;
1516 
1517 	cmd = libie_aq_raw(&desc);
1518 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params);
1519 
1520 	cmd->swid = cpu_to_le16(ICE_AQC_PORT_SWID_VALID | swid);
1521 	/* If this is happening in reset context, it is possible that the
1522 	 * primary interface has not finished setting its SWID to SHARED
1523 	 * yet.  Allow retries to account for this timing issue between
1524 	 * interfaces.
1525 	 */
1526 	for (i = 0; i < ICE_LAG_RESET_RETRIES; i++) {
1527 		status = ice_aq_send_cmd(&local_lag->pf->hw, &desc, NULL, 0,
1528 					 NULL);
1529 		if (!status)
1530 			break;
1531 
1532 		usleep_range(1000, 2000);
1533 	}
1534 
1535 	if (status)
1536 		dev_err(ice_pf_to_dev(local_lag->pf), "Error setting SWID in port params %d\n",
1537 			status);
1538 }
1539 
1540 /**
1541  * ice_lag_primary_swid - set/clear the SHARED attrib of primary's SWID
1542  * @lag: primary interface's lag struct
1543  * @link: is this a linking activity
1544  *
1545  * Implement setting primary SWID as shared using 0x020B
1546  */
1547 static void ice_lag_primary_swid(struct ice_lag *lag, bool link)
1548 {
1549 	struct ice_hw *hw = &lag->pf->hw;
1550 	u16 swid = hw->port_info->sw_id;
1551 
1552 	if (ice_share_res(hw, ICE_AQC_RES_TYPE_SWID, link, swid))
1553 		dev_warn(ice_pf_to_dev(lag->pf), "Failure to set primary interface shared status\n");
1554 }
1555 
1556 /**
1557  * ice_lag_add_prune_list - Adds event_pf's VSI to primary's prune list
1558  * @lag: lag info struct
1559  * @event_pf: PF struct for VSI we are adding to primary's prune list
1560  */
1561 static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
1562 {
1563 	u16 rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx, num_vsi = 1;
1564 	struct ice_sw_rule_vsi_list *s_rule;
1565 	struct device *dev;
1566 
1567 	dev = ice_pf_to_dev(lag->pf);
1568 	event_vsi_num = event_pf->vsi[0]->vsi_num;
1569 	prim_vsi_idx = lag->pf->vsi[0]->idx;
1570 
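	/* Find the VLAN prune list that the primary's main VSI belongs to,
	 * then add the event PF's main VSI to that same list.
	 */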
1571 	if (!ice_find_vsi_list_entry(&lag->pf->hw, ICE_SW_LKUP_VLAN,
1572 				     prim_vsi_idx, &vsi_list_id)) {
1573 		dev_warn(dev, "Could not locate prune list when setting up SRIOV LAG\n");
1574 		return;
1575 	}
1576 
1577 	rule_buf_sz = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
1578 	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
1579 	if (!s_rule) {
1580 		dev_warn(dev, "Error allocating space for prune list when configuring SRIOV LAG\n");
1581 		return;
1582 	}
1583 
1584 	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_PRUNE_LIST_SET);
1585 	s_rule->index = cpu_to_le16(vsi_list_id);
1586 	s_rule->number_vsi = cpu_to_le16(num_vsi);
1587 	s_rule->vsi[0] = cpu_to_le16(event_vsi_num);
1588 
1589 	if (ice_aq_sw_rules(&event_pf->hw, s_rule, rule_buf_sz, 1,
1590 			    ice_aqc_opc_update_sw_rules, NULL))
1591 		dev_warn(dev, "Error adding VSI prune list\n");
1592 	kfree(s_rule);
1593 }
1594 
1595 /**
1596  * ice_lag_del_prune_list - Remove secondary's vsi from primary's prune list
1597  * @lag: primary interface's ice_lag struct
1598  * @event_pf: PF struct for unlinking interface
1599  */
1600 static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
1601 {
1602 	u16 vsi_num, vsi_idx, rule_buf_sz, vsi_list_id, num_vsi = 1;
1603 	struct ice_sw_rule_vsi_list *s_rule;
1604 	struct device *dev;
1605 
1606 	dev = ice_pf_to_dev(lag->pf);
1607 	vsi_num = event_pf->vsi[0]->vsi_num;
1608 	vsi_idx = lag->pf->vsi[0]->idx;
1609 
1610 	if (!ice_find_vsi_list_entry(&lag->pf->hw, ICE_SW_LKUP_VLAN,
1611 				     vsi_idx, &vsi_list_id)) {
1612 		dev_warn(dev, "Could not locate prune list when unwinding SRIOV LAG\n");
1613 		return;
1614 	}
1615 
1616 	rule_buf_sz = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
1617 	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
1618 	if (!s_rule) {
1619 		dev_warn(dev, "Error allocating prune list when unwinding SRIOV LAG\n");
1620 		return;
1621 	}
1622 
1623 	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR);
1624 	s_rule->index = cpu_to_le16(vsi_list_id);
1625 	s_rule->number_vsi = cpu_to_le16(num_vsi);
1626 	s_rule->vsi[0] = cpu_to_le16(vsi_num);
1627 
1628 	if (ice_aq_sw_rules(&event_pf->hw, (struct ice_aqc_sw_rules *)s_rule,
1629 			    rule_buf_sz, 1, ice_aqc_opc_update_sw_rules, NULL))
1630 		dev_warn(dev, "Error clearing VSI prune list\n");
1631 
1632 	kfree(s_rule);
1633 }
1634 
1635 /**
1636  * ice_lag_init_feature_support_flag - Check for package and NVM support for LAG
1637  * @pf: PF struct
1638  */
1639 static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
1640 {
1641 	struct ice_hw_common_caps *caps;
1642 
1643 	caps = &pf->hw.dev_caps.common_cap;
1644 	if (caps->roce_lag)
1645 		ice_set_feature_support(pf, ICE_F_ROCE_LAG);
1646 	else
1647 		ice_clear_feature_support(pf, ICE_F_ROCE_LAG);
1648 
1649 	if (caps->sriov_lag && ice_pkg_has_lport_extract(&pf->hw))
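	/* SRIOV LAG (standard and active-active) additionally requires lport
	 * extraction support in the loaded DDP package, not just the NVM cap.
	 */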
1650 		ice_set_feature_support(pf, ICE_F_SRIOV_LAG);
1651 	else
1652 		ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
1653 
1654 	if (caps->sriov_aa_lag && ice_pkg_has_lport_extract(&pf->hw))
1655 		ice_set_feature_support(pf, ICE_F_SRIOV_AA_LAG);
1656 	else
1657 		ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG);
1658 }
1659 
1660 /**
1661  * ice_lag_changeupper_event - handle LAG changeupper event
1662  * @lag: LAG info struct
1663  * @ptr: opaque pointer data
1664  */
1665 static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
1666 {
1667 	struct netdev_notifier_changeupper_info *info = ptr;
1668 	struct ice_lag *primary_lag;
1669 	struct net_device *netdev;
1670 
1671 	netdev = netdev_notifier_info_to_dev(ptr);
1672 
1673 	/* not for this netdev */
1674 	if (netdev != lag->netdev)
1675 		return;
1676 
1677 	primary_lag = ice_lag_find_primary(lag);
1678 	if (info->linking) {
1679 		lag->upper_netdev = info->upper_dev;
1680 		/* If there is not already a primary interface in the LAG,
1681 		 * then mark this one as primary.
1682 		 */
1683 		if (!primary_lag) {
1684 			lag->primary = true;
1685 			if (!ice_is_switchdev_running(lag->pf))
1686 				return;
1687 
1688 			/* Configure primary's SWID to be shared */
1689 			ice_lag_primary_swid(lag, true);
1690 			primary_lag = lag;
1691 			lag->bond_lport_pri = lag->pf->hw.port_info->lport;
1692 			lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
1693 			lag->port_bitmap = 0;
1694 		} else {
1695 			u16 swid;
1696 
1697 			if (!ice_is_switchdev_running(primary_lag->pf))
1698 				return;
1699 
1700 			swid = primary_lag->pf->hw.port_info->sw_id;
1701 			ice_lag_set_swid(swid, lag, true);
1702 			ice_lag_add_prune_list(primary_lag, lag->pf);
1703 			primary_lag->bond_lport_sec =
1704 				lag->pf->hw.port_info->lport;
1705 		}
1706 		/* add filter for primary control packets */
1707 		ice_lag_cfg_lp_fltr(lag, true, true);
1708 	} else {
1709 		if (!primary_lag && lag->primary)
1710 			primary_lag = lag;
1711 
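		/* On unlink, release any placeholder VF VSIs that were created
		 * on the secondary interface for active-active queue moves.
		 */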
1712 		if (primary_lag) {
1713 			for (int i = 0; i < ICE_MAX_SRIOV_VFS; i++) {
1714 				if (primary_lag->sec_vf[i]) {
1715 					ice_vsi_release(primary_lag->sec_vf[i]);
1716 					primary_lag->sec_vf[i] = NULL;
1717 				}
1718 			}
1719 		}
1720 
1721 		if (!lag->primary) {
1722 			ice_lag_set_swid(0, lag, false);
1723 			if (primary_lag)
1724 				primary_lag->bond_lport_sec =
1725 					ICE_LAG_INVALID_PORT;
1726 		} else {
1727 			if (primary_lag && lag->primary) {
1728 				ice_lag_primary_swid(lag, false);
1729 				ice_lag_del_prune_list(primary_lag, lag->pf);
1730 			}
1731 		}
1732 		/* remove filter for control packets */
1733 		ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa);
1734 	}
1735 }
1736 
1737 /**
1738  * ice_lag_monitor_link - monitor interfaces entering/leaving the aggregate
1739  * @lag: lag info struct
1740  * @ptr: opaque data containing notifier event
1741  *
1742  * This function only operates after a primary has been set.
1743  */
1744 static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
1745 {
1746 	struct netdev_notifier_changeupper_info *info = ptr;
1747 	struct ice_hw *prim_hw, *active_hw;
1748 	struct net_device *event_netdev;
1749 	struct ice_pf *pf;
1750 	u8 prim_port;
1751 
1752 	if (!lag->primary)
1753 		return;
1754 
1755 	event_netdev = netdev_notifier_info_to_dev(ptr);
1756 	if (!netif_is_same_ice(lag->pf, event_netdev))
1757 		return;
1758 
1759 	if (info->upper_dev != lag->upper_netdev)
1760 		return;
1761 
1762 	if (info->linking)
1763 		return;
1764 
1765 	pf = lag->pf;
1766 	prim_hw = &pf->hw;
1767 	prim_port = prim_hw->port_info->lport;
1768 
1769 	/* Since there are only two interfaces allowed in SRIOV+LAG, if
1770 	 * one port is leaving, then nodes need to be on primary
1771 	 * interface.
1772 	 */
1773 	if (lag->bond_aa) {
1774 		struct ice_netdev_priv *e_ndp;
1775 		struct ice_pf *e_pf;
1776 
1777 		e_ndp = netdev_priv(event_netdev);
1778 		e_pf = e_ndp->vsi->back;
1779 
1780 		if (lag->bond_lport_pri != ICE_LAG_INVALID_PORT &&
1781 		    lag->port_bitmap & ICE_LAGS_M) {
1782 			lag->port_bitmap &= ~ICE_LAGS_M;
1783 			ice_lag_aa_failover(lag, ICE_LAGP_IDX, e_pf);
1784 			lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
1785 		}
1786 	} else {
1787 		if (prim_port != lag->active_port &&
1788 		    lag->active_port != ICE_LAG_INVALID_PORT) {
1789 			active_hw = ice_lag_find_hw_by_lport(lag,
1790 							     lag->active_port);
1791 			ice_lag_reclaim_vf_nodes(lag, active_hw);
1792 			lag->active_port = ICE_LAG_INVALID_PORT;
1793 		}
1794 	}
1795 }
1796 
1797 /**
1798  * ice_lag_monitor_act_bkup - keep track of which port is active in A/B LAG
1799  * @lag: lag info struct
1800  * @b_info: bonding info
1801  * @event_netdev: net_device for target netdev
1802  *
1803  * This function is for the primary PF to monitor changes in which port is
1804  * active and handle changes for SRIOV VF functionality
1805  */
1806 static void ice_lag_monitor_act_bkup(struct ice_lag *lag,
1807 				     struct netdev_bonding_info *b_info,
1808 				     struct net_device *event_netdev)
1809 {
1810 	struct ice_netdev_priv *event_np;
1811 	struct ice_pf *pf, *event_pf;
1812 	u8 prim_port, event_port;
1813 
1814 	pf = lag->pf;
1815 	if (!pf)
1816 		return;
1817 
1818 	event_np = netdev_priv(event_netdev);
1819 	event_pf = event_np->vsi->back;
1820 	event_port = event_pf->hw.port_info->lport;
1821 	prim_port = pf->hw.port_info->lport;
1822 
1823 	if (!b_info->slave.state) {
1824 		/* if no port is currently active, then nodes and filters exist
1825 		 * on primary port, check if we need to move them
1826 		 */
1827 		if (lag->active_port == ICE_LAG_INVALID_PORT) {
1828 			if (event_port != prim_port)
1829 				ice_lag_move_vf_nodes(lag, prim_port,
1830 						      event_port);
1831 			lag->active_port = event_port;
1832 			ice_lag_config_eswitch(lag, event_netdev);
1833 			return;
1834 		}
1835 
1836 		/* active port is already set and is current event port */
1837 		if (lag->active_port == event_port)
1838 			return;
1839 		/* new active port */
1840 		ice_lag_move_vf_nodes(lag, lag->active_port, event_port);
1841 		lag->active_port = event_port;
1842 		ice_lag_config_eswitch(lag, event_netdev);
1843 	} else {
		/* This port is not the currently active port (e.g. a new
		 * active port has already claimed the nodes and filters)
		 */
1847 		if (lag->active_port != event_port)
1848 			return;
		/* The active port is going down, so no port in the bond has
		 * link - set the active port to invalid and move the nodes
		 * and filters back to the primary if not already there
		 */
1853 		if (event_port != prim_port)
1854 			ice_lag_move_vf_nodes(lag, event_port, prim_port);
1855 		lag->active_port = ICE_LAG_INVALID_PORT;
1856 	}
1857 }
1858 
1859 /**
1860  * ice_lag_aa_clear_spoof - adjust the placeholder VSI spoofing for A/A LAG
1861  * @vsi: placeholder VSI to adjust
1862  */
1863 static void ice_lag_aa_clear_spoof(struct ice_vsi *vsi)
1864 {
1865 	ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
1866 }
1867 
1868 /**
1869  * ice_lag_monitor_act_act - Keep track of active ports in A/A LAG
1870  * @lag: lag struct for primary interface
1871  * @b_info: bonding_info for event
1872  * @event_netdev: net_device for target netdev
1873  */
1874 static void ice_lag_monitor_act_act(struct ice_lag *lag,
1875 				    struct netdev_bonding_info *b_info,
1876 				    struct net_device *event_netdev)
1877 {
1878 	struct ice_netdev_priv *event_np;
1879 	u8 prim_port, event_port;
1880 	struct ice_pf *event_pf;
1881 
1882 	event_np = netdev_priv(event_netdev);
1883 	event_pf = event_np->vsi->back;
1884 	event_port = event_pf->hw.port_info->lport;
1885 	prim_port = lag->pf->hw.port_info->lport;
1886 
1887 	if (b_info->slave.link == BOND_LINK_UP) {
1888 		/* Port is coming up */
1889 		if (prim_port == event_port) {
1890 			/* Processing event for primary interface */
1891 			if (lag->bond_lport_pri == ICE_LAG_INVALID_PORT)
1892 				return;
1893 
1894 			if (!(lag->port_bitmap & ICE_LAGP_M)) {
1895 				/* Primary port was not marked up before, move
1896 				 * some|all VF queues to it and mark as up
1897 				 */
1898 				lag->port_bitmap |= ICE_LAGP_M;
1899 				ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf);
1900 			}
1901 		} else {
1902 			if (lag->bond_lport_sec == ICE_LAG_INVALID_PORT)
1903 				return;
1904 
1905 			/* Create placeholder VSIs on secondary PF.
1906 			 * The placeholder is necessary so that we have
1907 			 * an element that represents the VF on the secondary
1908 			 * interface's scheduling tree.  This will be a tree
1909 			 * root for scheduling nodes when they are moved to
1910 			 * the secondary interface.
1911 			 */
1912 			if (!lag->sec_vf[0]) {
1913 				struct ice_vsi_cfg_params params = {};
1914 				struct ice_vsi *nvsi;
1915 				struct ice_vf *vf;
1916 				unsigned int bkt;
1917 
1918 				params.type = ICE_VSI_VF;
1919 				params.port_info = event_pf->hw.port_info;
1920 				params.flags = ICE_VSI_FLAG_INIT;
1921 
1922 				ice_for_each_vf(lag->pf, bkt, vf) {
1923 					params.vf = vf;
					nvsi = ice_vsi_setup(event_pf,
							     &params);
					if (!nvsi)
						continue;
					ice_lag_aa_clear_spoof(nvsi);
					lag->sec_vf[vf->vf_id] = nvsi;
1928 				}
1929 			}
1930 
1931 			if (!(lag->port_bitmap & ICE_LAGS_M)) {
1932 				/* Secondary port was not marked up before,
1933 				 * move some|all VF queues to it and mark as up
1934 				 */
1935 				lag->port_bitmap |= ICE_LAGS_M;
1936 				ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf);
1937 			}
1938 		}
1939 	} else {
1940 		/* Port is going down */
1941 		if (prim_port == event_port) {
1942 			lag->port_bitmap &= ~ICE_LAGP_M;
1943 			ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf);
1944 		} else {
1945 			lag->port_bitmap &= ~ICE_LAGS_M;
1946 			ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf);
1947 		}
1948 	}
1949 }
1950 
1951 /**
 * ice_lag_monitor_info - call the relevant A/A or A/B monitoring function
1953  * @lag: lag info struct
1954  * @ptr: opaque data containing notifier event
1955  *
1956  * This function is for the primary PF to monitor changes in which port is
1957  * active and handle changes for SRIOV VF functionality
1958  */
1959 static void ice_lag_monitor_info(struct ice_lag *lag, void *ptr)
1960 {
1961 	struct netdev_notifier_bonding_info *info = ptr;
1962 	struct net_device *event_netdev, *event_upper;
1963 	struct netdev_bonding_info *bonding_info;
1964 
1965 	if (!lag->primary)
1966 		return;
1967 
1968 	event_netdev = netdev_notifier_info_to_dev(ptr);
1969 	bonding_info = &info->bonding_info;
1970 	rcu_read_lock();
1971 	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
1972 	rcu_read_unlock();
1973 	if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev)
1974 		return;
1975 
1976 	if (lag->bond_aa)
1977 		ice_lag_monitor_act_act(lag, bonding_info, event_netdev);
1978 	else
1979 		ice_lag_monitor_act_bkup(lag, bonding_info, event_netdev);
1980 }
1981 /**
1982  * ice_lag_chk_comp - evaluate bonded interface for feature support
1983  * @lag: lag info struct
1984  * @ptr: opaque data for netdev event info
1985  */
1986 static bool
1987 ice_lag_chk_comp(struct ice_lag *lag, void *ptr)
1988 {
1989 	struct netdev_notifier_bonding_info *info = ptr;
1990 	struct net_device *event_netdev, *event_upper;
1991 	struct netdev_bonding_info *bonding_info;
1992 	struct list_head *tmp;
1993 	struct device *dev;
1994 	int count = 0;
1995 
	/* All members need to know whether the bond is A/A or A/B */
	bonding_info = &info->bonding_info;
	lag->bond_mode = bonding_info->master.bond_mode;
	lag->bond_aa = lag->bond_mode != BOND_MODE_ACTIVEBACKUP;
2003 
2004 	if (!lag->primary)
2005 		return true;
2006 
2007 	event_netdev = netdev_notifier_info_to_dev(ptr);
2008 	rcu_read_lock();
2009 	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
2010 	rcu_read_unlock();
2011 	if (event_upper != lag->upper_netdev)
2012 		return true;
2013 
2014 	dev = ice_pf_to_dev(lag->pf);
2015 
	/* Only switchdev mode is supported for SRIOV VF LAG - the
	 * primary interface has to be in switchdev mode
	 */
2019 	if (!ice_is_switchdev_running(lag->pf)) {
2020 		dev_info(dev, "Primary interface not in switchdev mode - VF LAG disabled\n");
2021 		return false;
2022 	}
2023 
2024 	if (lag->bond_aa && !ice_is_feature_supported(lag->pf,
2025 						      ICE_F_SRIOV_AA_LAG))
2026 		return false;
2027 
2028 	list_for_each(tmp, lag->netdev_head) {
2029 		struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg;
2030 		struct ice_lag_netdev_list *entry;
2031 		struct ice_netdev_priv *peer_np;
2032 		struct net_device *peer_netdev;
2033 		struct ice_vsi *vsi, *peer_vsi;
2034 		struct ice_pf *peer_pf;
2035 
2036 		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
2037 		peer_netdev = entry->netdev;
2038 		if (!netif_is_ice(peer_netdev)) {
2039 			dev_info(dev, "Found %s non-ice netdev in LAG - VF LAG disabled\n",
2040 				 netdev_name(peer_netdev));
2041 			return false;
2042 		}
2043 
2044 		count++;
2045 		if (count > 2) {
2046 			dev_info(dev, "Found more than two netdevs in LAG - VF LAG disabled\n");
2047 			return false;
2048 		}
2049 
2050 		peer_np = netdev_priv(peer_netdev);
2051 		vsi = ice_get_main_vsi(lag->pf);
2052 		peer_vsi = peer_np->vsi;
2053 		if (lag->pf->pdev->bus != peer_vsi->back->pdev->bus ||
2054 		    lag->pf->pdev->slot != peer_vsi->back->pdev->slot) {
2055 			dev_info(dev, "Found %s on different device in LAG - VF LAG disabled\n",
2056 				 netdev_name(peer_netdev));
2057 			return false;
2058 		}
2059 
2060 		dcb_cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
2061 		peer_dcb_cfg = &peer_vsi->port_info->qos_cfg.local_dcbx_cfg;
2062 		if (memcmp(dcb_cfg, peer_dcb_cfg,
2063 			   sizeof(struct ice_dcbx_cfg))) {
2064 			dev_info(dev, "Found %s with different DCB in LAG - VF LAG disabled\n",
2065 				 netdev_name(peer_netdev));
2066 			return false;
2067 		}
2068 
2069 		peer_pf = peer_vsi->back;
2070 		if (test_bit(ICE_FLAG_FW_LLDP_AGENT, peer_pf->flags)) {
2071 			dev_warn(dev, "Found %s with FW LLDP agent active - VF LAG disabled\n",
2072 				 netdev_name(peer_netdev));
2073 			return false;
2074 		}
2075 	}
2076 
2077 	return true;
2078 }
2079 
2080 /**
2081  * ice_lag_unregister - handle netdev unregister events
2082  * @lag: LAG info struct
2083  * @event_netdev: netdev struct for target of notifier event
2084  */
2085 static void
2086 ice_lag_unregister(struct ice_lag *lag, struct net_device *event_netdev)
2087 {
2088 	struct ice_netdev_priv *np;
2089 	struct ice_pf *event_pf;
2090 	struct ice_lag *p_lag;
2091 
2092 	p_lag = ice_lag_find_primary(lag);
2093 	np = netdev_priv(event_netdev);
2094 	event_pf = np->vsi->back;
2095 
2096 	if (p_lag) {
2097 		if (p_lag->active_port != p_lag->pf->hw.port_info->lport &&
2098 		    p_lag->active_port != ICE_LAG_INVALID_PORT) {
2099 			struct ice_hw *active_hw;
2100 
2101 			active_hw = ice_lag_find_hw_by_lport(lag,
2102 							     p_lag->active_port);
2103 			if (active_hw)
2104 				ice_lag_reclaim_vf_nodes(p_lag, active_hw);
2105 			lag->active_port = ICE_LAG_INVALID_PORT;
2106 		}
2107 	}
2108 
	/* primary interface handling its own unregister */
2110 	if (lag->primary && lag->netdev == event_netdev)
2111 		ice_lag_primary_swid(lag, false);
2112 
	/* primary interface handling a secondary's unregister */
2114 	if (lag->primary && lag->netdev != event_netdev)
2115 		ice_lag_del_prune_list(lag, event_pf);
2116 
	/* secondary interface handling its own unregister */
2118 	if (!lag->primary && lag->netdev == event_netdev)
2119 		ice_lag_set_swid(0, lag, false);
2120 }
2121 
2122 /**
 * ice_lag_monitor_rdma - set and clear RDMA functionality
2124  * @lag: pointer to lag struct
2125  * @ptr: opaque data for netdev event info
2126  */
2127 static void
2128 ice_lag_monitor_rdma(struct ice_lag *lag, void *ptr)
2129 {
2130 	struct netdev_notifier_changeupper_info *info = ptr;
2131 	struct net_device *netdev;
2132 
2133 	netdev = netdev_notifier_info_to_dev(ptr);
2134 
2135 	if (netdev != lag->netdev)
2136 		return;
2137 
2138 	if (info->linking)
2139 		ice_clear_rdma_cap(lag->pf);
2140 	else
2141 		ice_set_rdma_cap(lag->pf);
2142 }
2143 
2144 /**
2145  * ice_lag_chk_disabled_bond - monitor interfaces entering/leaving disabled bond
2146  * @lag: lag info struct
2147  * @ptr: opaque data containing event
2148  *
 * As interfaces enter a bond, determine whether the bond is currently
 * SRIOV LAG compliant and flag it if not.  As interfaces leave the
 * bond, reset their compliance status.
2152  */
2153 static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr)
2154 {
2155 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
2156 	struct netdev_notifier_changeupper_info *info = ptr;
2157 	struct ice_lag *prim_lag;
2158 
2159 	if (netdev != lag->netdev)
2160 		return;
2161 
2162 	if (info->linking) {
2163 		prim_lag = ice_lag_find_primary(lag);
2164 		if (prim_lag &&
2165 		    !ice_is_feature_supported(prim_lag->pf, ICE_F_SRIOV_LAG)) {
2166 			ice_clear_feature_support(lag->pf, ICE_F_SRIOV_LAG);
2167 			netdev_info(netdev, "Interface added to non-compliant SRIOV LAG aggregate\n");
2168 		}
2169 	} else {
2170 		ice_lag_init_feature_support_flag(lag->pf);
2171 	}
2172 }
2173 
2174 /**
2175  * ice_lag_disable_sriov_bond - set members of bond as not supporting SRIOV LAG
 * @lag: primary interface's lag struct
2177  */
2178 static void ice_lag_disable_sriov_bond(struct ice_lag *lag)
2179 {
2180 	struct ice_netdev_priv *np = netdev_priv(lag->netdev);
2181 	struct ice_pf *pf = np->vsi->back;
2182 
2183 	ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
2184 	ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG);
2185 }
2186 
2187 /**
2188  * ice_lag_preset_drop_fltr - preset drop filter for A/B bonds
2189  * @lag: local lag struct
2190  * @ptr: opaque data containing event
2191  *
 * Sets the initial drop filter for the secondary interface in an
 * active-backup bond
2194  */
2195 static void ice_lag_preset_drop_fltr(struct ice_lag *lag, void *ptr)
2196 {
2197 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
2198 
2199 	if (netdev != lag->netdev || lag->primary || !lag->need_fltr_cfg)
2200 		return;
2201 
2202 	ice_lag_cfg_drop_fltr(lag, true);
2203 	lag->need_fltr_cfg = false;
2204 }
2205 
2206 /**
2207  * ice_lag_process_event - process a task assigned to the lag_wq
2208  * @work: pointer to work_struct
2209  */
2210 static void ice_lag_process_event(struct work_struct *work)
2211 {
2212 	struct netdev_notifier_changeupper_info *info;
2213 	struct ice_lag_work *lag_work;
2214 	struct net_device *netdev;
2215 	struct list_head *tmp, *n;
2216 	struct ice_pf *pf;
2217 
2218 	lag_work = container_of(work, struct ice_lag_work, lag_task);
2219 	pf = lag_work->lag->pf;
2220 
2221 	mutex_lock(&pf->lag_mutex);
2222 	lag_work->lag->netdev_head = &lag_work->netdev_list.node;
2223 
2224 	switch (lag_work->event) {
2225 	case NETDEV_CHANGEUPPER:
2226 		info = &lag_work->info.changeupper_info;
2227 		ice_lag_chk_disabled_bond(lag_work->lag, info);
2228 		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
2229 			ice_lag_monitor_link(lag_work->lag, info);
2230 			ice_lag_changeupper_event(lag_work->lag, info);
2231 			ice_lag_link_unlink(lag_work->lag, info);
2232 		}
2233 		ice_lag_monitor_rdma(lag_work->lag, info);
2234 		break;
2235 	case NETDEV_BONDING_INFO:
2236 		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
2237 			if (!ice_lag_chk_comp(lag_work->lag,
2238 					      &lag_work->info.bonding_info)) {
2239 				netdev = lag_work->info.bonding_info.info.dev;
2240 				ice_lag_disable_sriov_bond(lag_work->lag);
2241 				ice_lag_unregister(lag_work->lag, netdev);
2242 				goto lag_cleanup;
2243 			}
2244 			ice_lag_cfg_pf_fltrs(lag_work->lag,
2245 					     &lag_work->info.bonding_info);
2246 			ice_lag_preset_drop_fltr(lag_work->lag,
2247 						 &lag_work->info.bonding_info);
2248 			ice_lag_monitor_info(lag_work->lag,
2249 					     &lag_work->info.bonding_info);
2250 		}
2251 		ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info);
2252 		break;
2253 	case NETDEV_UNREGISTER:
2254 		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
2255 			netdev = lag_work->info.bonding_info.info.dev;
2256 			if ((netdev == lag_work->lag->netdev ||
2257 			     lag_work->lag->primary) && lag_work->lag->bonded)
2258 				ice_lag_unregister(lag_work->lag, netdev);
2259 		}
2260 		break;
2261 	default:
2262 		break;
2263 	}
2264 
2265 lag_cleanup:
2266 	/* cleanup resources allocated for this work item */
2267 	list_for_each_safe(tmp, n, &lag_work->netdev_list.node) {
2268 		struct ice_lag_netdev_list *entry;
2269 
2270 		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
2271 		list_del(&entry->node);
2272 		kfree(entry);
2273 	}
2274 	lag_work->lag->netdev_head = NULL;
2275 
2276 	mutex_unlock(&pf->lag_mutex);
2277 
2278 	kfree(lag_work);
2279 }
2280 
2281 /**
2282  * ice_lag_event_handler - handle LAG events from netdev
2283  * @notif_blk: notifier block registered by this netdev
2284  * @event: event type
2285  * @ptr: opaque data containing notifier event
2286  */
2287 static int
2288 ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
2289 		      void *ptr)
2290 {
2291 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
2292 	struct net_device *upper_netdev;
2293 	struct ice_lag_work *lag_work;
2294 	struct ice_lag *lag;
2295 
2296 	if (!netif_is_ice(netdev))
2297 		return NOTIFY_DONE;
2298 
2299 	if (event != NETDEV_CHANGEUPPER && event != NETDEV_BONDING_INFO &&
2300 	    event != NETDEV_UNREGISTER)
2301 		return NOTIFY_DONE;
2302 
2303 	if (!(netdev->priv_flags & IFF_BONDING))
2304 		return NOTIFY_DONE;
2305 
2306 	lag = container_of(notif_blk, struct ice_lag, notif_block);
2307 	if (!lag->netdev)
2308 		return NOTIFY_DONE;
2309 
2310 	if (!net_eq(dev_net(netdev), &init_net))
2311 		return NOTIFY_DONE;
2312 
2313 	/* This memory will be freed at the end of ice_lag_process_event */
2314 	lag_work = kzalloc(sizeof(*lag_work), GFP_KERNEL);
2315 	if (!lag_work)
2316 		return -ENOMEM;
2317 
2318 	lag_work->event_netdev = netdev;
2319 	lag_work->lag = lag;
2320 	lag_work->event = event;
2321 	if (event == NETDEV_CHANGEUPPER) {
2322 		struct netdev_notifier_changeupper_info *info = ptr;
2323 
2324 		upper_netdev = info->upper_dev;
2325 	} else {
2326 		upper_netdev = netdev_master_upper_dev_get(netdev);
2327 	}
2328 
2329 	INIT_LIST_HEAD(&lag_work->netdev_list.node);
2330 	if (upper_netdev) {
2331 		struct ice_lag_netdev_list *nd_list;
2332 		struct net_device *tmp_nd;
2333 
2334 		rcu_read_lock();
2335 		for_each_netdev_in_bond_rcu(upper_netdev, tmp_nd) {
2336 			nd_list = kzalloc(sizeof(*nd_list), GFP_ATOMIC);
2337 			if (!nd_list)
2338 				break;
2339 
2340 			nd_list->netdev = tmp_nd;
2341 			list_add(&nd_list->node, &lag_work->netdev_list.node);
2342 		}
2343 		rcu_read_unlock();
2344 	}
2345 
2346 	switch (event) {
2347 	case NETDEV_CHANGEUPPER:
2348 		lag_work->info.changeupper_info =
2349 			*((struct netdev_notifier_changeupper_info *)ptr);
2350 		break;
2351 	case NETDEV_BONDING_INFO:
2352 		lag_work->info.bonding_info =
2353 			*((struct netdev_notifier_bonding_info *)ptr);
2354 		break;
2355 	default:
2356 		lag_work->info.notifier_info =
2357 			*((struct netdev_notifier_info *)ptr);
2358 		break;
2359 	}
2360 
2361 	INIT_WORK(&lag_work->lag_task, ice_lag_process_event);
2362 	queue_work(ice_lag_wq, &lag_work->lag_task);
2363 
2364 	return NOTIFY_DONE;
2365 }
2366 
2367 /**
2368  * ice_register_lag_handler - register LAG handler on netdev
2369  * @lag: LAG struct
2370  */
2371 static int ice_register_lag_handler(struct ice_lag *lag)
2372 {
2373 	struct notifier_block *notif_blk = &lag->notif_block;
2374 	struct device *dev = ice_pf_to_dev(lag->pf);
2375 
2376 	if (!notif_blk->notifier_call) {
2377 		notif_blk->notifier_call = ice_lag_event_handler;
2378 		if (register_netdevice_notifier(notif_blk)) {
2379 			notif_blk->notifier_call = NULL;
2380 			dev_err(dev, "FAIL register LAG event handler!\n");
2381 			return -EINVAL;
2382 		}
2383 		dev_dbg(dev, "LAG event handler registered\n");
2384 	}
2385 	return 0;
2386 }
2387 
2388 /**
2389  * ice_unregister_lag_handler - unregister LAG handler on netdev
2390  * @lag: LAG struct
2391  */
2392 static void ice_unregister_lag_handler(struct ice_lag *lag)
2393 {
2394 	struct notifier_block *notif_blk = &lag->notif_block;
2395 	struct device *dev = ice_pf_to_dev(lag->pf);
2396 
2397 	if (notif_blk->notifier_call) {
2398 		unregister_netdevice_notifier(notif_blk);
2399 		dev_dbg(dev, "LAG event handler unregistered\n");
2400 	}
2401 }
2402 
2403 /**
 * ice_create_lag_recipe - add a switch recipe based on a template
2405  * @hw: pointer to HW struct
2406  * @rid: pointer to u16 to pass back recipe index
2407  * @base_recipe: recipe to base the new recipe on
2408  * @prio: priority for new recipe
2409  *
 * Return: 0 on success, negative error code on failure.
2411  */
2412 static int ice_create_lag_recipe(struct ice_hw *hw, u16 *rid,
2413 				 const u8 *base_recipe, u8 prio)
2414 {
2415 	struct ice_aqc_recipe_data_elem *new_rcp;
2416 	int err;
2417 
2418 	err = ice_alloc_recipe(hw, rid);
2419 	if (err)
2420 		return err;
2421 
2422 	new_rcp = kzalloc(ICE_RECIPE_LEN * ICE_MAX_NUM_RECIPES, GFP_KERNEL);
2423 	if (!new_rcp)
2424 		return -ENOMEM;
2425 
2426 	memcpy(new_rcp, base_recipe, ICE_RECIPE_LEN);
2427 	new_rcp->content.act_ctrl_fwd_priority = prio;
2428 	new_rcp->content.rid = *rid | ICE_AQ_RECIPE_ID_IS_ROOT;
2429 	new_rcp->recipe_indx = *rid;
2430 	bitmap_zero((unsigned long *)new_rcp->recipe_bitmap,
2431 		    ICE_MAX_NUM_RECIPES);
2432 	set_bit(*rid, (unsigned long *)new_rcp->recipe_bitmap);
2433 
2434 	err = ice_aq_add_recipe(hw, new_rcp, 1, NULL);
2435 	if (err)
2436 		*rid = 0;
2437 
2438 	kfree(new_rcp);
2439 	return err;
2440 }
2441 
2442 /**
2443  * ice_lag_move_vf_nodes_tc_sync - move a VF's nodes for a tc during reset
2444  * @lag: primary interfaces lag struct
2445  * @dest_hw: HW struct for destination's interface
2446  * @vsi_num: VSI index in PF space
2447  * @tc: traffic class to move
2448  */
2449 static void
2450 ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw,
2451 			      u16 vsi_num, u8 tc)
2452 {
2453 	DEFINE_RAW_FLEX(struct ice_aqc_move_elem, buf, teid, 1);
2454 	struct device *dev = ice_pf_to_dev(lag->pf);
2455 	u16 numq, valq, num_moved, qbuf_size;
2456 	u16 buf_size = __struct_size(buf);
2457 	struct ice_aqc_cfg_txqs_buf *qbuf;
2458 	struct ice_hw *hw = &lag->pf->hw;
2459 	struct ice_sched_node *n_prt;
2460 	__le32 teid, parent_teid;
2461 	struct ice_vsi_ctx *ctx;
2462 	u32 tmp_teid;
2463 
2464 	ctx = ice_get_vsi_ctx(hw, vsi_num);
2465 	if (!ctx) {
2466 		dev_warn(dev, "LAG rebuild failed after reset due to VSI Context failure\n");
2467 		return;
2468 	}
2469 
2470 	if (!ctx->sched.vsi_node[tc])
2471 		return;
2472 
2473 	numq = ctx->num_lan_q_entries[tc];
2474 	teid = ctx->sched.vsi_node[tc]->info.node_teid;
2475 	tmp_teid = le32_to_cpu(teid);
2476 	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
2477 
2478 	if (!tmp_teid || !numq)
2479 		return;
2480 
2481 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, true))
2482 		dev_dbg(dev, "Problem suspending traffic during reset rebuild\n");
2483 
2484 	/* reconfig queues for new port */
2485 	qbuf_size = struct_size(qbuf, queue_info, numq);
2486 	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
2487 	if (!qbuf) {
2488 		dev_warn(dev, "Failure allocating VF queue recfg buffer for reset rebuild\n");
2489 		goto resume_sync;
2490 	}
2491 
2492 	/* add the per queue info for the reconfigure command buffer */
2493 	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
2494 	if (!valq) {
2495 		dev_warn(dev, "Failure to reconfig queues for LAG reset rebuild\n");
2496 		goto sync_none;
2497 	}
2498 
2499 	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport,
2500 			       dest_hw->port_info->lport,
2501 			       ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
2502 		dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n");
2503 		goto sync_qerr;
2504 	}
2505 
2506 sync_none:
2507 	kfree(qbuf);
2508 
2509 	/* find parent in destination tree */
2510 	n_prt = ice_lag_get_sched_parent(dest_hw, tc);
2511 	if (!n_prt)
2512 		goto resume_sync;
2513 
2514 	/* Move node to new parent */
2515 	buf->hdr.src_parent_teid = parent_teid;
2516 	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
2517 	buf->hdr.num_elems = cpu_to_le16(1);
2518 	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
2519 	buf->teid[0] = teid;
2520 
2521 	if (ice_aq_move_sched_elems(&lag->pf->hw, buf, buf_size, &num_moved))
2522 		dev_warn(dev, "Failure to move VF nodes for LAG reset rebuild\n");
2523 	else
2524 		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
2525 
2526 	goto resume_sync;
2527 
2528 sync_qerr:
2529 	kfree(qbuf);
2530 
2531 resume_sync:
2532 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, false))
2533 		dev_warn(dev, "Problem restarting traffic for LAG node reset rebuild\n");
2534 }
2535 
2536 /**
 * ice_lag_move_vf_nodes_sync - move VF nodes to the active interface
 * @lag: primary interface's lag struct
 * @dest_hw: HW struct for the currently active interface
2540  *
2541  * This function is used in a reset context, outside of event handling,
2542  * to move the VF nodes to the secondary interface when that interface
2543  * is the active interface during a reset rebuild
2544  */
2545 static void
2546 ice_lag_move_vf_nodes_sync(struct ice_lag *lag, struct ice_hw *dest_hw)
2547 {
2548 	struct ice_pf *pf;
2549 	int i, tc;
2550 
2551 	if (!lag->primary || !dest_hw)
2552 		return;
2553 
2554 	pf = lag->pf;
2555 	ice_for_each_vsi(pf, i)
2556 		if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF)
2557 			ice_for_each_traffic_class(tc)
2558 				ice_lag_move_vf_nodes_tc_sync(lag, dest_hw, i,
2559 							      tc);
2560 }
2561 
2562 /**
2563  * ice_init_lag - initialize support for LAG
2564  * @pf: PF struct
2565  *
2566  * Alloc memory for LAG structs and initialize the elements.
2567  * Memory will be freed in ice_deinit_lag
2568  */
2569 int ice_init_lag(struct ice_pf *pf)
2570 {
2571 	struct device *dev = ice_pf_to_dev(pf);
2572 	struct ice_lag *lag;
2573 	struct ice_vsi *vsi;
2574 	u64 recipe_bits = 0;
2575 	int n, err;
2576 
2577 	ice_lag_init_feature_support_flag(pf);
2578 	if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
2579 		return 0;
2580 
2581 	pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL);
2582 	if (!pf->lag)
2583 		return -ENOMEM;
2584 	lag = pf->lag;
2585 
2586 	vsi = ice_get_main_vsi(pf);
2587 	if (!vsi) {
		dev_err(dev, "couldn't get main vsi, link aggregation init failed\n");
2589 		err = -EIO;
2590 		goto lag_error;
2591 	}
2592 
2593 	lag->pf = pf;
2594 	lag->netdev = vsi->netdev;
2595 	lag->role = ICE_LAG_NONE;
2596 	lag->active_port = ICE_LAG_INVALID_PORT;
2597 	lag->port_bitmap = 0x0;
2598 	lag->bonded = false;
2599 	lag->bond_aa = false;
2600 	lag->need_fltr_cfg = false;
2601 	lag->upper_netdev = NULL;
2602 	lag->notif_block.notifier_call = NULL;
2603 	memset(lag->sec_vf, 0, sizeof(lag->sec_vf));
2604 
2605 	err = ice_register_lag_handler(lag);
2606 	if (err) {
2607 		dev_warn(dev, "INIT LAG: Failed to register event handler\n");
2608 		goto lag_error;
2609 	}
2610 
2611 	err = ice_create_lag_recipe(&pf->hw, &lag->pf_recipe,
2612 				    ice_dflt_vsi_rcp, 1);
2613 	if (err)
2614 		goto lag_error;
2615 
2616 	err = ice_create_lag_recipe(&pf->hw, &lag->lport_recipe,
2617 				    ice_lport_rcp, 3);
2618 	if (err)
2619 		goto free_rcp_res;
2620 
2621 	err = ice_create_lag_recipe(&pf->hw, &lag->act_act_recipe,
2622 				    ice_lport_rcp, 1);
2623 	if (err)
		goto free_lport_res;
2625 
2626 	/* associate recipes to profiles */
2627 	for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
2628 		err = ice_aq_get_recipe_to_profile(&pf->hw, n,
2629 						   &recipe_bits, NULL);
2630 		if (err)
2631 			continue;
2632 
2633 		if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
2634 			recipe_bits |= BIT(lag->pf_recipe) |
2635 				       BIT(lag->lport_recipe) |
2636 				       BIT(lag->act_act_recipe);
2637 			ice_aq_map_recipe_to_profile(&pf->hw, n,
2638 						     recipe_bits, NULL);
2639 		}
2640 	}
2641 
2642 	ice_display_lag_info(lag);
2643 
2644 	dev_dbg(dev, "INIT LAG complete\n");
2645 	return 0;
2646 
2647 free_lport_res:
2648 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
2649 			&lag->lport_recipe);
2650 
2651 free_rcp_res:
2652 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
2653 			&lag->pf_recipe);
2654 lag_error:
2655 	kfree(lag);
2656 	pf->lag = NULL;
2657 	return err;
2658 }
2659 
2660 /**
2661  * ice_deinit_lag - Clean up LAG
2662  * @pf: PF struct
2663  *
2664  * Clean up kernel LAG info and free memory
2665  * This function is meant to only be called on driver remove/shutdown
2666  */
2667 void ice_deinit_lag(struct ice_pf *pf)
2668 {
2669 	struct ice_lag *lag = pf->lag;
2670 
2671 	if (!lag)
2672 		return;
2673 
2674 	if (lag->pf)
2675 		ice_unregister_lag_handler(lag);
2676 
2677 	flush_workqueue(ice_lag_wq);
2678 
	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
			&pf->lag->pf_recipe);
	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
			&pf->lag->lport_recipe);
	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
			&pf->lag->act_act_recipe);
2683 
2684 	kfree(lag);
2685 
2686 	pf->lag = NULL;
2687 }
2688 
2689 /**
2690  * ice_lag_rebuild - rebuild lag resources after reset
2691  * @pf: pointer to local pf struct
2692  *
 * PF resets are promoted to CORER resets when the interface is in an
 * aggregate.  This means that we need to rebuild the PF resources for the
 * interface.  Since this will happen outside the normal event processing,
 * we need to acquire the lag lock.
2697  *
2698  * This function will also evaluate the VF resources if this is the primary
2699  * interface.
2700  */
2701 void ice_lag_rebuild(struct ice_pf *pf)
2702 {
2703 	struct ice_lag_netdev_list ndlist;
2704 	struct ice_lag *lag, *prim_lag;
2705 	u8 act_port, loc_port;
2706 
2707 	if (!pf->lag || !pf->lag->bonded)
2708 		return;
2709 
2710 	mutex_lock(&pf->lag_mutex);
2711 
2712 	lag = pf->lag;
2713 	if (lag->primary) {
2714 		prim_lag = lag;
2715 	} else {
2716 		ice_lag_build_netdev_list(lag, &ndlist);
2717 		prim_lag = ice_lag_find_primary(lag);
2718 	}
2719 
2720 	if (!prim_lag) {
2721 		dev_dbg(ice_pf_to_dev(pf), "No primary interface in aggregate, can't rebuild\n");
2722 		goto lag_rebuild_out;
2723 	}
2724 
2725 	act_port = prim_lag->active_port;
2726 	loc_port = lag->pf->hw.port_info->lport;
2727 
2728 	/* configure SWID for this port */
2729 	if (lag->primary) {
2730 		ice_lag_primary_swid(lag, true);
2731 	} else {
2732 		ice_lag_set_swid(prim_lag->pf->hw.port_info->sw_id, lag, true);
2733 		ice_lag_add_prune_list(prim_lag, pf);
2734 		if (act_port == loc_port)
2735 			ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw);
2736 	}
2737 
2738 	if (!lag->bond_aa) {
2739 		ice_lag_cfg_lp_fltr(lag, true, true);
2740 		if (lag->pf_rx_rule_id)
2741 			if (ice_lag_cfg_dflt_fltr(lag, true))
2742 				dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
2743 	} else {
2744 		ice_lag_cfg_lp_fltr(lag, true, false);
2745 	}
2746 
2747 
2748 	ice_clear_rdma_cap(pf);
2749 lag_rebuild_out:
2750 	ice_lag_destroy_netdev_list(lag, &ndlist);
2751 	mutex_unlock(&pf->lag_mutex);
2752 }
2753 
2754 /**
 * ice_lag_is_switchdev_running - check for switchdev on aggregate members
2756  * @pf: pointer to PF structure
2757  *
2758  * Check if switchdev is running on any of the interfaces connected to lag.
2759  */
2760 bool ice_lag_is_switchdev_running(struct ice_pf *pf)
2761 {
2762 	struct ice_lag *lag = pf->lag;
2763 	struct net_device *tmp_nd;
2764 
2765 	if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) ||
2766 	    !lag || !lag->upper_netdev)
2767 		return false;
2768 
2769 	rcu_read_lock();
2770 	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
2771 		struct ice_netdev_priv *priv = netdev_priv(tmp_nd);
2772 
2773 		if (!netif_is_ice(tmp_nd) || !priv || !priv->vsi ||
2774 		    !priv->vsi->back)
2775 			continue;
2776 
2777 		if (ice_is_switchdev_running(priv->vsi->back)) {
2778 			rcu_read_unlock();
2779 			return true;
2780 		}
2781 	}
2782 	rcu_read_unlock();
2783 
2784 	return false;
2785 }
2786