xref: /linux/drivers/net/ethernet/intel/ice/ice_lag.c (revision 7a7c52645ce62314cdd69815e9d8fcb33e0042d5)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (C) 2018-2021, Intel Corporation. */
3 
4 /* Link Aggregation code */
5 
6 #include "ice.h"
7 #include "ice_lib.h"
8 #include "ice_lag.h"
9 
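/* Bits ORed into the SWID res_type when subscribing to the primary
 * interface's shared SWID in ice_lag_set_swid() below.
 */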
10 #define ICE_LAG_RES_SHARED	BIT(14)
11 #define ICE_LAG_RES_VALID	BIT(15)
12 
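/* Training packets used by the filter code below: lacp_train_pkt carries
 * the IEEE 802.3 Slow Protocols ethertype (0x8809, used by LACP) in the
 * ethertype position (bytes 12-13); act_act_train_pkt is all zeros.
 */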
13 #define ICE_TRAIN_PKT_LEN		16
14 static const u8 lacp_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
15 						      0, 0, 0, 0, 0, 0,
16 						      0x88, 0x09, 0, 0 };
17 static const u8 act_act_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
18 							 0, 0, 0, 0, 0, 0,
19 							 0, 0, 0, 0 };
20 
21 #define ICE_RECIPE_LEN			64
22 #define ICE_LAG_SRIOV_CP_RECIPE		10
23 
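/* Opaque recipe contents used elsewhere in this file when creating the
 * default-VSI and lport-based recipes that the LAG filters reference.
 */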
24 static const u8 ice_dflt_vsi_rcp[ICE_RECIPE_LEN] = {
25 	0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26 	0x85, 0, 0x01, 0, 0, 0, 0xff, 0xff, 0x08, 0, 0, 0, 0, 0, 0, 0,
27 	0, 0, 0, 0, 0, 0, 0x30 };
28 static const u8 ice_lport_rcp[ICE_RECIPE_LEN] = {
29 	0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 	0x85, 0, 0x16, 0, 0, 0, 0xff, 0xff, 0x07, 0, 0, 0, 0, 0, 0, 0,
31 	0, 0, 0, 0, 0, 0, 0x30 };
32 
33 /**
34  * ice_lag_set_primary - set PF LAG state as Primary
35  * @lag: LAG info struct
36  */
37 static void ice_lag_set_primary(struct ice_lag *lag)
38 {
39 	struct ice_pf *pf = lag->pf;
40 
41 	if (!pf)
42 		return;
43 
44 	if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) {
45 		dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n",
46 			 netdev_name(lag->netdev));
47 		return;
48 	}
49 
50 	lag->role = ICE_LAG_PRIMARY;
51 }
52 
53 /**
54  * ice_lag_set_bkup - set PF LAG state to Backup
55  * @lag: LAG info struct
56  */
57 static void ice_lag_set_bkup(struct ice_lag *lag)
58 {
59 	struct ice_pf *pf = lag->pf;
60 
61 	if (!pf)
62 		return;
63 
64 	if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) {
65 		dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n",
66 			netdev_name(lag->netdev));
67 		return;
68 	}
69 
70 	lag->role = ICE_LAG_BACKUP;
71 }
72 
73 /**
74  * netif_is_same_ice - determine if netdev is on the same ice NIC as local PF
75  * @pf: local PF struct
76  * @netdev: netdev we are evaluating
77  */
78 static bool netif_is_same_ice(struct ice_pf *pf, struct net_device *netdev)
79 {
80 	struct ice_netdev_priv *np;
81 	struct ice_pf *test_pf;
82 	struct ice_vsi *vsi;
83 
84 	if (!netif_is_ice(netdev))
85 		return false;
86 
87 	np = netdev_priv(netdev);
88 	if (!np)
89 		return false;
90 
91 	vsi = np->vsi;
92 	if (!vsi)
93 		return false;
94 
95 	test_pf = vsi->back;
96 	if (!test_pf)
97 		return false;
98 
99 	if (pf->pdev->bus != test_pf->pdev->bus ||
100 	    pf->pdev->slot != test_pf->pdev->slot)
101 		return false;
102 
103 	return true;
104 }
105 
106 /**
107  * ice_lag_config_eswitch - configure eswitch to work with LAG
108  * @lag: lag info struct
109  * @netdev: active network interface device struct
110  *
111  * Updates all port representors in eswitch to use @netdev for Tx.
112  *
113  * Configures the netdev to keep dst metadata (also used in representor Tx).
114  * This is required for an uplink without switchdev mode configured.
115  */
116 static void ice_lag_config_eswitch(struct ice_lag *lag,
117 				   struct net_device *netdev)
118 {
119 	struct ice_repr *repr;
120 	unsigned long id;
121 
122 	xa_for_each(&lag->pf->eswitch.reprs, id, repr)
123 		repr->dst->u.port_info.lower_dev = netdev;
124 
125 	netif_keep_dst(netdev);
126 }
127 
128 /**
129  * ice_netdev_to_lag - return pointer to associated lag struct from netdev
130  * @netdev: pointer to net_device struct to query
131  */
132 static struct ice_lag *ice_netdev_to_lag(struct net_device *netdev)
133 {
134 	struct ice_netdev_priv *np;
135 	struct ice_vsi *vsi;
136 
137 	if (!netif_is_ice(netdev))
138 		return NULL;
139 
140 	np = netdev_priv(netdev);
141 	if (!np)
142 		return NULL;
143 
144 	vsi = np->vsi;
145 	if (!vsi)
146 		return NULL;
147 
148 	return vsi->back->lag;
149 }
150 
151 /**
152  * ice_lag_find_hw_by_lport - return a hw struct from a bond member's lport
153  * @lag: lag struct
154  * @lport: lport value to search for
155  */
156 static struct ice_hw *
157 ice_lag_find_hw_by_lport(struct ice_lag *lag, u8 lport)
158 {
159 	struct ice_lag_netdev_list *entry;
160 	struct net_device *tmp_netdev;
161 	struct ice_netdev_priv *np;
162 	struct ice_hw *hw;
163 
164 	list_for_each_entry(entry, lag->netdev_head, node) {
165 		tmp_netdev = entry->netdev;
166 		if (!tmp_netdev || !netif_is_ice(tmp_netdev))
167 			continue;
168 
169 		np = netdev_priv(tmp_netdev);
170 		if (!np || !np->vsi)
171 			continue;
172 
173 		hw = &np->vsi->back->hw;
174 		if (hw->port_info->lport == lport)
175 			return hw;
176 	}
177 
178 	return NULL;
179 }
180 
181 /**
182  * ice_pkg_has_lport_extract - check if lport extraction supported
183  * @hw: HW struct
184  */
185 static bool ice_pkg_has_lport_extract(struct ice_hw *hw)
186 {
187 	int i;
188 
189 	for (i = 0; i < hw->blk[ICE_BLK_SW].es.count; i++) {
190 		u16 offset;
191 		u8 fv_prot;
192 
193 		ice_find_prot_off(hw, ICE_BLK_SW, ICE_SW_DEFAULT_PROFILE, i,
194 				  &fv_prot, &offset);
195 		if (fv_prot == ICE_FV_PROT_MDID &&
196 		    offset == ICE_LP_EXT_BUF_OFFSET)
197 			return true;
198 	}
199 	return false;
200 }
201 
202 /**
203  * ice_lag_find_primary - returns pointer to the primary interface's lag struct
204  * @lag: local interface's lag struct
205  */
206 static struct ice_lag *ice_lag_find_primary(struct ice_lag *lag)
207 {
208 	struct ice_lag *primary_lag = NULL;
209 	struct list_head *tmp;
210 
211 	list_for_each(tmp, lag->netdev_head) {
212 		struct ice_lag_netdev_list *entry;
213 		struct ice_lag *tmp_lag;
214 
215 		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
216 		tmp_lag = ice_netdev_to_lag(entry->netdev);
217 		if (tmp_lag && tmp_lag->primary) {
218 			primary_lag = tmp_lag;
219 			break;
220 		}
221 	}
222 
223 	return primary_lag;
224 }
225 
226 /**
227  * ice_lag_cfg_fltr - Add/Remove rule for LAG
228  * @lag: lag struct for local interface
229  * @act: rule action
230  * @recipe_id: recipe id for the new rule
231  * @rule_idx: pointer to rule index
232  * @direction: ICE_FLTR_RX or ICE_FLTR_TX
233  * @add: boolean on whether we are adding filters
234  */
235 static int
236 ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx,
237 		 u8 direction, bool add)
238 {
239 	struct ice_sw_rule_lkup_rx_tx *s_rule;
240 	struct ice_hw *hw = &lag->pf->hw;
241 	u16 s_rule_sz, vsi_num;
242 	u8 *eth_hdr;
243 	u32 opc;
244 	int err;
245 
246 	vsi_num = ice_get_hw_vsi_num(hw, 0);
247 
248 	s_rule_sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule);
249 	s_rule = kzalloc(s_rule_sz, GFP_KERNEL);
250 	if (!s_rule) {
251 		dev_err(ice_pf_to_dev(lag->pf), "error allocating rule for LAG\n");
252 		return -ENOMEM;
253 	}
254 
255 	if (add) {
256 		eth_hdr = s_rule->hdr_data;
257 		ice_fill_eth_hdr(eth_hdr);
258 
259 		act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi_num);
260 
261 		s_rule->recipe_id = cpu_to_le16(recipe_id);
262 		if (direction == ICE_FLTR_RX) {
263 			s_rule->hdr.type =
264 				cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
265 			s_rule->src = cpu_to_le16(hw->port_info->lport);
266 		} else {
267 			s_rule->hdr.type =
268 				cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
269 			s_rule->src = cpu_to_le16(vsi_num);
270 		}
271 		s_rule->act = cpu_to_le32(act);
272 		s_rule->hdr_len = cpu_to_le16(DUMMY_ETH_HDR_LEN);
273 		opc = ice_aqc_opc_add_sw_rules;
274 	} else {
275 		s_rule->index = cpu_to_le16(*rule_idx);
276 		opc = ice_aqc_opc_remove_sw_rules;
277 	}
278 
279 	err = ice_aq_sw_rules(&lag->pf->hw, s_rule, s_rule_sz, 1, opc, NULL);
280 	if (err)
281 		goto dflt_fltr_free;
282 
283 	if (add)
284 		*rule_idx = le16_to_cpu(s_rule->index);
285 	else
286 		*rule_idx = 0;
287 
288 dflt_fltr_free:
289 	kfree(s_rule);
290 	return err;
291 }
292 
293 /**
294  * ice_lag_cfg_dflt_fltr - Add/Remove default VSI rule for LAG
295  * @lag: lag struct for local interface
296  * @add: boolean on whether to add filter
297  */
298 static int
299 ice_lag_cfg_dflt_fltr(struct ice_lag *lag, bool add)
300 {
301 	u32 act = ICE_SINGLE_ACT_VSI_FORWARDING |
302 		ICE_SINGLE_ACT_VALID_BIT | ICE_SINGLE_ACT_LAN_ENABLE;
303 	int err;
304 
305 	err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id,
306 			       ICE_FLTR_RX, add);
307 	if (err)
308 		goto err_rx;
309 
310 	act = ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT |
311 	      ICE_SINGLE_ACT_LB_ENABLE;
312 	err = ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_tx_rule_id,
313 			       ICE_FLTR_TX, add);
314 	if (err)
315 		goto err_tx;
316 
317 	return 0;
318 
319 err_tx:
320 	ice_lag_cfg_fltr(lag, act, lag->pf_recipe, &lag->pf_rx_rule_id,
321 			 ICE_FLTR_RX, !add);
322 err_rx:
323 	return err;
324 }
325 
326 /**
327  * ice_lag_cfg_drop_fltr - Add/Remove lport drop rule
328  * @lag: lag struct for local interface
329  * @add: boolean on whether to add filter
330  */
331 static int
332 ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add)
333 {
334 	u32 act = ICE_SINGLE_ACT_VSI_FORWARDING |
335 		  ICE_SINGLE_ACT_VALID_BIT |
336 		  ICE_SINGLE_ACT_DROP;
337 
338 	return ice_lag_cfg_fltr(lag, act, lag->lport_recipe,
339 				&lag->lport_rule_idx, ICE_FLTR_RX, add);
340 }
341 
342 /**
343  * ice_lag_cfg_pf_fltrs_act_bkup - set filters up for new active port
344  * @lag: local interface's lag struct
345  * @bonding_info: netdev event bonding info
346  */
347 static void
348 ice_lag_cfg_pf_fltrs_act_bkup(struct ice_lag *lag,
349 			      struct netdev_bonding_info *bonding_info)
350 {
351 	struct device *dev = ice_pf_to_dev(lag->pf);
352 
353 	/* interface not active - remove old default VSI rule */
354 	if (bonding_info->slave.state && lag->pf_rx_rule_id) {
355 		if (ice_lag_cfg_dflt_fltr(lag, false))
356 			dev_err(dev, "Error removing old default VSI filter\n");
357 		if (ice_lag_cfg_drop_fltr(lag, true))
358 			dev_err(dev, "Error adding new drop filter\n");
359 		return;
360 	}
361 
362 	/* interface becoming active - add new default VSI rule */
363 	if (!bonding_info->slave.state && !lag->pf_rx_rule_id) {
364 		if (ice_lag_cfg_dflt_fltr(lag, true))
365 			dev_err(dev, "Error adding new default VSI filter\n");
366 		if (lag->lport_rule_idx && ice_lag_cfg_drop_fltr(lag, false))
367 			dev_err(dev, "Error removing old drop filter\n");
368 	}
369 }
370 
371 /**
372  * ice_lag_cfg_lp_fltr - configure lport filters
373  * @lag: local interface's lag struct
374  * @add: add or remove rule
375  * @cp: control packet only or general PF lport rule
376  */
377 static void
378 ice_lag_cfg_lp_fltr(struct ice_lag *lag, bool add, bool cp)
379 {
380 	struct ice_sw_rule_lkup_rx_tx *s_rule;
381 	struct ice_vsi *vsi = lag->pf->vsi[0];
382 	u16 buf_len, opc;
383 
384 	buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, ICE_TRAIN_PKT_LEN);
385 	s_rule = kzalloc(buf_len, GFP_KERNEL);
386 	if (!s_rule) {
387 		netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n");
388 		return;
389 	}
390 
391 	if (add) {
392 		if (cp) {
393 			s_rule->recipe_id =
394 				cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE);
395 			memcpy(s_rule->hdr_data, lacp_train_pkt,
396 			       ICE_TRAIN_PKT_LEN);
397 		} else {
398 			s_rule->recipe_id = cpu_to_le16(lag->act_act_recipe);
399 			memcpy(s_rule->hdr_data, act_act_train_pkt,
400 			       ICE_TRAIN_PKT_LEN);
401 		}
402 
403 		s_rule->src = cpu_to_le16(vsi->port_info->lport);
404 		s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI |
405 					  ICE_SINGLE_ACT_LAN_ENABLE |
406 					  ICE_SINGLE_ACT_VALID_BIT |
407 					  FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M,
408 						     vsi->vsi_num));
409 		s_rule->hdr_len = cpu_to_le16(ICE_TRAIN_PKT_LEN);
410 		s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
411 		opc = ice_aqc_opc_add_sw_rules;
412 	} else {
413 		opc = ice_aqc_opc_remove_sw_rules;
414 		if (cp)
415 			s_rule->index = cpu_to_le16(lag->cp_rule_idx);
416 		else
417 			s_rule->index = cpu_to_le16(lag->act_act_rule_idx);
418 	}
419 	if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) {
420 		netdev_warn(lag->netdev, "Error %s %s rule for aggregate\n",
421 			    add ? "ADDING" : "REMOVING",
422 			    cp ? "CONTROL PACKET" : "LPORT");
423 		goto err_cp_free;
424 	}
425 
426 	if (add) {
427 		if (cp)
428 			lag->cp_rule_idx = le16_to_cpu(s_rule->index);
429 		else
430 			lag->act_act_rule_idx = le16_to_cpu(s_rule->index);
431 	} else {
432 		if (cp)
433 			lag->cp_rule_idx = 0;
434 		else
435 			lag->act_act_rule_idx = 0;
436 	}
437 
438 err_cp_free:
439 	kfree(s_rule);
440 }
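/* Call sites in this file: ice_lag_changeupper_event() adds the LACP
 * control-packet rule (cp == true) on link and removes the applicable rule
 * on unlink; ice_lag_cfg_pf_fltrs() adds the lport rule (cp == false) for
 * active/active bonds.
 */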
441 
442 /**
443  * ice_lag_cfg_pf_fltrs - set filters up for PF traffic
444  * @lag: local interface's lag struct
445  * @ptr: opaque data containing notifier event
446  */
447 static void
448 ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr)
449 {
450 	struct netdev_notifier_bonding_info *info = ptr;
451 	struct netdev_bonding_info *bonding_info;
452 	struct net_device *event_netdev;
453 
454 	event_netdev = netdev_notifier_info_to_dev(ptr);
455 	if (event_netdev != lag->netdev)
456 		return;
457 
458 	bonding_info = &info->bonding_info;
459 
460 	if (lag->bond_aa) {
461 		if (lag->need_fltr_cfg) {
462 			ice_lag_cfg_lp_fltr(lag, true, false);
463 			lag->need_fltr_cfg = false;
464 		}
465 	} else {
466 		ice_lag_cfg_pf_fltrs_act_bkup(lag, bonding_info);
467 	}
468 }
469 
470 /**
471  * ice_display_lag_info - print LAG info
472  * @lag: LAG info struct
473  */
474 static void ice_display_lag_info(struct ice_lag *lag)
475 {
476 	const char *name, *upper, *role, *bonded, *primary;
477 	struct device *dev = &lag->pf->pdev->dev;
478 
479 	name = lag->netdev ? netdev_name(lag->netdev) : "unset";
480 	upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
481 	primary = lag->primary ? "TRUE" : "FALSE";
482 	bonded = lag->bonded ? "BONDED" : "UNBONDED";
483 
484 	switch (lag->role) {
485 	case ICE_LAG_NONE:
486 		role = "NONE";
487 		break;
488 	case ICE_LAG_PRIMARY:
489 		role = "PRIMARY";
490 		break;
491 	case ICE_LAG_BACKUP:
492 		role = "BACKUP";
493 		break;
494 	case ICE_LAG_UNSET:
495 		role = "UNSET";
496 		break;
497 	default:
498 		role = "ERROR";
499 	}
500 
501 	dev_dbg(dev, "%s %s, upper:%s, role:%s, primary:%s\n", name, bonded,
502 		upper, role, primary);
503 }
504 
505 /**
506  * ice_lag_qbuf_recfg - generate a buffer of queues for a reconfigure command
507  * @hw: HW struct that contains the queue contexts
508  * @qbuf: pointer to buffer to populate
509  * @vsi_num: index of the VSI in PF space
510  * @numq: number of queues to search for
511  * @tc: traffic class that contains the queues
512  *
513  * function returns the number of valid queues in buffer
514  */
515 static u16
516 ice_lag_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
517 		   u16 vsi_num, u16 numq, u8 tc)
518 {
519 	struct ice_pf *pf = hw->back;
520 	struct ice_q_ctx *q_ctx;
521 	u16 qid, count = 0;
522 	int i;
523 
524 	for (i = 0; i < numq; i++) {
525 		q_ctx = ice_get_lan_q_ctx(hw, vsi_num, tc, i);
526 		if (!q_ctx) {
527 			dev_dbg(ice_hw_to_dev(hw), "%s queue %d NO Q CONTEXT\n",
528 				__func__, i);
529 			continue;
530 		}
531 		if (q_ctx->q_teid == ICE_INVAL_TEID) {
532 			dev_dbg(ice_hw_to_dev(hw), "%s queue %d INVAL TEID\n",
533 				__func__, i);
534 			continue;
535 		}
536 		if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) {
537 			dev_dbg(ice_hw_to_dev(hw), "%s queue %d INVAL Q HANDLE\n",
538 				__func__, i);
539 			continue;
540 		}
541 
542 		qid = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle];
543 		qbuf->queue_info[count].q_handle = cpu_to_le16(qid);
544 		qbuf->queue_info[count].tc = tc;
545 		qbuf->queue_info[count].q_teid = cpu_to_le32(q_ctx->q_teid);
546 		count++;
547 	}
548 
549 	return count;
550 }
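/* Usage sketch (mirrors ice_lag_move_vf_node_tc() below): size the buffer
 * for numq entries, then trim the Tx queue config command to the count
 * actually found:
 *
 *	qbuf_size = struct_size(qbuf, queue_info, numq);
 *	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
 *	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
 *	if (valq)
 *		ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, valq, oldport,
 *				   newport, ICE_AQC_Q_CFG_TC_CHNG, NULL);
 */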
551 
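/* The Tx scheduler is a tree: a TC node at the top, an aggregator node
 * below it, then one node per layer down to the VSI layer, whose children
 * are the queue nodes.  The helper below looks under the default aggregator
 * for a VSI-layer parent with a free child slot, adding intermediate nodes
 * when none exists.
 */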
552 /**
553  * ice_lag_get_sched_parent - locate or create a sched node parent
554  * @hw: HW struct for getting parent in
555  * @tc: traffic class on parent/node
556  */
557 static struct ice_sched_node *
558 ice_lag_get_sched_parent(struct ice_hw *hw, u8 tc)
559 {
560 	struct ice_sched_node *tc_node, *aggnode, *parent = NULL;
561 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
562 	struct ice_port_info *pi = hw->port_info;
563 	struct device *dev;
564 	u8 aggl, vsil;
565 	int n;
566 
567 	dev = ice_hw_to_dev(hw);
568 
569 	tc_node = ice_sched_get_tc_node(pi, tc);
570 	if (!tc_node) {
571 		dev_warn(dev, "Failure to find TC node for LAG move\n");
572 		return parent;
573 	}
574 
575 	aggnode = ice_sched_get_agg_node(pi, tc_node, ICE_DFLT_AGG_ID);
576 	if (!aggnode) {
577 		dev_warn(dev, "Failure to find aggregate node for LAG move\n");
578 		return parent;
579 	}
580 
581 	aggl = ice_sched_get_agg_layer(hw);
582 	vsil = ice_sched_get_vsi_layer(hw);
583 
584 	for (n = aggl + 1; n < vsil; n++)
585 		num_nodes[n] = 1;
586 
587 	for (n = 0; n < aggnode->num_children; n++) {
588 		parent = ice_sched_get_free_vsi_parent(hw, aggnode->children[n],
589 						       num_nodes);
590 		if (parent)
591 			return parent;
592 	}
593 
594 	/* if free parent not found - add one */
595 	parent = aggnode;
596 	for (n = aggl + 1; n < vsil; n++) {
597 		u16 num_nodes_added;
598 		u32 first_teid;
599 		int err;
600 
601 		err = ice_sched_add_nodes_to_layer(pi, tc_node, parent, n,
602 						   num_nodes[n], &first_teid,
603 						   &num_nodes_added);
604 		if (err || num_nodes[n] != num_nodes_added)
605 			return NULL;
606 
607 		if (num_nodes_added)
608 			parent = ice_sched_find_node_by_teid(tc_node,
609 							     first_teid);
610 		else
611 			parent = parent->children[0];
612 		if (!parent) {
613 			dev_warn(dev, "Failure to add new parent for LAG move\n");
614 			return parent;
615 		}
616 	}
617 
618 	return parent;
619 }
620 
621 /**
622  * ice_lag_move_vf_node_tc - move scheduling nodes for one VF on one TC
623  * @lag: lag info struct
624  * @oldport: lport of previous nodes location
625  * @newport: lport of destination nodes location
626  * @vsi_num: array index of VSI in PF space
627  * @tc: traffic class to move
628  */
629 static void
630 ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport,
631 			u16 vsi_num, u8 tc)
632 {
633 	DEFINE_RAW_FLEX(struct ice_aqc_move_elem, buf, teid, 1);
634 	struct device *dev = ice_pf_to_dev(lag->pf);
635 	u16 numq, valq, num_moved, qbuf_size;
636 	u16 buf_size = __struct_size(buf);
637 	struct ice_aqc_cfg_txqs_buf *qbuf;
638 	struct ice_sched_node *n_prt;
639 	struct ice_hw *new_hw = NULL;
640 	__le32 teid, parent_teid;
641 	struct ice_vsi_ctx *ctx;
642 	u32 tmp_teid;
643 
644 	ctx = ice_get_vsi_ctx(&lag->pf->hw, vsi_num);
645 	if (!ctx) {
646 		dev_warn(dev, "Unable to locate VSI context for LAG failover\n");
647 		return;
648 	}
649 
650 	/* check to see if this VF is enabled on this TC */
651 	if (!ctx->sched.vsi_node[tc])
652 		return;
653 
654 	/* locate HW struct for destination port */
655 	new_hw = ice_lag_find_hw_by_lport(lag, newport);
656 	if (!new_hw) {
657 		dev_warn(dev, "Unable to locate HW struct for LAG node destination\n");
658 		return;
659 	}
660 
661 	numq = ctx->num_lan_q_entries[tc];
662 	teid = ctx->sched.vsi_node[tc]->info.node_teid;
663 	tmp_teid = le32_to_cpu(teid);
664 	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
665 	/* if no teid assigned or numq == 0, then this TC is not active */
666 	if (!tmp_teid || !numq)
667 		return;
668 
669 	/* suspend VSI subtree for Traffic Class "tc" on
670 	 * this VF's VSI
671 	 */
672 	if (ice_sched_suspend_resume_elems(&lag->pf->hw, 1, &tmp_teid, true))
673 		dev_dbg(dev, "Problem suspending traffic for LAG node move\n");
674 
675 	/* reconfigure all VF's queues on this Traffic Class
676 	 * to new port
677 	 */
678 	qbuf_size = struct_size(qbuf, queue_info, numq);
679 	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
680 	if (!qbuf) {
681 		dev_warn(dev, "Failure allocating memory for VF queue recfg buffer\n");
682 		goto resume_traffic;
683 	}
684 
685 	/* add the per queue info for the reconfigure command buffer */
686 	valq = ice_lag_qbuf_recfg(&lag->pf->hw, qbuf, vsi_num, numq, tc);
687 	if (!valq) {
688 		dev_dbg(dev, "No valid queues found for LAG failover\n");
689 		goto qbuf_none;
690 	}
691 
692 	if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport,
693 			       newport, ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
694 		dev_warn(dev, "Failure to configure queues for LAG failover\n");
695 		goto qbuf_err;
696 	}
697 
698 qbuf_none:
699 	kfree(qbuf);
700 
701 	/* find new parent in destination port's tree for VF VSI node on this
702 	 * Traffic Class
703 	 */
704 	n_prt = ice_lag_get_sched_parent(new_hw, tc);
705 	if (!n_prt)
706 		goto resume_traffic;
707 
708 	/* Move VF's VSI node for this TC to newport's scheduler tree */
709 	buf->hdr.src_parent_teid = parent_teid;
710 	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
711 	buf->hdr.num_elems = cpu_to_le16(1);
712 	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
713 	buf->teid[0] = teid;
714 
715 	if (ice_aq_move_sched_elems(&lag->pf->hw, buf, buf_size, &num_moved))
716 		dev_warn(dev, "Failure to move VF nodes for failover\n");
717 	else
718 		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
719 
720 	goto resume_traffic;
721 
722 qbuf_err:
723 	kfree(qbuf);
724 
725 resume_traffic:
726 	/* restart traffic for VSI node */
727 	if (ice_sched_suspend_resume_elems(&lag->pf->hw, 1, &tmp_teid, false))
728 		dev_dbg(dev, "Problem restarting traffic for LAG node move\n");
729 }
730 
731 /**
732  * ice_lag_build_netdev_list - populate the lag struct's netdev list
733  * @lag: local lag struct
734  * @ndlist: pointer to netdev list to populate
735  */
736 static void ice_lag_build_netdev_list(struct ice_lag *lag,
737 				      struct ice_lag_netdev_list *ndlist)
738 {
739 	struct ice_lag_netdev_list *nl;
740 	struct net_device *tmp_nd;
741 
742 	INIT_LIST_HEAD(&ndlist->node);
743 	rcu_read_lock();
744 	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
745 		nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
746 		if (!nl)
747 			break;
748 
749 		nl->netdev = tmp_nd;
750 		list_add(&nl->node, &ndlist->node);
751 	}
752 	rcu_read_unlock();
753 	lag->netdev_head = &ndlist->node;
754 }
755 
756 /**
757  * ice_lag_destroy_netdev_list - free lag struct's netdev list
758  * @lag: pointer to local lag struct
759  * @ndlist: pointer to lag struct netdev list
760  */
761 static void ice_lag_destroy_netdev_list(struct ice_lag *lag,
762 					struct ice_lag_netdev_list *ndlist)
763 {
764 	struct ice_lag_netdev_list *entry, *n;
765 
766 	rcu_read_lock();
767 	list_for_each_entry_safe(entry, n, &ndlist->node, node) {
768 		list_del(&entry->node);
769 		kfree(entry);
770 	}
771 	rcu_read_unlock();
772 	lag->netdev_head = NULL;
773 }
774 
775 /**
776  * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF
777  * @lag: primary interface LAG struct
778  * @oldport: lport of previous interface
779  * @newport: lport of destination interface
780  * @vsi_num: SW index of VF's VSI
781  */
782 static void
783 ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport,
784 			     u16 vsi_num)
785 {
786 	u8 tc;
787 
788 	ice_for_each_traffic_class(tc)
789 		ice_lag_move_vf_node_tc(lag, oldport, newport, vsi_num, tc);
790 }
791 
792 /**
793  * ice_lag_move_new_vf_nodes - Move Tx scheduling nodes for a VF if required
794  * @vf: the VF to move Tx nodes for
795  *
796  * Called just after configuring new VF queues. Check whether the VF Tx
797  * scheduling nodes need to be updated to fail over to the active port. If so,
798  * move them now.
799  */
800 void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
801 {
802 	struct ice_lag_netdev_list ndlist;
803 	u8 pri_port, act_port;
804 	struct ice_lag *lag;
805 	struct ice_vsi *vsi;
806 	struct ice_pf *pf;
807 
808 	vsi = ice_get_vf_vsi(vf);
809 
810 	if (WARN_ON(!vsi))
811 		return;
812 
813 	if (WARN_ON(vsi->type != ICE_VSI_VF))
814 		return;
815 
816 	pf = vf->pf;
817 	lag = pf->lag;
818 
819 	mutex_lock(&pf->lag_mutex);
820 	if (!lag->bonded)
821 		goto new_vf_unlock;
822 
823 	pri_port = pf->hw.port_info->lport;
824 	act_port = lag->active_port;
825 
826 	if (lag->upper_netdev)
827 		ice_lag_build_netdev_list(lag, &ndlist);
828 
829 	if (lag->bonded && lag->primary && !list_empty(lag->netdev_head)) {
830 		if (lag->bond_aa &&
831 		    ice_is_feature_supported(pf, ICE_F_SRIOV_AA_LAG))
832 			ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL);
833 
834 		if (!lag->bond_aa &&
835 		    ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) &&
836 		    pri_port != act_port)
837 			ice_lag_move_single_vf_nodes(lag, pri_port, act_port,
838 						     vsi->idx);
839 	}
840 
841 	ice_lag_destroy_netdev_list(lag, &ndlist);
842 
843 new_vf_unlock:
844 	mutex_unlock(&pf->lag_mutex);
845 }
846 
847 /**
848  * ice_lag_move_vf_nodes - move Tx scheduling nodes for all VFs to new port
849  * @lag: lag info struct
850  * @oldport: lport of previous interface
851  * @newport: lport of destination interface
852  */
853 static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport)
854 {
855 	struct ice_pf *pf;
856 	int i;
857 
858 	if (!lag->primary)
859 		return;
860 
861 	pf = lag->pf;
862 	ice_for_each_vsi(pf, i)
863 		if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF)
864 			ice_lag_move_single_vf_nodes(lag, oldport, newport, i);
865 }
866 
867 /**
868  * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context
869  * @lag: local lag struct
870  * @src_prt: lport value for source port
871  * @dst_prt: lport value for destination port
872  *
873  * This function is used to move nodes during an out-of-netdev-event situation,
874  * primarily when the driver needs to reconfigure or recreate resources.
875  *
876  * Must be called while holding the lag_mutex to avoid lag events from
877  * processing while out-of-sync moves are happening.  Also, paired moves,
878  * such as used in a reset flow, should both be called under the same mutex
879  * lock to avoid changes between start of reset and end of reset.
880  */
881 void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt)
882 {
883 	struct ice_lag_netdev_list ndlist;
884 
885 	ice_lag_build_netdev_list(lag, &ndlist);
886 	ice_lag_move_vf_nodes(lag, src_prt, dst_prt);
887 	ice_lag_destroy_netdev_list(lag, &ndlist);
888 }
889 
890 /**
891  * ice_lag_prepare_vf_reset - helper to adjust VF LAG for reset
892  * @lag: lag struct for interface that owns VF
893  *
894  * Context: must be called with the lag_mutex lock held.
895  *
896  * Return: active lport value or ICE_LAG_INVALID_PORT if nothing moved.
897  */
898 u8 ice_lag_prepare_vf_reset(struct ice_lag *lag)
899 {
900 	u8 pri_prt, act_prt;
901 
902 	if (lag && lag->bonded && lag->primary && lag->upper_netdev) {
903 		if (!lag->bond_aa) {
904 			pri_prt = lag->pf->hw.port_info->lport;
905 			act_prt = lag->active_port;
906 			if (act_prt != pri_prt &&
907 			    act_prt != ICE_LAG_INVALID_PORT) {
908 				ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
909 				return act_prt;
910 			}
911 		} else {
912 			if (lag->port_bitmap & ICE_LAGS_M) {
913 				lag->port_bitmap &= ~ICE_LAGS_M;
914 				ice_lag_aa_failover(lag, ICE_LAGP_IDX, NULL);
915 				lag->port_bitmap |= ICE_LAGS_M;
916 			}
917 		}
918 	}
919 
920 	return ICE_LAG_INVALID_PORT;
921 }
922 
923 /**
924  * ice_lag_complete_vf_reset - helper for lag after reset
925  * @lag: lag struct for primary interface
926  * @act_prt: which port should be active for lag
927  *
928  * Context: must be called while holding the lag_mutex.
929  */
930 void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt)
931 {
932 	u8 pri_prt;
933 
934 	if (lag && lag->bonded && lag->primary) {
935 		if (!lag->bond_aa) {
936 			pri_prt = lag->pf->hw.port_info->lport;
937 			if (act_prt != ICE_LAG_INVALID_PORT)
938 				ice_lag_move_vf_nodes_cfg(lag, pri_prt,
939 							  act_prt);
940 		} else {
941 			ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL);
942 		}
943 	}
944 }
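/* Sketch of the intended pairing (hypothetical caller, e.g. a VF reset
 * path); both calls must happen under the same lag_mutex hold:
 *
 *	mutex_lock(&pf->lag_mutex);
 *	act_prt = ice_lag_prepare_vf_reset(pf->lag);
 *	... reset the VF ...
 *	ice_lag_complete_vf_reset(pf->lag, act_prt);
 *	mutex_unlock(&pf->lag_mutex);
 */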
945 
946 /**
947  * ice_lag_info_event - handle NETDEV_BONDING_INFO event
948  * @lag: LAG info struct
949  * @ptr: opaque data pointer
950  *
951  * ptr is to be cast to (netdev_notifier_bonding_info *)
952  */
953 static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
954 {
955 	struct netdev_notifier_bonding_info *info = ptr;
956 	struct netdev_bonding_info *bonding_info;
957 	struct net_device *event_netdev;
958 	const char *lag_netdev_name;
959 
960 	event_netdev = netdev_notifier_info_to_dev(ptr);
961 	lag_netdev_name = netdev_name(lag->netdev);
962 	bonding_info = &info->bonding_info;
963 
964 	if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev)
965 		return;
966 
967 	if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) {
968 		netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n");
969 		goto lag_out;
970 	}
971 
972 	if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
973 		netdev_dbg(lag->netdev, "Bonding event recv, but secondary info not for us\n");
974 		goto lag_out;
975 	}
976 
977 	if (bonding_info->slave.state)
978 		ice_lag_set_bkup(lag);
979 	else
980 		ice_lag_set_primary(lag);
981 
982 lag_out:
983 	ice_display_lag_info(lag);
984 }
985 
986 /**
987  * ice_lag_aa_qbuf_recfg - fill a single queue buffer for recfg cmd
988  * @hw: HW struct that contains the queue context
989  * @qbuf: pointer to single queue buffer
990  * @vsi_num: index of the VF VSI in PF space
991  * @qnum: queue index
992  *
993  * Return: Zero on success, error code on failure.
994  */
995 static int
996 ice_lag_aa_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
997 		      u16 vsi_num, int qnum)
998 {
999 	struct ice_pf *pf = hw->back;
1000 	struct ice_q_ctx *q_ctx;
1001 	u16 q_id;
1002 
1003 	q_ctx = ice_get_lan_q_ctx(hw, vsi_num, 0, qnum);
1004 	if (!q_ctx) {
1005 		dev_dbg(ice_hw_to_dev(hw), "LAG queue %d no Q context\n", qnum);
1006 		return -ENOENT;
1007 	}
1008 
1009 	if (q_ctx->q_teid == ICE_INVAL_TEID) {
1010 		dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL TEID\n", qnum);
1011 		return -EINVAL;
1012 	}
1013 
1014 	if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) {
1015 		dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL Q HANDLE\n", qnum);
1016 		return -EINVAL;
1017 	}
1018 
1019 	q_id = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle];
1020 	qbuf->queue_info[0].q_handle = cpu_to_le16(q_id);
1021 	qbuf->queue_info[0].tc = 0;
1022 	qbuf->queue_info[0].q_teid = cpu_to_le32(q_ctx->q_teid);
1023 
1024 	return 0;
1025 }
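/* Usage sketch (mirrors the per-queue loop in ice_lag_aa_move_vf_qs()
 * below): fill a one-entry buffer, then issue the move for that queue.
 * src_lport/dst_lport are placeholders for the lports involved:
 *
 *	DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1);
 *
 *	if (!ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i))
 *		ice_aq_cfg_lan_txq(pri_hw, qbuf, __struct_size(qbuf), 1,
 *				   src_lport, dst_lport,
 *				   ICE_AQC_Q_CFG_MOVE_TC_CHNG, NULL);
 */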
1026 
1027 /**
1028  * ice_lag_aa_move_vf_qs - Move some/all VF queues to destination
1029  * @lag: primary interface's lag struct
1030  * @dest: index of destination port
1031  * @vsi_num: index of VF VSI in PF space
1032  * @all: if true move all queues to destination
1033  * @odd: VF-wide queue indicator for odd/even
1034  * @e_pf: PF struct for the event interface
1035  *
1036  * The parameter "all" controls whether we are splitting the queues
1037  * between two interfaces or moving them all to the destination interface.
1038  */
1039 static void ice_lag_aa_move_vf_qs(struct ice_lag *lag, u8 dest, u16 vsi_num,
1040 				  bool all, bool *odd, struct ice_pf *e_pf)
1041 {
1042 	DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1);
1043 	struct ice_hw *old_hw, *new_hw, *pri_hw, *sec_hw;
1044 	struct device *dev = ice_pf_to_dev(lag->pf);
1045 	struct ice_vsi_ctx *pv_ctx, *sv_ctx;
1046 	struct ice_lag_netdev_list ndlist;
1047 	u16 num_q, qbuf_size, sec_vsi_num;
1048 	u8 pri_lport, sec_lport;
1049 	u32 pvf_teid, svf_teid;
1050 	u16 vf_id;
1051 
1052 	vf_id = lag->pf->vsi[vsi_num]->vf->vf_id;
1053 	/* If sec_vf[] not defined, then no second interface to share with */
1054 	if (lag->sec_vf[vf_id])
1055 		sec_vsi_num = lag->sec_vf[vf_id]->idx;
1056 	else
1057 		return;
1058 
1059 	pri_lport = lag->bond_lport_pri;
1060 	sec_lport = lag->bond_lport_sec;
1061 
1062 	if (pri_lport == ICE_LAG_INVALID_PORT ||
1063 	    sec_lport == ICE_LAG_INVALID_PORT)
1064 		return;
1065 
1066 	if (!e_pf)
1067 		ice_lag_build_netdev_list(lag, &ndlist);
1068 
1069 	pri_hw = &lag->pf->hw;
1070 	if (e_pf && lag->pf != e_pf)
1071 		sec_hw = &e_pf->hw;
1072 	else
1073 		sec_hw = ice_lag_find_hw_by_lport(lag, sec_lport);
1074 
1075 	if (!pri_hw || !sec_hw)
1076 		return;
1077 
1078 	if (dest == ICE_LAGP_IDX) {
1079 		struct ice_vsi *vsi;
1080 
1081 		vsi = ice_get_main_vsi(lag->pf);
1082 		if (!vsi)
1083 			return;
1084 
1085 		old_hw = sec_hw;
1086 		new_hw = pri_hw;
1087 		ice_lag_config_eswitch(lag, vsi->netdev);
1088 	} else {
1089 		struct ice_pf *sec_pf = sec_hw->back;
1090 		struct ice_vsi *vsi;
1091 
1092 		vsi = ice_get_main_vsi(sec_pf);
1093 		if (!vsi)
1094 			return;
1095 
1096 		old_hw = pri_hw;
1097 		new_hw = sec_hw;
1098 		ice_lag_config_eswitch(lag, vsi->netdev);
1099 	}
1100 
1101 	pv_ctx = ice_get_vsi_ctx(pri_hw, vsi_num);
1102 	if (!pv_ctx) {
1103 		dev_warn(dev, "Unable to locate primary VSI %d context for LAG failover\n",
1104 			 vsi_num);
1105 		return;
1106 	}
1107 
1108 	sv_ctx = ice_get_vsi_ctx(sec_hw, sec_vsi_num);
1109 	if (!sv_ctx) {
1110 		dev_warn(dev, "Unable to locate secondary VSI %d context for LAG failover\n",
1111 			 sec_vsi_num);
1112 		return;
1113 	}
1114 
1115 	num_q = pv_ctx->num_lan_q_entries[0];
1116 	qbuf_size = __struct_size(qbuf);
1117 
1118 	/* Suspend traffic for primary VSI VF */
1119 	pvf_teid = le32_to_cpu(pv_ctx->sched.vsi_node[0]->info.node_teid);
1120 	ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, true);
1121 
1122 	/* Suspend traffic for secondary VSI VF */
1123 	svf_teid = le32_to_cpu(sv_ctx->sched.vsi_node[0]->info.node_teid);
1124 	ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, true);
1125 
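	/* Walk the VF's queues.  When splitting (all == false), alternate
	 * odd/even queue indices between the two ports; queues already
	 * homed on the destination (tracked in q_home[][]) are skipped.
	 */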
1126 	for (int i = 0; i < num_q; i++) {
1127 		struct ice_sched_node *n_prt, *q_node, *parent;
1128 		struct ice_port_info *pi, *new_pi;
1129 		struct ice_vsi_ctx *src_ctx;
1130 		struct ice_sched_node *p;
1131 		struct ice_q_ctx *q_ctx;
1132 		u16 dst_vsi_num;
1133 
1134 		pi = old_hw->port_info;
1135 		new_pi = new_hw->port_info;
1136 
1137 		*odd = !(*odd);
1138 		if ((dest == ICE_LAGP_IDX && *odd && !all) ||
1139 		    (dest == ICE_LAGS_IDX && !(*odd) && !all) ||
1140 		    lag->q_home[vf_id][i] == dest)
1141 			continue;
1142 
1143 		if (dest == ICE_LAGP_IDX)
1144 			dst_vsi_num = vsi_num;
1145 		else
1146 			dst_vsi_num = sec_vsi_num;
1147 
1148 		n_prt = ice_sched_get_free_qparent(new_hw->port_info,
1149 						   dst_vsi_num, 0,
1150 						   ICE_SCHED_NODE_OWNER_LAN);
1151 		if (!n_prt)
1152 			continue;
1153 
1154 		q_ctx = ice_get_lan_q_ctx(pri_hw, vsi_num, 0, i);
1155 		if (!q_ctx)
1156 			continue;
1157 
1158 		if (dest == ICE_LAGP_IDX)
1159 			src_ctx = sv_ctx;
1160 		else
1161 			src_ctx = pv_ctx;
1162 
1163 		q_node = ice_sched_find_node_by_teid(src_ctx->sched.vsi_node[0],
1164 						     q_ctx->q_teid);
1165 		if (!q_node)
1166 			continue;
1167 
1168 		qbuf->src_parent_teid = q_node->info.parent_teid;
1169 		qbuf->dst_parent_teid = n_prt->info.node_teid;
1170 
1171 		/* Move the node in the HW/FW */
1172 		if (ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i))
1173 			continue;
1174 
1175 		if (dest == ICE_LAGP_IDX)
1176 			ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1,
1177 					   sec_lport, pri_lport,
1178 					   ICE_AQC_Q_CFG_MOVE_TC_CHNG,
1179 					   NULL);
1180 		else
1181 			ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1,
1182 					   pri_lport, sec_lport,
1183 					   ICE_AQC_Q_CFG_MOVE_TC_CHNG,
1184 					   NULL);
1185 
1186 		/* Move the node in the SW */
1187 		parent = q_node->parent;
1188 		if (!parent)
1189 			continue;
1190 
1191 		for (int n = 0; n < parent->num_children; n++) {
1192 			int j;
1193 
1194 			if (parent->children[n] != q_node)
1195 				continue;
1196 
1197 			for (j = n + 1; j < parent->num_children;
1198 			     j++) {
1199 				parent->children[j - 1] =
1200 					parent->children[j];
1201 			}
1202 			parent->children[j - 1] = NULL;
1203 			parent->num_children--;
1204 			break;
1205 		}
1206 
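		/* unlink q_node from the source layer's sibling list */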
1207 		p = pi->sib_head[0][q_node->tx_sched_layer];
1208 		while (p) {
1209 			if (p->sibling == q_node) {
1210 				p->sibling = q_node->sibling;
1211 				break;
1212 			}
1213 			p = p->sibling;
1214 		}
1215 
1216 		if (pi->sib_head[0][q_node->tx_sched_layer] == q_node)
1217 			pi->sib_head[0][q_node->tx_sched_layer] =
1218 				q_node->sibling;
1219 
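		/* link q_node under its new parent and into the destination
		 * layer's sibling list
		 */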
1220 		q_node->parent = n_prt;
1221 		q_node->info.parent_teid = n_prt->info.node_teid;
1222 		q_node->sibling = NULL;
1223 		p = new_pi->sib_head[0][q_node->tx_sched_layer];
1224 		if (p) {
1225 			while (p) {
1226 				if (!p->sibling) {
1227 					p->sibling = q_node;
1228 					break;
1229 				}
1230 				p = p->sibling;
1231 			}
1232 		} else {
1233 			new_pi->sib_head[0][q_node->tx_sched_layer] =
1234 				q_node;
1235 		}
1236 
1237 		n_prt->children[n_prt->num_children++] = q_node;
1238 		lag->q_home[vf_id][i] = dest;
1239 	}
1240 
1241 	ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, false);
1242 	ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, false);
1243 
1244 	if (!e_pf)
1245 		ice_lag_destroy_netdev_list(lag, &ndlist);
1246 }
1247 
1248 /**
1249  * ice_lag_aa_failover - move VF queues in A/A mode
1250  * @lag: primary lag struct
1251  * @dest: index of destination port
1252  * @e_pf: PF struct for event port
1253  */
1254 void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf)
1255 {
1256 	bool odd = true, all = false;
1257 	int i;
1258 
1259 	/* Primary can be a target if down (cleanup), but secondary can't */
1260 	if (dest == ICE_LAGS_IDX && !(lag->port_bitmap & ICE_LAGS_M))
1261 		return;
1262 
1263 	/* Move all queues to a destination if only one port is active,
1264 	 * or no ports are active and dest is primary.
1265 	 */
1266 	if ((lag->port_bitmap ^ (ICE_LAGP_M | ICE_LAGS_M)) ||
1267 	    (!lag->port_bitmap && dest == ICE_LAGP_IDX))
1268 		all = true;
1269 
1270 	ice_for_each_vsi(lag->pf, i)
1271 		if (lag->pf->vsi[i] && lag->pf->vsi[i]->type == ICE_VSI_VF)
1272 			ice_lag_aa_move_vf_qs(lag, dest, i, all, &odd, e_pf);
1273 }
1274 
1275 /**
1276  * ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface
1277  * @lag: primary interface lag struct
1278  * @src_hw: HW struct current node location
1279  * @vsi_num: VSI index in PF space
1280  * @tc: traffic class to move
1281  */
1282 static void
1283 ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num,
1284 		      u8 tc)
1285 {
1286 	DEFINE_RAW_FLEX(struct ice_aqc_move_elem, buf, teid, 1);
1287 	struct device *dev = ice_pf_to_dev(lag->pf);
1288 	u16 numq, valq, num_moved, qbuf_size;
1289 	u16 buf_size = __struct_size(buf);
1290 	struct ice_aqc_cfg_txqs_buf *qbuf;
1291 	struct ice_hw *hw = &lag->pf->hw;
1292 	struct ice_sched_node *n_prt;
1293 	__le32 teid, parent_teid;
1294 	struct ice_vsi_ctx *ctx;
1295 	u32 tmp_teid;
1296 
1297 	ctx = ice_get_vsi_ctx(hw, vsi_num);
1298 	if (!ctx) {
1299 		dev_warn(dev, "Unable to locate VSI context for LAG reclaim\n");
1300 		return;
1301 	}
1302 
1303 	/* check to see if this VF is enabled on this TC */
1304 	if (!ctx->sched.vsi_node[tc])
1305 		return;
1306 
1307 	numq = ctx->num_lan_q_entries[tc];
1308 	teid = ctx->sched.vsi_node[tc]->info.node_teid;
1309 	tmp_teid = le32_to_cpu(teid);
1310 	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
1311 
1312 	/* if !teid or !numq, then this TC is not active */
1313 	if (!tmp_teid || !numq)
1314 		return;
1315 
1316 	/* suspend traffic */
1317 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, true))
1318 		dev_dbg(dev, "Problem suspending traffic for LAG node move\n");
1319 
1320 	/* reconfig queues for new port */
1321 	qbuf_size = struct_size(qbuf, queue_info, numq);
1322 	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
1323 	if (!qbuf) {
1324 		dev_warn(dev, "Failure allocating memory for VF queue recfg buffer\n");
1325 		goto resume_reclaim;
1326 	}
1327 
1328 	/* add the per queue info for the reconfigure command buffer */
1329 	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
1330 	if (!valq) {
1331 		dev_dbg(dev, "No valid queues found for LAG reclaim\n");
1332 		goto reclaim_none;
1333 	}
1334 
1335 	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, valq,
1336 			       src_hw->port_info->lport, hw->port_info->lport,
1337 			       ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
1338 		dev_warn(dev, "Failure to configure queues for LAG failover\n");
1339 		goto reclaim_qerr;
1340 	}
1341 
1342 reclaim_none:
1343 	kfree(qbuf);
1344 
1345 	/* find parent in primary tree */
1346 	n_prt = ice_lag_get_sched_parent(hw, tc);
1347 	if (!n_prt)
1348 		goto resume_reclaim;
1349 
1350 	/* Move node to new parent */
1351 	buf->hdr.src_parent_teid = parent_teid;
1352 	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
1353 	buf->hdr.num_elems = cpu_to_le16(1);
1354 	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
1355 	buf->teid[0] = teid;
1356 
1357 	if (ice_aq_move_sched_elems(&lag->pf->hw, buf, buf_size, &num_moved))
1358 		dev_warn(dev, "Failure to move VF nodes for LAG reclaim\n");
1359 	else
1360 		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
1361 
1362 	goto resume_reclaim;
1363 
1364 reclaim_qerr:
1365 	kfree(qbuf);
1366 
1367 resume_reclaim:
1368 	/* restart traffic */
1369 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, false))
1370 		dev_warn(dev, "Problem restarting traffic for LAG node reclaim\n");
1371 }
1372 
1373 /**
1374  * ice_lag_reclaim_vf_nodes - primary reclaims VF nodes when an interface leaves the bond
1375  * @lag: primary interface lag struct
1376  * @src_hw: HW struct for current node location
1377  */
1378 static void
1379 ice_lag_reclaim_vf_nodes(struct ice_lag *lag, struct ice_hw *src_hw)
1380 {
1381 	struct ice_pf *pf;
1382 	int i, tc;
1383 
1384 	if (!lag->primary || !src_hw)
1385 		return;
1386 
1387 	pf = lag->pf;
1388 	ice_for_each_vsi(pf, i)
1389 		if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF)
1390 			ice_for_each_traffic_class(tc)
1391 				ice_lag_reclaim_vf_tc(lag, src_hw, i, tc);
1392 }
1393 
1394 /**
1395  * ice_lag_link - handle LAG link event
1396  * @lag: LAG info struct
1397  */
1398 static void ice_lag_link(struct ice_lag *lag)
1399 {
1400 	struct ice_pf *pf = lag->pf;
1401 
1402 	if (lag->bonded)
1403 		dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n",
1404 			 netdev_name(lag->netdev));
1405 
1406 	lag->bonded = true;
1407 	lag->role = ICE_LAG_UNSET;
1408 	lag->need_fltr_cfg = true;
1409 	netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n");
1410 }
1411 
1412 /**
1413  * ice_lag_act_bkup_unlink - handle unlink event for A/B bond
1414  * @lag: LAG info struct
1415  */
1416 static void ice_lag_act_bkup_unlink(struct ice_lag *lag)
1417 {
1418 	u8 pri_port, act_port, loc_port;
1419 	struct ice_pf *pf = lag->pf;
1420 
1421 	if (!lag->bonded) {
1422 		netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n");
1423 		return;
1424 	}
1425 
1426 	if (lag->primary) {
1427 		act_port = lag->active_port;
1428 		pri_port = lag->pf->hw.port_info->lport;
1429 		if (act_port != pri_port && act_port != ICE_LAG_INVALID_PORT)
1430 			ice_lag_move_vf_nodes(lag, act_port, pri_port);
1431 		lag->primary = false;
1432 		lag->active_port = ICE_LAG_INVALID_PORT;
1433 
1434 		/* Config primary's eswitch back to normal operation. */
1435 		ice_lag_config_eswitch(lag, lag->netdev);
1436 	} else {
1437 		struct ice_lag *primary_lag;
1438 
1439 		primary_lag = ice_lag_find_primary(lag);
1440 		if (primary_lag) {
1441 			act_port = primary_lag->active_port;
1442 			pri_port = primary_lag->pf->hw.port_info->lport;
1443 			loc_port = pf->hw.port_info->lport;
1444 			if (act_port == loc_port &&
1445 			    act_port != ICE_LAG_INVALID_PORT) {
1446 				ice_lag_reclaim_vf_nodes(primary_lag,
1447 							 &lag->pf->hw);
1448 				primary_lag->active_port = ICE_LAG_INVALID_PORT;
1449 			}
1450 		}
1451 	}
1452 }
1453 
1454 /**
1455  * ice_lag_aa_unlink - handle unlink event for Active-Active bond
1456  * @lag: LAG info struct
1457  */
1458 static void ice_lag_aa_unlink(struct ice_lag *lag)
1459 {
1460 	struct ice_lag *pri_lag;
1461 
1462 	if (lag->primary) {
1463 		pri_lag = lag;
1464 		lag->port_bitmap &= ~ICE_LAGP_M;
1465 	} else {
1466 		pri_lag = ice_lag_find_primary(lag);
1467 		if (pri_lag)
1468 			pri_lag->port_bitmap &= ~ICE_LAGS_M;
1469 	}
1470 
1471 	if (pri_lag) {
1472 		ice_lag_aa_failover(pri_lag, ICE_LAGP_IDX, lag->pf);
1473 		if (lag->primary)
1474 			pri_lag->bond_lport_pri = ICE_LAG_INVALID_PORT;
1475 		else
1476 			pri_lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
1477 	}
1478 }
1479 
1480 /**
1481  * ice_lag_link_unlink - helper function to call lag_link/unlink
1482  * @lag: lag info struct
1483  * @ptr: opaque pointer data
1484  */
1485 static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr)
1486 {
1487 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
1488 	struct netdev_notifier_changeupper_info *info = ptr;
1489 
1490 	if (netdev != lag->netdev)
1491 		return;
1492 
1493 	if (info->linking) {
1494 		ice_lag_link(lag);
1495 	} else {
1496 		if (lag->bond_aa)
1497 			ice_lag_aa_unlink(lag);
1498 		else
1499 			ice_lag_act_bkup_unlink(lag);
1500 
1501 		lag->bonded = false;
1502 		lag->role = ICE_LAG_NONE;
1503 		lag->upper_netdev = NULL;
1504 		lag->bond_aa = false;
1505 		lag->need_fltr_cfg = false;
1506 	}
1507 }
1508 
1509 /**
1510  * ice_lag_set_swid - set the SWID on secondary interface
1511  * @primary_swid: primary interface's SWID
1512  * @local_lag: local interface's LAG struct
1513  * @link: Is this a linking activity
1514  *
1515  * If link is false, then primary_swid should not be expected to be valid.
1516  * This function should never be called in interrupt context.
1517  */
1518 static void
1519 ice_lag_set_swid(u16 primary_swid, struct ice_lag *local_lag,
1520 		 bool link)
1521 {
1522 	struct ice_aqc_alloc_free_res_elem *buf;
1523 	struct ice_aqc_set_port_params *cmd;
1524 	struct libie_aq_desc desc;
1525 	u16 buf_len, swid;
1526 	int status, i;
1527 
1528 	buf_len = struct_size(buf, elem, 1);
1529 	buf = kzalloc(buf_len, GFP_KERNEL);
1530 	if (!buf) {
1531 		dev_err(ice_pf_to_dev(local_lag->pf), "-ENOMEM error setting SWID\n");
1532 		return;
1533 	}
1534 
1535 	buf->num_elems = cpu_to_le16(1);
1536 	buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_SWID);
1537 	/* if unlinking, need to free the shared resource */
1538 	if (!link && local_lag->bond_swid) {
1539 		buf->elem[0].e.sw_resp = cpu_to_le16(local_lag->bond_swid);
1540 		status = ice_aq_alloc_free_res(&local_lag->pf->hw, buf,
1541 					       buf_len, ice_aqc_opc_free_res);
1542 		if (status)
1543 			dev_err(ice_pf_to_dev(local_lag->pf), "Error freeing SWID during LAG unlink\n");
1544 		local_lag->bond_swid = 0;
1545 	}
1546 
1547 	if (link) {
1548 		buf->res_type |= cpu_to_le16(ICE_LAG_RES_SHARED |
1549 					      ICE_LAG_RES_VALID);
1550 		/* store the primary's SWID in case it leaves bond first */
1551 		local_lag->bond_swid = primary_swid;
1552 		buf->elem[0].e.sw_resp = cpu_to_le16(local_lag->bond_swid);
1553 	} else {
1554 		buf->elem[0].e.sw_resp =
1555 			cpu_to_le16(local_lag->pf->hw.port_info->sw_id);
1556 	}
1557 
1558 	status = ice_aq_alloc_free_res(&local_lag->pf->hw, buf, buf_len,
1559 				       ice_aqc_opc_alloc_res);
1560 	if (status)
1561 		dev_err(ice_pf_to_dev(local_lag->pf), "Error subscribing to SWID 0x%04X\n",
1562 			local_lag->bond_swid);
1563 
1564 	kfree(buf);
1565 
1566 	/* Configure port param SWID to correct value */
1567 	if (link)
1568 		swid = primary_swid;
1569 	else
1570 		swid = local_lag->pf->hw.port_info->sw_id;
1571 
1572 	cmd = libie_aq_raw(&desc);
1573 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params);
1574 
1575 	cmd->swid = cpu_to_le16(ICE_AQC_PORT_SWID_VALID | swid);
1576 	/* If this is happening in reset context, it is possible that the
1577 	 * primary interface has not finished setting its SWID to SHARED
1578 	 * yet.  Allow retries to account for this timing issue between
1579 	 * interfaces.
1580 	 */
1581 	for (i = 0; i < ICE_LAG_RESET_RETRIES; i++) {
1582 		status = ice_aq_send_cmd(&local_lag->pf->hw, &desc, NULL, 0,
1583 					 NULL);
1584 		if (!status)
1585 			break;
1586 
1587 		usleep_range(1000, 2000);
1588 	}
1589 
1590 	if (status)
1591 		dev_err(ice_pf_to_dev(local_lag->pf), "Error setting SWID in port params %d\n",
1592 			status);
1593 }
1594 
1595 /**
1596  * ice_lag_primary_swid - set/clear the SHARED attrib of primary's SWID
1597  * @lag: primary interface's lag struct
1598  * @link: is this a linking activity
1599  *
1600  * Implement setting primary SWID as shared using 0x020B
1601  */
1602 static void ice_lag_primary_swid(struct ice_lag *lag, bool link)
1603 {
1604 	struct ice_hw *hw = &lag->pf->hw;
1605 	u16 swid = hw->port_info->sw_id;
1606 
1607 	if (ice_share_res(hw, ICE_AQC_RES_TYPE_SWID, link, swid))
1608 		dev_warn(ice_pf_to_dev(lag->pf), "Failure to set primary interface shared status\n");
1609 }
1610 
1611 /**
1612  * ice_lag_add_prune_list - Adds event_pf's VSI to primary's prune list
1613  * @lag: lag info struct
1614  * @event_pf: PF struct for VSI we are adding to primary's prune list
1615  */
1616 static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
1617 {
1618 	u16 rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx, num_vsi = 1;
1619 	struct ice_sw_rule_vsi_list *s_rule;
1620 	struct device *dev;
1621 
1622 	dev = ice_pf_to_dev(lag->pf);
1623 	event_vsi_num = event_pf->vsi[0]->vsi_num;
1624 	prim_vsi_idx = lag->pf->vsi[0]->idx;
1625 
1626 	if (!ice_find_vsi_list_entry(&lag->pf->hw, ICE_SW_LKUP_VLAN,
1627 				     prim_vsi_idx, &vsi_list_id)) {
1628 		dev_warn(dev, "Could not locate prune list when setting up SRIOV LAG\n");
1629 		return;
1630 	}
1631 
1632 	rule_buf_sz = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
1633 	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
1634 	if (!s_rule) {
1635 		dev_warn(dev, "Error allocating space for prune list when configuring SRIOV LAG\n");
1636 		return;
1637 	}
1638 
1639 	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_PRUNE_LIST_SET);
1640 	s_rule->index = cpu_to_le16(vsi_list_id);
1641 	s_rule->number_vsi = cpu_to_le16(num_vsi);
1642 	s_rule->vsi[0] = cpu_to_le16(event_vsi_num);
1643 
1644 	if (ice_aq_sw_rules(&event_pf->hw, s_rule, rule_buf_sz, 1,
1645 			    ice_aqc_opc_update_sw_rules, NULL))
1646 		dev_warn(dev, "Error adding VSI prune list\n");
1647 	kfree(s_rule);
1648 }
1649 
1650 /**
1651  * ice_lag_del_prune_list - Remove secondary's vsi from primary's prune list
1652  * @lag: primary interface's ice_lag struct
1653  * @event_pf: PF struct for unlinking interface
1654  */
1655 static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
1656 {
1657 	u16 vsi_num, vsi_idx, rule_buf_sz, vsi_list_id, num_vsi = 1;
1658 	struct ice_sw_rule_vsi_list *s_rule;
1659 	struct device *dev;
1660 
1661 	dev = ice_pf_to_dev(lag->pf);
1662 	vsi_num = event_pf->vsi[0]->vsi_num;
1663 	vsi_idx = lag->pf->vsi[0]->idx;
1664 
1665 	if (!ice_find_vsi_list_entry(&lag->pf->hw, ICE_SW_LKUP_VLAN,
1666 				     vsi_idx, &vsi_list_id)) {
1667 		dev_warn(dev, "Could not locate prune list when unwinding SRIOV LAG\n");
1668 		return;
1669 	}
1670 
1671 	rule_buf_sz = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
1672 	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
1673 	if (!s_rule) {
1674 		dev_warn(dev, "Error allocating prune list when unwinding SRIOV LAG\n");
1675 		return;
1676 	}
1677 
1678 	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR);
1679 	s_rule->index = cpu_to_le16(vsi_list_id);
1680 	s_rule->number_vsi = cpu_to_le16(num_vsi);
1681 	s_rule->vsi[0] = cpu_to_le16(vsi_num);
1682 
1683 	if (ice_aq_sw_rules(&event_pf->hw, (struct ice_aqc_sw_rules *)s_rule,
1684 			    rule_buf_sz, 1, ice_aqc_opc_update_sw_rules, NULL))
1685 		dev_warn(dev, "Error clearing VSI prune list\n");
1686 
1687 	kfree(s_rule);
1688 }
1689 
1690 /**
1691  * ice_lag_init_feature_support_flag - Check for package and NVM support for LAG
1692  * @pf: PF struct
1693  */
1694 static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
1695 {
1696 	struct ice_hw_common_caps *caps;
1697 
1698 	caps = &pf->hw.dev_caps.common_cap;
1699 	if (caps->roce_lag)
1700 		ice_set_feature_support(pf, ICE_F_ROCE_LAG);
1701 	else
1702 		ice_clear_feature_support(pf, ICE_F_ROCE_LAG);
1703 
1704 	if (caps->sriov_lag && ice_pkg_has_lport_extract(&pf->hw))
1705 		ice_set_feature_support(pf, ICE_F_SRIOV_LAG);
1706 	else
1707 		ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
1708 
1709 	if (caps->sriov_aa_lag && ice_pkg_has_lport_extract(&pf->hw))
1710 		ice_set_feature_support(pf, ICE_F_SRIOV_AA_LAG);
1711 	else
1712 		ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG);
1713 }
1714 
1715 /**
1716  * ice_lag_changeupper_event - handle LAG changeupper event
1717  * @lag: LAG info struct
1718  * @ptr: opaque pointer data
1719  */
1720 static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
1721 {
1722 	struct netdev_notifier_changeupper_info *info = ptr;
1723 	struct ice_lag *primary_lag;
1724 	struct net_device *netdev;
1725 
1726 	netdev = netdev_notifier_info_to_dev(ptr);
1727 
1728 	/* not for this netdev */
1729 	if (netdev != lag->netdev)
1730 		return;
1731 
1732 	primary_lag = ice_lag_find_primary(lag);
1733 	if (info->linking) {
1734 		lag->upper_netdev = info->upper_dev;
1735 		/* If there is not already a primary interface in the LAG,
1736 		 * then mark this one as primary.
1737 		 */
1738 		if (!primary_lag) {
1739 			lag->primary = true;
1740 			if (!ice_is_switchdev_running(lag->pf))
1741 				return;
1742 
1743 			/* Configure primary's SWID to be shared */
1744 			ice_lag_primary_swid(lag, true);
1745 			primary_lag = lag;
1746 			lag->bond_lport_pri = lag->pf->hw.port_info->lport;
1747 			lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
1748 			lag->port_bitmap = 0;
1749 		} else {
1750 			u16 swid;
1751 
1752 			if (!ice_is_switchdev_running(primary_lag->pf))
1753 				return;
1754 
1755 			swid = primary_lag->pf->hw.port_info->sw_id;
1756 			ice_lag_set_swid(swid, lag, true);
1757 			ice_lag_add_prune_list(primary_lag, lag->pf);
1758 			primary_lag->bond_lport_sec =
1759 				lag->pf->hw.port_info->lport;
1760 		}
1761 		/* add filter for primary control packets */
1762 		ice_lag_cfg_lp_fltr(lag, true, true);
1763 	} else {
1764 		if (!primary_lag && lag->primary)
1765 			primary_lag = lag;
1766 
1767 		if (primary_lag) {
1768 			for (int i = 0; i < ICE_MAX_SRIOV_VFS; i++) {
1769 				if (primary_lag->sec_vf[i]) {
1770 					ice_vsi_release(primary_lag->sec_vf[i]);
1771 					primary_lag->sec_vf[i] = NULL;
1772 				}
1773 			}
1774 		}
1775 
1776 		if (!lag->primary) {
1777 			ice_lag_set_swid(0, lag, false);
1778 			if (primary_lag)
1779 				primary_lag->bond_lport_sec =
1780 					ICE_LAG_INVALID_PORT;
1781 		} else {
1782 			if (primary_lag && lag->primary) {
1783 				ice_lag_primary_swid(lag, false);
1784 				ice_lag_del_prune_list(primary_lag, lag->pf);
1785 			}
1786 		}
1787 		/* remove filter for control packets */
1788 		ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa);
1789 	}
1790 }
1791 
1792 /**
1793  * ice_lag_monitor_link - monitor interfaces entering/leaving the aggregate
1794  * @lag: lag info struct
1795  * @ptr: opaque data containing notifier event
1796  *
1797  * This function only operates after a primary has been set.
1798  */
1799 static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
1800 {
1801 	struct netdev_notifier_changeupper_info *info = ptr;
1802 	struct ice_hw *prim_hw, *active_hw;
1803 	struct net_device *event_netdev;
1804 	struct ice_pf *pf;
1805 	u8 prim_port;
1806 
1807 	if (!lag->primary)
1808 		return;
1809 
1810 	event_netdev = netdev_notifier_info_to_dev(ptr);
1811 	if (!netif_is_same_ice(lag->pf, event_netdev))
1812 		return;
1813 
1814 	if (info->upper_dev != lag->upper_netdev)
1815 		return;
1816 
1817 	if (info->linking)
1818 		return;
1819 
1820 	pf = lag->pf;
1821 	prim_hw = &pf->hw;
1822 	prim_port = prim_hw->port_info->lport;
1823 
1824 	/* Since there are only two interfaces allowed in SRIOV+LAG, if
1825 	 * one port is leaving, then nodes need to be on the primary
1826 	 * interface.
1827 	 */
1828 	if (lag->bond_aa) {
1829 		struct ice_netdev_priv *e_ndp;
1830 		struct ice_pf *e_pf;
1831 
1832 		e_ndp = netdev_priv(event_netdev);
1833 		e_pf = e_ndp->vsi->back;
1834 
1835 		if (lag->bond_lport_pri != ICE_LAG_INVALID_PORT &&
1836 		    lag->port_bitmap & ICE_LAGS_M) {
1837 			lag->port_bitmap &= ~ICE_LAGS_M;
1838 			ice_lag_aa_failover(lag, ICE_LAGP_IDX, e_pf);
1839 			lag->bond_lport_sec = ICE_LAG_INVALID_PORT;
1840 		}
1841 	} else {
1842 		if (prim_port != lag->active_port &&
1843 		    lag->active_port != ICE_LAG_INVALID_PORT) {
1844 			active_hw = ice_lag_find_hw_by_lport(lag,
1845 							     lag->active_port);
1846 			ice_lag_reclaim_vf_nodes(lag, active_hw);
1847 			lag->active_port = ICE_LAG_INVALID_PORT;
1848 		}
1849 	}
1850 }
1851 
1852 /**
1853  * ice_lag_monitor_act_bkup - keep track of which port is active in A/B LAG
1854  * @lag: lag info struct
1855  * @b_info: bonding info
1856  * @event_netdev: net_device for target netdev
1857  *
1858  * This function is for the primary PF to monitor changes in which port is
1859  * active and to handle changes for SRIOV VF functionality.
1860  */
1861 static void ice_lag_monitor_act_bkup(struct ice_lag *lag,
1862 				     struct netdev_bonding_info *b_info,
1863 				     struct net_device *event_netdev)
1864 {
1865 	struct ice_netdev_priv *event_np;
1866 	struct ice_pf *pf, *event_pf;
1867 	u8 prim_port, event_port;
1868 
1869 	pf = lag->pf;
1870 	if (!pf)
1871 		return;
1872 
1873 	event_np = netdev_priv(event_netdev);
1874 	event_pf = event_np->vsi->back;
1875 	event_port = event_pf->hw.port_info->lport;
1876 	prim_port = pf->hw.port_info->lport;
1877 
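	/* slave.state is BOND_STATE_ACTIVE (0) when this port has just
	 * become the bond's active slave
	 */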
1878 	if (!b_info->slave.state) {
1879 		/* if no port is currently active, then nodes and filters exist
1880 		 * on primary port, check if we need to move them
1881 		 */
1882 		if (lag->active_port == ICE_LAG_INVALID_PORT) {
1883 			if (event_port != prim_port)
1884 				ice_lag_move_vf_nodes(lag, prim_port,
1885 						      event_port);
1886 			lag->active_port = event_port;
1887 			ice_lag_config_eswitch(lag, event_netdev);
1888 			return;
1889 		}
1890 
1891 		/* active port is already set and is current event port */
1892 		if (lag->active_port == event_port)
1893 			return;
1894 		/* new active port */
1895 		ice_lag_move_vf_nodes(lag, lag->active_port, event_port);
1896 		lag->active_port = event_port;
1897 		ice_lag_config_eswitch(lag, event_netdev);
1898 	} else {
1899 		/* Port is not the currently active port (e.g. a new active
1900 		 * port has already claimed the nodes and filters).
1901 		 */
1902 		if (lag->active_port != event_port)
1903 			return;
1904 		/* The active port is now reporting backup state, so neither
1905 		 * port is active: mark the active port invalid and move
1906 		 * nodes and filters back to primary if not already there.
1907 		 */
1908 		if (event_port != prim_port)
1909 			ice_lag_move_vf_nodes(lag, event_port, prim_port);
1910 		lag->active_port = ICE_LAG_INVALID_PORT;
1911 	}
1912 }
1913 
1914 /**
1915  * ice_lag_aa_clear_spoof - adjust the placeholder VSI spoofing for A/A LAG
1916  * @vsi: placeholder VSI to adjust
1917  */
1918 static void ice_lag_aa_clear_spoof(struct ice_vsi *vsi)
1919 {
1920 	ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
1921 }
1922 
1923 /**
1924  * ice_lag_monitor_act_act - Keep track of active ports in A/A LAG
1925  * @lag: lag struct for primary interface
1926  * @b_info: bonding_info for event
1927  * @event_netdev: net_device for target netdev
1928  */
1929 static void ice_lag_monitor_act_act(struct ice_lag *lag,
1930 				    struct netdev_bonding_info *b_info,
1931 				    struct net_device *event_netdev)
1932 {
1933 	struct ice_netdev_priv *event_np;
1934 	u8 prim_port, event_port;
1935 	struct ice_pf *event_pf;
1936 
1937 	event_np = netdev_priv(event_netdev);
1938 	event_pf = event_np->vsi->back;
1939 	event_port = event_pf->hw.port_info->lport;
1940 	prim_port = lag->pf->hw.port_info->lport;
1941 
1942 	if (b_info->slave.link == BOND_LINK_UP) {
1943 		/* Port is coming up */
1944 		if (prim_port == event_port) {
1945 			/* Processing event for primary interface */
1946 			if (lag->bond_lport_pri == ICE_LAG_INVALID_PORT)
1947 				return;
1948 
1949 			if (!(lag->port_bitmap & ICE_LAGP_M)) {
1950 				/* Primary port was not marked up before; move
1951 				 * some or all VF queues to it and mark it up.
1952 				 */
1953 				lag->port_bitmap |= ICE_LAGP_M;
1954 				ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf);
1955 			}
1956 		} else {
1957 			if (lag->bond_lport_sec == ICE_LAG_INVALID_PORT)
1958 				return;
1959 
1960 			/* Create placeholder VSIs on secondary PF.
1961 			 * The placeholder is necessary so that we have
1962 			 * an element that represents the VF on the secondary
1963 			 * interface's scheduling tree.  This will be a tree
1964 			 * root for scheduling nodes when they are moved to
1965 			 * the secondary interface.
1966 			 */
1967 			if (!lag->sec_vf[0]) {
1968 				struct ice_vsi_cfg_params params = {};
1969 				struct ice_vsi *nvsi;
1970 				struct ice_vf *vf;
1971 				unsigned int bkt;
1972 
1973 				params.type = ICE_VSI_VF;
1974 				params.port_info = event_pf->hw.port_info;
1975 				params.flags = ICE_VSI_FLAG_INIT;
1976 
1977 				ice_for_each_vf(lag->pf, bkt, vf) {
1978 					params.vf = vf;
1979 					nvsi = ice_vsi_setup(event_pf,
1980 							     &params);
1981 					ice_lag_aa_clear_spoof(nvsi);
1982 					lag->sec_vf[vf->vf_id] = nvsi;
1983 				}
1984 			}
1985 
1986 			if (!(lag->port_bitmap & ICE_LAGS_M)) {
1987 				/* Secondary port was not marked up before;
1988 				 * move some or all VF queues to it and mark it up.
1989 				 */
1990 				lag->port_bitmap |= ICE_LAGS_M;
1991 				ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf);
1992 			}
1993 		}
1994 	} else {
1995 		/* Port is going down */
1996 		if (prim_port == event_port) {
1997 			lag->port_bitmap &= ~ICE_LAGP_M;
1998 			ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf);
1999 		} else {
2000 			lag->port_bitmap &= ~ICE_LAGS_M;
2001 			ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf);
2002 		}
2003 	}
2004 }
2005 
2006 /**
2007  * ice_lag_monitor_info - Calls relevant A/A or A/B monitoring function
2008  * @lag: lag info struct
2009  * @ptr: opaque data containing notifier event
2010  *
2011  * On the primary PF, dispatch the bonding_info event to the A/A or A/B
2012  * monitor to track the active port(s) and handle SRIOV VF functionality.
2013  */
2014 static void ice_lag_monitor_info(struct ice_lag *lag, void *ptr)
2015 {
2016 	struct netdev_notifier_bonding_info *info = ptr;
2017 	struct net_device *event_netdev, *event_upper;
2018 	struct netdev_bonding_info *bonding_info;
2019 
2020 	if (!lag->primary)
2021 		return;
2022 
2023 	event_netdev = netdev_notifier_info_to_dev(ptr);
2024 	bonding_info = &info->bonding_info;
2025 	rcu_read_lock();
2026 	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
2027 	rcu_read_unlock();
2028 	if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev)
2029 		return;
2030 
2031 	if (lag->bond_aa)
2032 		ice_lag_monitor_act_act(lag, bonding_info, event_netdev);
2033 	else
2034 		ice_lag_monitor_act_bkup(lag, bonding_info, event_netdev);
2035 }

2036 /**
2037  * ice_lag_chk_comp - evaluate bonded interface for feature support
2038  * @lag: lag info struct
2039  * @ptr: opaque data for netdev event info
2040  */
2041 static bool
2042 ice_lag_chk_comp(struct ice_lag *lag, void *ptr)
2043 {
2044 	struct netdev_notifier_bonding_info *info = ptr;
2045 	struct net_device *event_netdev, *event_upper;
2046 	struct netdev_bonding_info *bonding_info;
2047 	struct list_head *tmp;
2048 	struct device *dev;
2049 	int count = 0;
2050 
2051 	/* All members need to know whether the bond is A/A or A/B */
2052 	bonding_info = &info->bonding_info;
2053 	lag->bond_mode = bonding_info->master.bond_mode;
2054 	lag->bond_aa = lag->bond_mode != BOND_MODE_ACTIVEBACKUP;
2058 
2059 	if (!lag->primary)
2060 		return true;
2061 
2062 	event_netdev = netdev_notifier_info_to_dev(ptr);
2063 	rcu_read_lock();
2064 	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
2065 	rcu_read_unlock();
2066 	if (event_upper != lag->upper_netdev)
2067 		return true;
2068 
2069 	dev = ice_pf_to_dev(lag->pf);
2070 
2071 	/* Only switchdev mode is supported for SRIOV VF LAG;
2072 	 * the primary interface has to be in switchdev mode.
2073 	 */
2074 	if (!ice_is_switchdev_running(lag->pf)) {
2075 		dev_info(dev, "Primary interface not in switchdev mode - VF LAG disabled\n");
2076 		return false;
2077 	}
2078 
2079 	if (lag->bond_aa && !ice_is_feature_supported(lag->pf,
2080 						      ICE_F_SRIOV_AA_LAG))
2081 		return false;
2082 
2083 	list_for_each(tmp, lag->netdev_head) {
2084 		struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg;
2085 		struct ice_lag_netdev_list *entry;
2086 		struct ice_netdev_priv *peer_np;
2087 		struct net_device *peer_netdev;
2088 		struct ice_vsi *vsi, *peer_vsi;
2089 		struct ice_pf *peer_pf;
2090 
2091 		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
2092 		peer_netdev = entry->netdev;
2093 		if (!netif_is_ice(peer_netdev)) {
2094 			dev_info(dev, "Found %s non-ice netdev in LAG - VF LAG disabled\n",
2095 				 netdev_name(peer_netdev));
2096 			return false;
2097 		}
2098 
2099 		count++;
2100 		if (count > 2) {
2101 			dev_info(dev, "Found more than two netdevs in LAG - VF LAG disabled\n");
2102 			return false;
2103 		}
2104 
2105 		peer_np = netdev_priv(peer_netdev);
2106 		vsi = ice_get_main_vsi(lag->pf);
2107 		peer_vsi = peer_np->vsi;
2108 		if (lag->pf->pdev->bus != peer_vsi->back->pdev->bus ||
2109 		    lag->pf->pdev->slot != peer_vsi->back->pdev->slot) {
2110 			dev_info(dev, "Found %s on different device in LAG - VF LAG disabled\n",
2111 				 netdev_name(peer_netdev));
2112 			return false;
2113 		}
2114 
2115 		dcb_cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
2116 		peer_dcb_cfg = &peer_vsi->port_info->qos_cfg.local_dcbx_cfg;
2117 		if (memcmp(dcb_cfg, peer_dcb_cfg,
2118 			   sizeof(struct ice_dcbx_cfg))) {
2119 			dev_info(dev, "Found %s with different DCB in LAG - VF LAG disabled\n",
2120 				 netdev_name(peer_netdev));
2121 			return false;
2122 		}
2123 
2124 		peer_pf = peer_vsi->back;
2125 		if (test_bit(ICE_FLAG_FW_LLDP_AGENT, peer_pf->flags)) {
2126 			dev_warn(dev, "Found %s with FW LLDP agent active - VF LAG disabled\n",
2127 				 netdev_name(peer_netdev));
2128 			return false;
2129 		}
2130 	}
2131 
2132 	return true;
2133 }
2134 
2135 /**
2136  * ice_lag_unregister - handle netdev unregister events
2137  * @lag: LAG info struct
2138  * @event_netdev: netdev struct for target of notifier event
2139  */
2140 static void
2141 ice_lag_unregister(struct ice_lag *lag, struct net_device *event_netdev)
2142 {
2143 	struct ice_netdev_priv *np;
2144 	struct ice_pf *event_pf;
2145 	struct ice_lag *p_lag;
2146 
2147 	p_lag = ice_lag_find_primary(lag);
2148 	np = netdev_priv(event_netdev);
2149 	event_pf = np->vsi->back;
2150 
2151 	if (p_lag) {
2152 		if (p_lag->active_port != p_lag->pf->hw.port_info->lport &&
2153 		    p_lag->active_port != ICE_LAG_INVALID_PORT) {
2154 			struct ice_hw *active_hw;
2155 
2156 			active_hw = ice_lag_find_hw_by_lport(lag,
2157 							     p_lag->active_port);
2158 			if (active_hw)
2159 				ice_lag_reclaim_vf_nodes(p_lag, active_hw);
2160 			lag->active_port = ICE_LAG_INVALID_PORT;
2161 		}
2162 	}
2163 
2164 	/* primary interface processing an event for itself */
2165 	if (lag->primary && lag->netdev == event_netdev)
2166 		ice_lag_primary_swid(lag, false);
2167 
2168 	/* primary interface processing an event for the secondary */
2169 	if (lag->primary && lag->netdev != event_netdev)
2170 		ice_lag_del_prune_list(lag, event_pf);
2171 
2172 	/* secondary interface processing an event for itself */
2173 	if (!lag->primary && lag->netdev == event_netdev)
2174 		ice_lag_set_swid(0, lag, false);
2175 }
2176 
2177 /**
2178  * ice_lag_monitor_rdma - set and clear rdma functionality
2179  * @lag: pointer to lag struct
2180  * @ptr: opaque data for netdev event info
2181  */
2182 static void
2183 ice_lag_monitor_rdma(struct ice_lag *lag, void *ptr)
2184 {
2185 	struct netdev_notifier_changeupper_info *info = ptr;
2186 	struct net_device *netdev;
2187 
2188 	netdev = netdev_notifier_info_to_dev(ptr);
2189 
2190 	if (netdev != lag->netdev)
2191 		return;
2192 
2193 	if (info->linking)
2194 		ice_clear_rdma_cap(lag->pf);
2195 	else
2196 		ice_set_rdma_cap(lag->pf);
2197 }
2198 
2199 /**
2200  * ice_lag_chk_disabled_bond - monitor interfaces entering/leaving disabled bond
2201  * @lag: lag info struct
2202  * @ptr: opaque data containing event
2203  *
2204  * As interfaces enter a bond, determine whether the bond is currently
2205  * SRIOV LAG compliant and flag it if not.  As interfaces leave the
2206  * bond, reset their compliance status.
2207  */
2208 static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr)
2209 {
2210 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
2211 	struct netdev_notifier_changeupper_info *info = ptr;
2212 	struct ice_lag *prim_lag;
2213 
2214 	if (netdev != lag->netdev)
2215 		return;
2216 
2217 	if (info->linking) {
2218 		prim_lag = ice_lag_find_primary(lag);
2219 		if (prim_lag &&
2220 		    !ice_is_feature_supported(prim_lag->pf, ICE_F_SRIOV_LAG)) {
2221 			ice_clear_feature_support(lag->pf, ICE_F_SRIOV_LAG);
2222 			netdev_info(netdev, "Interface added to non-compliant SRIOV LAG aggregate\n");
2223 		}
2224 	} else {
2225 		ice_lag_init_feature_support_flag(lag->pf);
2226 	}
2227 }
2228 
2229 /**
2230  * ice_lag_disable_sriov_bond - set members of bond as not supporting SRIOV LAG
2231  * @lag: primary interface's lag struct
2232  */
2233 static void ice_lag_disable_sriov_bond(struct ice_lag *lag)
2234 {
2235 	struct ice_netdev_priv *np = netdev_priv(lag->netdev);
2236 	struct ice_pf *pf = np->vsi->back;
2237 
2238 	ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
2239 	ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG);
2240 }
2241 
2242 /**
2243  * ice_lag_preset_drop_fltr - preset drop filter for A/B bonds
2244  * @lag: local lag struct
2245  * @ptr: opaque data containing event
2246  *
2247  * Sets the initial drop filter for secondary interface in an
2248  * active-backup bond
2249  */
2250 static void ice_lag_preset_drop_fltr(struct ice_lag *lag, void *ptr)
2251 {
2252 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
2253 
2254 	if (netdev != lag->netdev || lag->primary || !lag->need_fltr_cfg)
2255 		return;
2256 
2257 	ice_lag_cfg_drop_fltr(lag, true);
2258 	lag->need_fltr_cfg = false;
2259 }
2260 
2261 /**
2262  * ice_lag_process_event - process a task assigned to the lag_wq
2263  * @work: pointer to work_struct
2264  */
2265 static void ice_lag_process_event(struct work_struct *work)
2266 {
2267 	struct netdev_notifier_changeupper_info *info;
2268 	struct ice_lag_work *lag_work;
2269 	struct net_device *netdev;
2270 	struct list_head *tmp, *n;
2271 	struct ice_pf *pf;
2272 
2273 	lag_work = container_of(work, struct ice_lag_work, lag_task);
2274 	pf = lag_work->lag->pf;
2275 
2276 	mutex_lock(&pf->lag_mutex);
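	/* expose the bond-member list gathered in the notifier so the
	 * handlers below can walk it via lag->netdev_head
	 */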
2277 	lag_work->lag->netdev_head = &lag_work->netdev_list.node;
2278 
2279 	switch (lag_work->event) {
2280 	case NETDEV_CHANGEUPPER:
2281 		info = &lag_work->info.changeupper_info;
2282 		ice_lag_chk_disabled_bond(lag_work->lag, info);
2283 		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
2284 			ice_lag_monitor_link(lag_work->lag, info);
2285 			ice_lag_changeupper_event(lag_work->lag, info);
2286 			ice_lag_link_unlink(lag_work->lag, info);
2287 		}
2288 		ice_lag_monitor_rdma(lag_work->lag, info);
2289 		break;
2290 	case NETDEV_BONDING_INFO:
2291 		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
2292 			if (!ice_lag_chk_comp(lag_work->lag,
2293 					      &lag_work->info.bonding_info)) {
2294 				netdev = lag_work->info.bonding_info.info.dev;
2295 				ice_lag_disable_sriov_bond(lag_work->lag);
2296 				ice_lag_unregister(lag_work->lag, netdev);
2297 				goto lag_cleanup;
2298 			}
2299 			ice_lag_cfg_pf_fltrs(lag_work->lag,
2300 					     &lag_work->info.bonding_info);
2301 			ice_lag_preset_drop_fltr(lag_work->lag,
2302 						 &lag_work->info.bonding_info);
2303 			ice_lag_monitor_info(lag_work->lag,
2304 					     &lag_work->info.bonding_info);
2305 		}
2306 		ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info);
2307 		break;
2308 	case NETDEV_UNREGISTER:
2309 		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
2310 			netdev = lag_work->info.bonding_info.info.dev;
2311 			if ((netdev == lag_work->lag->netdev ||
2312 			     lag_work->lag->primary) && lag_work->lag->bonded)
2313 				ice_lag_unregister(lag_work->lag, netdev);
2314 		}
2315 		break;
2316 	default:
2317 		break;
2318 	}
2319 
2320 lag_cleanup:
2321 	/* cleanup resources allocated for this work item */
2322 	list_for_each_safe(tmp, n, &lag_work->netdev_list.node) {
2323 		struct ice_lag_netdev_list *entry;
2324 
2325 		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
2326 		list_del(&entry->node);
2327 		kfree(entry);
2328 	}
2329 	lag_work->lag->netdev_head = NULL;
2330 
2331 	mutex_unlock(&pf->lag_mutex);
2332 
2333 	kfree(lag_work);
2334 }
2335 
2336 /**
2337  * ice_lag_event_handler - handle LAG events from netdev
2338  * @notif_blk: notifier block registered by this netdev
2339  * @event: event type
2340  * @ptr: opaque data containing notifier event
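 *
 * Copies the event data into an ice_lag_work item and queues it on
 * ice_lag_wq; processing happens asynchronously in ice_lag_process_event().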
2341  */
2342 static int
2343 ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
2344 		      void *ptr)
2345 {
2346 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
2347 	struct net_device *upper_netdev;
2348 	struct ice_lag_work *lag_work;
2349 	struct ice_lag *lag;
2350 
2351 	if (!netif_is_ice(netdev))
2352 		return NOTIFY_DONE;
2353 
2354 	if (event != NETDEV_CHANGEUPPER && event != NETDEV_BONDING_INFO &&
2355 	    event != NETDEV_UNREGISTER)
2356 		return NOTIFY_DONE;
2357 
2358 	if (!(netdev->priv_flags & IFF_BONDING))
2359 		return NOTIFY_DONE;
2360 
2361 	lag = container_of(notif_blk, struct ice_lag, notif_block);
2362 	if (!lag->netdev)
2363 		return NOTIFY_DONE;
2364 
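	/* LAG events are only supported in the default network namespace */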
2365 	if (!net_eq(dev_net(netdev), &init_net))
2366 		return NOTIFY_DONE;
2367 
2368 	/* This memory will be freed at the end of ice_lag_process_event */
2369 	lag_work = kzalloc(sizeof(*lag_work), GFP_KERNEL);
2370 	if (!lag_work)
2371 		return notifier_from_errno(-ENOMEM);
2372 
2373 	lag_work->event_netdev = netdev;
2374 	lag_work->lag = lag;
2375 	lag_work->event = event;
2376 	if (event == NETDEV_CHANGEUPPER) {
2377 		struct netdev_notifier_changeupper_info *info = ptr;
2378 
2379 		upper_netdev = info->upper_dev;
2380 	} else {
2381 		upper_netdev = netdev_master_upper_dev_get(netdev);
2382 	}
2383 
2384 	INIT_LIST_HEAD(&lag_work->netdev_list.node);
2385 	if (upper_netdev) {
2386 		struct ice_lag_netdev_list *nd_list;
2387 		struct net_device *tmp_nd;
2388 
2389 		rcu_read_lock();
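		/* GFP_ATOMIC - cannot sleep inside the RCU read-side
		 * critical section
		 */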
2390 		for_each_netdev_in_bond_rcu(upper_netdev, tmp_nd) {
2391 			nd_list = kzalloc(sizeof(*nd_list), GFP_ATOMIC);
2392 			if (!nd_list)
2393 				break;
2394 
2395 			nd_list->netdev = tmp_nd;
2396 			list_add(&nd_list->node, &lag_work->netdev_list.node);
2397 		}
2398 		rcu_read_unlock();
2399 	}
2400 
2401 	switch (event) {
2402 	case NETDEV_CHANGEUPPER:
2403 		lag_work->info.changeupper_info =
2404 			*((struct netdev_notifier_changeupper_info *)ptr);
2405 		break;
2406 	case NETDEV_BONDING_INFO:
2407 		lag_work->info.bonding_info =
2408 			*((struct netdev_notifier_bonding_info *)ptr);
2409 		break;
2410 	default:
2411 		lag_work->info.notifier_info =
2412 			*((struct netdev_notifier_info *)ptr);
2413 		break;
2414 	}
2415 
2416 	INIT_WORK(&lag_work->lag_task, ice_lag_process_event);
2417 	queue_work(ice_lag_wq, &lag_work->lag_task);
2418 
2419 	return NOTIFY_DONE;
2420 }
2421 
2422 /**
2423  * ice_register_lag_handler - register LAG handler on netdev
2424  * @lag: LAG struct
2425  */
2426 static int ice_register_lag_handler(struct ice_lag *lag)
2427 {
2428 	struct notifier_block *notif_blk = &lag->notif_block;
2429 	struct device *dev = ice_pf_to_dev(lag->pf);
2430 
2431 	if (!notif_blk->notifier_call) {
2432 		notif_blk->notifier_call = ice_lag_event_handler;
2433 		if (register_netdevice_notifier(notif_blk)) {
2434 			notif_blk->notifier_call = NULL;
2435 			dev_err(dev, "FAIL register LAG event handler!\n");
2436 			return -EINVAL;
2437 		}
2438 		dev_dbg(dev, "LAG event handler registered\n");
2439 	}
2440 	return 0;
2441 }
2442 
2443 /**
2444  * ice_unregister_lag_handler - unregister LAG handler on netdev
2445  * @lag: LAG struct
2446  */
2447 static void ice_unregister_lag_handler(struct ice_lag *lag)
2448 {
2449 	struct notifier_block *notif_blk = &lag->notif_block;
2450 	struct device *dev = ice_pf_to_dev(lag->pf);
2451 
2452 	if (notif_blk->notifier_call) {
2453 		unregister_netdevice_notifier(notif_blk);
2454 		dev_dbg(dev, "LAG event handler unregistered\n");
2455 	}
2456 }
2457 
2458 /**
2459  * ice_create_lag_recipe - create a new switch recipe for LAG filter rules
2460  * @hw: pointer to HW struct
2461  * @rid: pointer to u16 to pass back recipe index
2462  * @base_recipe: recipe to base the new recipe on
2463  * @prio: priority for new recipe
2464  *
2465  * Returns 0 on success, negative error code on failure.
2466  */
2467 static int ice_create_lag_recipe(struct ice_hw *hw, u16 *rid,
2468 				 const u8 *base_recipe, u8 prio)
2469 {
2470 	struct ice_aqc_recipe_data_elem *new_rcp;
2471 	int err;
2472 
2473 	err = ice_alloc_recipe(hw, rid);
2474 	if (err)
2475 		return err;
2476 
2477 	new_rcp = kzalloc(ICE_RECIPE_LEN * ICE_MAX_NUM_RECIPES, GFP_KERNEL);
2478 	if (!new_rcp)
2479 		return -ENOMEM;
2480 
2481 	memcpy(new_rcp, base_recipe, ICE_RECIPE_LEN);
2482 	new_rcp->content.act_ctrl_fwd_priority = prio;
2483 	new_rcp->content.rid = *rid | ICE_AQ_RECIPE_ID_IS_ROOT;
2484 	new_rcp->recipe_indx = *rid;
2485 	bitmap_zero((unsigned long *)new_rcp->recipe_bitmap,
2486 		    ICE_MAX_NUM_RECIPES);
2487 	set_bit(*rid, (unsigned long *)new_rcp->recipe_bitmap);
2488 
2489 	err = ice_aq_add_recipe(hw, new_rcp, 1, NULL);
2490 	if (err)
2491 		*rid = 0;
2492 
2493 	kfree(new_rcp);
2494 	return err;
2495 }
2496 
2497 /**
2498  * ice_lag_move_vf_nodes_tc_sync - move a VF's nodes for a tc during reset
2499  * @lag: primary interface's lag struct
2500  * @dest_hw: HW struct for destination's interface
2501  * @vsi_num: VSI index in PF space
2502  * @tc: traffic class to move
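 *
 * Suspends the VSI's scheduling node for @tc, reconfigures its Tx queues
 * onto the destination port, moves the node under a parent in the
 * destination tree, then resumes the node.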
2503  */
2504 static void
2505 ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw,
2506 			      u16 vsi_num, u8 tc)
2507 {
2508 	DEFINE_RAW_FLEX(struct ice_aqc_move_elem, buf, teid, 1);
2509 	struct device *dev = ice_pf_to_dev(lag->pf);
2510 	u16 numq, valq, num_moved, qbuf_size;
2511 	u16 buf_size = __struct_size(buf);
2512 	struct ice_aqc_cfg_txqs_buf *qbuf;
2513 	struct ice_hw *hw = &lag->pf->hw;
2514 	struct ice_sched_node *n_prt;
2515 	__le32 teid, parent_teid;
2516 	struct ice_vsi_ctx *ctx;
2517 	u32 tmp_teid;
2518 
2519 	ctx = ice_get_vsi_ctx(hw, vsi_num);
2520 	if (!ctx) {
2521 		dev_warn(dev, "LAG rebuild failed after reset due to VSI Context failure\n");
2522 		return;
2523 	}
2524 
2525 	if (!ctx->sched.vsi_node[tc])
2526 		return;
2527 
2528 	numq = ctx->num_lan_q_entries[tc];
2529 	teid = ctx->sched.vsi_node[tc]->info.node_teid;
2530 	tmp_teid = le32_to_cpu(teid);
2531 	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
2532 
2533 	if (!tmp_teid || !numq)
2534 		return;
2535 
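	/* suspend the VSI's scheduling node while its queues are moved */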
2536 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, true))
2537 		dev_dbg(dev, "Problem suspending traffic during reset rebuild\n");
2538 
2539 	/* reconfig queues for new port */
2540 	qbuf_size = struct_size(qbuf, queue_info, numq);
2541 	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
2542 	if (!qbuf) {
2543 		dev_warn(dev, "Failure allocating VF queue recfg buffer for reset rebuild\n");
2544 		goto resume_sync;
2545 	}
2546 
2547 	/* add the per queue info for the reconfigure command buffer */
2548 	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
2549 	if (!valq) {
2550 		dev_warn(dev, "Failure to reconfig queues for LAG reset rebuild\n");
2551 		goto sync_none;
2552 	}
2553 
2554 	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport,
2555 			       dest_hw->port_info->lport,
2556 			       ICE_AQC_Q_CFG_TC_CHNG, NULL)) {
2557 		dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n");
2558 		goto sync_qerr;
2559 	}
2560 
2561 sync_none:
2562 	kfree(qbuf);
2563 
2564 	/* find parent in destination tree */
2565 	n_prt = ice_lag_get_sched_parent(dest_hw, tc);
2566 	if (!n_prt)
2567 		goto resume_sync;
2568 
2569 	/* Move node to new parent */
2570 	buf->hdr.src_parent_teid = parent_teid;
2571 	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
2572 	buf->hdr.num_elems = cpu_to_le16(1);
2573 	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
2574 	buf->teid[0] = teid;
2575 
2576 	if (ice_aq_move_sched_elems(&lag->pf->hw, buf, buf_size, &num_moved))
2577 		dev_warn(dev, "Failure to move VF nodes for LAG reset rebuild\n");
2578 	else
2579 		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
2580 
2581 	goto resume_sync;
2582 
2583 sync_qerr:
2584 	kfree(qbuf);
2585 
2586 resume_sync:
2587 	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, false))
2588 		dev_warn(dev, "Problem restarting traffic for LAG node reset rebuild\n");
2589 }
2590 
2591 /**
2592  * ice_lag_move_vf_nodes_sync - move VF nodes to active interface
2593  * @lag: primary interface's lag struct
2594  * @dest_hw: HW struct for the currently active interface
2595  *
2596  * This function is used in a reset context, outside of event handling,
2597  * to move the VF nodes to the secondary interface when that interface
2598  * is the active interface during a reset rebuild.
2599  */
2600 static void
2601 ice_lag_move_vf_nodes_sync(struct ice_lag *lag, struct ice_hw *dest_hw)
2602 {
2603 	struct ice_pf *pf;
2604 	int i, tc;
2605 
2606 	if (!lag->primary || !dest_hw)
2607 		return;
2608 
2609 	pf = lag->pf;
2610 	ice_for_each_vsi(pf, i)
2611 		if (pf->vsi[i] && pf->vsi[i]->type == ICE_VSI_VF)
2612 			ice_for_each_traffic_class(tc)
2613 				ice_lag_move_vf_nodes_tc_sync(lag, dest_hw, i,
2614 							      tc);
2615 }
2616 
2617 /**
2618  * ice_init_lag - initialize support for LAG
2619  * @pf: PF struct
2620  *
2621  * Alloc memory for LAG structs and initialize the elements.
2622  * Memory will be freed in ice_deinit_lag
2623  */
2624 int ice_init_lag(struct ice_pf *pf)
2625 {
2626 	struct device *dev = ice_pf_to_dev(pf);
2627 	struct ice_lag *lag;
2628 	struct ice_vsi *vsi;
2629 	u64 recipe_bits = 0;
2630 	int n, err;
2631 
2632 	ice_lag_init_feature_support_flag(pf);
2633 	if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
2634 		return 0;
2635 
2636 	pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL);
2637 	if (!pf->lag)
2638 		return -ENOMEM;
2639 	lag = pf->lag;
2640 
2641 	vsi = ice_get_main_vsi(pf);
2642 	if (!vsi) {
2643 		dev_err(dev, "couldn't get main vsi, link aggregation init fail\n");
2644 		err = -EIO;
2645 		goto lag_error;
2646 	}
2647 
2648 	lag->pf = pf;
2649 	lag->netdev = vsi->netdev;
2650 	lag->role = ICE_LAG_NONE;
2651 	lag->active_port = ICE_LAG_INVALID_PORT;
2652 	lag->port_bitmap = 0x0;
2653 	lag->bonded = false;
2654 	lag->bond_aa = false;
2655 	lag->need_fltr_cfg = false;
2656 	lag->upper_netdev = NULL;
2657 	lag->notif_block.notifier_call = NULL;
2658 	memset(lag->sec_vf, 0, sizeof(lag->sec_vf));
2659 
2660 	err = ice_register_lag_handler(lag);
2661 	if (err) {
2662 		dev_warn(dev, "INIT LAG: Failed to register event handler\n");
2663 		goto lag_error;
2664 	}
2665 
2666 	err = ice_create_lag_recipe(&pf->hw, &lag->pf_recipe,
2667 				    ice_dflt_vsi_rcp, 1);
2668 	if (err)
2669 		goto lag_error;
2670 
2671 	err = ice_create_lag_recipe(&pf->hw, &lag->lport_recipe,
2672 				    ice_lport_rcp, 3);
2673 	if (err)
2674 		goto free_rcp_res;
2675 
2676 	err = ice_create_lag_recipe(&pf->hw, &lag->act_act_recipe,
2677 				    ice_lport_rcp, 1);
2678 	if (err)
2679 		goto free_lport_res;
2680 
2681 	/* associate recipes to profiles */
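	/* Only extend profiles already associated with the default switch
	 * lookup recipe; the LAG recipes are OR'd into their
	 * recipe-to-profile bitmap.
	 */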
2682 	for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
2683 		err = ice_aq_get_recipe_to_profile(&pf->hw, n,
2684 						   &recipe_bits, NULL);
2685 		if (err)
2686 			continue;
2687 
2688 		if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
2689 			recipe_bits |= BIT(lag->pf_recipe) |
2690 				       BIT(lag->lport_recipe) |
2691 				       BIT(lag->act_act_recipe);
2692 			ice_aq_map_recipe_to_profile(&pf->hw, n,
2693 						     recipe_bits, NULL);
2694 		}
2695 	}
2696 
2697 	ice_display_lag_info(lag);
2698 
2699 	dev_dbg(dev, "INIT LAG complete\n");
2700 	return 0;
2701 
2702 free_lport_res:
2703 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
2704 			&lag->lport_recipe);
2705 
2706 free_rcp_res:
2707 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
2708 			&lag->pf_recipe);
2709 lag_error:
2710 	kfree(lag);
2711 	pf->lag = NULL;
2712 	return err;
2713 }
2714 
2715 /**
2716  * ice_deinit_lag - Clean up LAG
2717  * @pf: PF struct
2718  *
2719  * Clean up kernel LAG info and free memory
2720  * This function is meant to only be called on driver remove/shutdown
2721  */
2722 void ice_deinit_lag(struct ice_pf *pf)
2723 {
2724 	struct ice_lag *lag = pf->lag;
2725 
2726 	if (!lag)
2727 		return;
2728 
2729 	if (lag->pf)
2730 		ice_unregister_lag_handler(lag);
2731 
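	/* ensure any queued LAG work items have completed before freeing
	 * the resources they may reference
	 */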
2732 	flush_workqueue(ice_lag_wq);
2733 
2734 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
2735 			&pf->lag->pf_recipe);
2736 	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
2737 			&pf->lag->lport_recipe);
2738 
2739 	kfree(lag);
2740 
2741 	pf->lag = NULL;
2742 }
2743 
2744 /**
2745  * ice_lag_rebuild - rebuild lag resources after reset
2746  * @pf: pointer to local pf struct
2747  *
2748  * PF resets are promoted to CORER resets when an interface is in an
2749  * aggregate.  This means that we need to rebuild the PF resources for the
2750  * interface.  Since this happens outside the normal event processing, we
2751  * need to acquire the lag lock.
2752  *
2753  * This function will also evaluate the VF resources if this is the primary
2754  * interface.
2755  */
2756 void ice_lag_rebuild(struct ice_pf *pf)
2757 {
2758 	struct ice_lag_netdev_list ndlist;
2759 	struct ice_lag *lag, *prim_lag;
2760 	u8 act_port, loc_port;
2761 
2762 	if (!pf->lag || !pf->lag->bonded)
2763 		return;
2764 
2765 	mutex_lock(&pf->lag_mutex);
2766 
2767 	lag = pf->lag;
2768 	if (lag->primary) {
2769 		prim_lag = lag;
2770 	} else {
2771 		ice_lag_build_netdev_list(lag, &ndlist);
2772 		prim_lag = ice_lag_find_primary(lag);
2773 	}
2774 
2775 	if (!prim_lag) {
2776 		dev_dbg(ice_pf_to_dev(pf), "No primary interface in aggregate, can't rebuild\n");
2777 		goto lag_rebuild_out;
2778 	}
2779 
2780 	act_port = prim_lag->active_port;
2781 	loc_port = lag->pf->hw.port_info->lport;
2782 
2783 	/* configure SWID for this port */
2784 	if (lag->primary) {
2785 		ice_lag_primary_swid(lag, true);
2786 	} else {
2787 		ice_lag_set_swid(prim_lag->pf->hw.port_info->sw_id, lag, true);
2788 		ice_lag_add_prune_list(prim_lag, pf);
2789 		if (act_port == loc_port)
2790 			ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw);
2791 	}
2792 
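	/* restore control-packet filters lost in the reset; A/B bonds may
	 * also need their default-VSI rule re-added
	 */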
2793 	if (!lag->bond_aa) {
2794 		ice_lag_cfg_lp_fltr(lag, true, true);
2795 		if (lag->pf_rx_rule_id)
2796 			if (ice_lag_cfg_dflt_fltr(lag, true))
2797 				dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
2798 	} else {
2799 		ice_lag_cfg_lp_fltr(lag, true, false);
2800 	}
2801 
2803 	ice_clear_rdma_cap(pf);
2804 lag_rebuild_out:
2805 	ice_lag_destroy_netdev_list(lag, &ndlist);
2806 	mutex_unlock(&pf->lag_mutex);
2807 }
2808 
2809 /**
2810  * ice_lag_is_switchdev_running
2811  * @pf: pointer to PF structure
2812  *
2813  * Check if switchdev is running on any of the interfaces connected to lag.
2814  */
2815 bool ice_lag_is_switchdev_running(struct ice_pf *pf)
2816 {
2817 	struct ice_lag *lag = pf->lag;
2818 	struct net_device *tmp_nd;
2819 
2820 	if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) ||
2821 	    !lag || !lag->upper_netdev)
2822 		return false;
2823 
2824 	rcu_read_lock();
2825 	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
2826 		struct ice_netdev_priv *priv = netdev_priv(tmp_nd);
2827 
2828 		if (!netif_is_ice(tmp_nd) || !priv || !priv->vsi ||
2829 		    !priv->vsi->back)
2830 			continue;
2831 
2832 		if (ice_is_switchdev_running(priv->vsi->back)) {
2833 			rcu_read_unlock();
2834 			return true;
2835 		}
2836 	}
2837 	rcu_read_unlock();
2838 
2839 	return false;
2840 }
2841