xref: /linux/drivers/infiniband/hw/hns/hns_roce_bond.c (revision 55aa394a5ed871208eac11c5f4677cafd258c4dd)
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (c) 2025 Hisilicon Limited.
 */

#include <net/lag.h>
#include <net/bonding.h>
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
#include "hns_roce_bond.h"

static DEFINE_XARRAY(roce_bond_xa);

static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev)
{
	struct ib_device *ibdev =
		ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS);

	if (!ibdev)
		return NULL;

	return container_of(ibdev, struct hns_roce_dev, ib_dev);
}

static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev)
{
	struct net_device *upper_dev;

	rcu_read_lock();
	upper_dev = netdev_master_upper_dev_get_rcu(net_dev);
	dev_hold(upper_dev);
	rcu_read_unlock();

	return upper_dev;
}

static int get_netdev_bond_slave_id(struct net_device *net_dev,
				    struct hns_roce_bond_group *bond_grp)
{
	int i;

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++)
		if (net_dev == bond_grp->bond_func_info[i].net_dev)
			return i;

	return -ENOENT;
}

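/*
 * Find the bond group that @net_dev belongs to on PCI bus @bus_num,
 * either because it is already recorded as a slave, or (for slaves not
 * yet recorded) because its master upper device matches the group's.
 */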
struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
						  u8 bus_num)
{
	struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
	struct hns_roce_bond_group *bond_grp;
	struct net_device *upper_dev = NULL;
	int i;

	if (!die_info)
		return NULL;

	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
		bond_grp = die_info->bgrps[i];
		if (!bond_grp)
			continue;
		if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
			return bond_grp;
		if (bond_grp->upper_dev) {
			upper_dev = get_upper_dev_from_ndev(net_dev);
			if (bond_grp->upper_dev == upper_dev) {
				dev_put(upper_dev);
				return bond_grp;
			}
			dev_put(upper_dev);
		}
	}

	return NULL;
}

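/*
 * Bind IB port 1 of the bond device to the currently preferred slave
 * netdev: the active slave in active-backup mode, otherwise the first
 * link-up slave, falling back to the main PF's netdev. In active-backup
 * mode the GID table is rebuilt so GIDs follow the new active slave.
 */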
static int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp,
				    struct hns_roce_dev *hr_dev)
{
	struct net_device *active_dev;
	struct net_device *old_dev;
	int i, ret = 0;

	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		rcu_read_lock();
		active_dev =
			bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev));
		rcu_read_unlock();
	} else {
		for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
			active_dev = bond_grp->bond_func_info[i].net_dev;
			if (active_dev &&
			    ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE)
				break;
		}
		/* No link-up slave was found. */
		if (i == ROCE_BOND_FUNC_MAX)
			active_dev = NULL;
	}

	if (!active_dev)
		active_dev = get_hr_netdev(hr_dev, 0);

	old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1);
	if (old_dev == active_dev)
		goto out;

	ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1);
	if (ret) {
		dev_err(hr_dev->dev, "failed to set netdev for bond.\n");
		goto out;
	}

	if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		if (old_dev)
			roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev);
		rdma_roce_rescan_port(&hr_dev->ib_dev, 1);
	}
out:
	dev_put(old_dev);
	return ret;
}

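/* Return true if @hr_dev currently belongs to an attached RoCE bond group. */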
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
{
	struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
	struct hns_roce_bond_group *bond_grp;
	u8 bus_num = get_hr_bus_num(hr_dev);

	bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
	if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED &&
	    bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
		return true;

	return false;
}

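/*
 * Recompute the active-slave bitmap. In active-backup mode a slave is
 * active when the bonding driver marks its port txable; in hash modes it
 * is active when its IB port state is ACTIVE.
 */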
static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
{
	struct net_device *net_dev;
	u32 active_slave_map = 0;
	u8 active_slave_num = 0;
	bool active;
	u8 i;

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		net_dev = bond_grp->bond_func_info[i].net_dev;
		if (!net_dev || !(bond_grp->slave_map & (1U << i)))
			continue;

		active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ?
			 net_lag_port_dev_txable(net_dev) :
			 (ib_get_curr_port_state(net_dev) == IB_PORT_ACTIVE);
		if (active) {
			active_slave_num++;
			active_slave_map |= (1U << i);
		}
	}

	bond_grp->active_slave_num = active_slave_num;
	bond_grp->active_slave_map = active_slave_map;
}

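/*
 * Re-issue the SET_BOND command after a reset, with @hr_dev taking over
 * as the main device of the group.
 */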
static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp,
				 struct hns_roce_dev *hr_dev)
{
	bond_grp->main_hr_dev = hr_dev;
	hns_roce_bond_get_active_slave(bond_grp);

	return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
}

static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp,
				  u8 func_idx)
{
	struct hnae3_handle *handle;

	/* The handle may have been cleared by the CHANGEUPPER path for an
	 * unregistering netdev while its slave_map bit is still set.
	 */
	handle = bond_grp->bond_func_info[func_idx].handle;
	if (handle && handle->priv)
		hns_roce_bond_uninit_client(bond_grp, func_idx);
}

static struct hns_roce_dev
	*hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
			     u8 func_idx, bool need_switch);

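/*
 * The main function is leaving the bond: tear it down, then promote the
 * first remaining slave that can be brought up successfully to be the
 * new main device of the group.
 */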
static int switch_main_dev(struct hns_roce_bond_group *bond_grp,
			   u8 main_func_idx)
{
	struct hns_roce_dev *hr_dev;
	struct net_device *net_dev;
	u8 i;

	bond_grp->main_hr_dev = NULL;
	hns_roce_bond_uninit_client(bond_grp, main_func_idx);

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		net_dev = bond_grp->bond_func_info[i].net_dev;
		if ((bond_grp->slave_map & (1U << i)) && net_dev) {
			/* In case this slave is still being registered as
			 * a non-bonded PF, uninit it first and then re-init
			 * it as the main device.
			 */
			hns_roce_slave_uninit(bond_grp, i);
			hr_dev = hns_roce_slave_init(bond_grp, i, false);
			if (hr_dev) {
				bond_grp->main_hr_dev = hr_dev;
				break;
			}
		}
	}

	if (!bond_grp->main_hr_dev)
		return -ENODEV;

	return 0;
}

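/*
 * Bring up the RoCE instance for slave @func_idx. With @need_switch set,
 * re-initializing the current main function first hands the main-device
 * role over to another slave via switch_main_dev().
 */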
static struct hns_roce_dev
	*hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
			     u8 func_idx, bool need_switch)
{
	struct hns_roce_dev *hr_dev = NULL;
	struct hnae3_handle *handle;
	u8 main_func_idx;
	int ret;

	if (need_switch) {
		main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
		if (func_idx == main_func_idx) {
			ret = switch_main_dev(bond_grp, main_func_idx);
			if (ret == -ENODEV)
				return NULL;
		}
	}

	handle = bond_grp->bond_func_info[func_idx].handle;
	if (handle) {
		if (handle->priv)
			return handle->priv;
		/* Prevent this device from being initialized as a bond device */
		if (need_switch)
			bond_grp->bond_func_info[func_idx].net_dev = NULL;
		hr_dev = hns_roce_bond_init_client(bond_grp, func_idx);
		if (!hr_dev)
			BOND_ERR_LOG("failed to init slave %u.\n", func_idx);
	}

	return hr_dev;
}

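/*
 * One hns_roce_die_info exists per PCI bus (i.e. per die) and tracks up
 * to ROCE_BOND_NUM_MAX bond groups through a bond ID bitmask.
 */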
static struct hns_roce_die_info *alloc_die_info(int bus_num)
{
	struct hns_roce_die_info *die_info;
	int ret;

	die_info = kzalloc(sizeof(*die_info), GFP_KERNEL);
	if (!die_info)
		return NULL;

	/* Init the mutex before die_info is visible in the xarray. */
	mutex_init(&die_info->die_mutex);

	ret = xa_err(xa_store(&roce_bond_xa, bus_num, die_info, GFP_KERNEL));
	if (ret) {
		mutex_destroy(&die_info->die_mutex);
		kfree(die_info);
		return NULL;
	}

	return die_info;
}

static void dealloc_die_info(struct hns_roce_die_info *die_info, u8 bus_num)
{
	mutex_destroy(&die_info->die_mutex);
	xa_erase(&roce_bond_xa, bus_num);
	kfree(die_info);
}

static int alloc_bond_id(struct hns_roce_bond_group *bond_grp)
{
	u8 bus_num = bond_grp->bus_num;
	struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
	int i;

	if (!die_info) {
		die_info = alloc_die_info(bus_num);
		if (!die_info)
			return -ENOMEM;
	}

	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
		if (die_info->bond_id_mask & BOND_ID(i))
			continue;

		die_info->bond_id_mask |= BOND_ID(i);
		die_info->bgrps[i] = bond_grp;
		bond_grp->bond_id = i;

		return 0;
	}

	return -ENOSPC;
}

static int remove_bond_id(int bus_num, u8 bond_id)
{
	struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);

	if (bond_id >= ROCE_BOND_NUM_MAX)
		return -EINVAL;

	if (!die_info)
		return -ENODEV;

	die_info->bond_id_mask &= ~BOND_ID(bond_id);
	die_info->bgrps[bond_id] = NULL;
	if (!die_info->bond_id_mask)
		dealloc_die_info(die_info, bus_num);

	return 0;
}

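/*
 * Tear down all slaves as standalone PFs, then re-init the first one
 * that comes up as the bond main device and issue the SET_BOND command
 * to the hardware.
 */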
static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
{
	struct hns_roce_dev *hr_dev;
	int ret;
	int i;

	for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) {
		if (bond_grp->slave_map & (1U << i))
			hns_roce_slave_uninit(bond_grp, i);
	}

	mutex_lock(&bond_grp->bond_mutex);
	bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
	mutex_unlock(&bond_grp->bond_mutex);
	bond_grp->main_hr_dev = NULL;

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		if (bond_grp->slave_map & (1U << i)) {
			hr_dev = hns_roce_slave_init(bond_grp, i, false);
			if (hr_dev) {
				bond_grp->main_hr_dev = hr_dev;
				break;
			}
		}
	}

	if (!bond_grp->main_hr_dev) {
		ret = -ENODEV;
		goto out;
	}

	hns_roce_bond_get_active_slave(bond_grp);

	ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);

out:
	if (ret) {
		BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret);
		hns_roce_cleanup_bond(bond_grp);
	} else {
		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
			   "RoCE set bond finished!\n");
	}
}

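/*
 * Undo bonding: uninit the current main device, re-init every slave as
 * a standalone PF, then clear the bond in hardware and detach the group.
 */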
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp)
{
	u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
	struct hns_roce_dev *hr_dev;
	u8 i;

	if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED)
		goto out;

	bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
	bond_grp->main_hr_dev = NULL;

	hns_roce_slave_uninit(bond_grp, main_func_idx);

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		hr_dev = hns_roce_slave_init(bond_grp, i, false);
		if (hr_dev)
			bond_grp->main_hr_dev = hr_dev;
	}

out:
	hns_roce_cleanup_bond(bond_grp);
}

static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp)
{
	int ret;

	hns_roce_bond_get_active_slave(bond_grp);

	ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);

	mutex_lock(&bond_grp->bond_mutex);
	if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE)
		bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
	mutex_unlock(&bond_grp->bond_mutex);

	if (ret)
		ibdev_err(&bond_grp->main_hr_dev->ib_dev,
			  "failed to change RoCE bond slave state, ret = %d.\n",
			  ret);
	else
		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
			   "RoCE slave changestate finished!\n");
}

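/*
 * A slave joined or left the bond: uninit netdevs that are now slaves
 * (other than the main function), re-init the ones that left, and push
 * the new slave set to hardware with CHANGE_BOND.
 */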
static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp)
{
	int ret;
	u8 i;

	for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
		if (bond_grp->slave_map & (1U << i)) {
			if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn))
				continue;
			hns_roce_slave_uninit(bond_grp, i);
		} else {
			hns_roce_slave_init(bond_grp, i, true);
			if (!bond_grp->main_hr_dev) {
				ret = -ENODEV;
				goto out;
			}
			bond_grp->bond_func_info[i].net_dev = NULL;
			bond_grp->bond_func_info[i].handle = NULL;
		}
	}

	hns_roce_bond_get_active_slave(bond_grp);

	ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);

out:
	if (ret) {
		BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret);
		hns_roce_cleanup_bond(bond_grp);
	} else {
		mutex_lock(&bond_grp->bond_mutex);
		if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM)
			bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
		mutex_unlock(&bond_grp->bond_mutex);
		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
			   "RoCE slave change num finished!\n");
	}
}

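/*
 * Rebuild slave_map from the current lower devices of @upper_dev,
 * recording the netdev and hnae3 handle of any slave seen for the first
 * time. Caller must hold bond_mutex.
 */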
static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp,
					     struct net_device *upper_dev)
{
	struct hns_roce_v2_priv *priv;
	struct hns_roce_dev *hr_dev;
	struct net_device *net_dev;
	int func_idx;

	bond_grp->slave_map = 0;
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
		func_idx = get_netdev_bond_slave_id(net_dev, bond_grp);
		if (func_idx < 0) {
			hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
			if (!hr_dev)
				continue;
			func_idx = PCI_FUNC(hr_dev->pci_dev->devfn);
			if (!bond_grp->bond_func_info[func_idx].net_dev) {
				priv = hr_dev->priv;
				bond_grp->bond_func_info[func_idx].net_dev =
					net_dev;
				bond_grp->bond_func_info[func_idx].handle =
					priv->handle;
			}
			ib_device_put(&hr_dev->ib_dev);
		}

		bond_grp->slave_map |= (1U << func_idx);
	}
	rcu_read_unlock();
}

static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp,
				  struct net_device *net_dev)
{
	struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
	bool ret = true;

	if (!hr_dev) {
		if (bond_grp &&
		    get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
			return true;
		else
			return false;
	}

	if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) {
		ret = false;
		goto out;
	}

	if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) {
		ret = false;
		goto out;
	}

	if (bond_grp->bus_num != get_hr_bus_num(hr_dev))
		ret = false;

out:
	ib_device_put(&hr_dev->ib_dev);
	return ret;
}

static bool check_slave_support(struct hns_roce_bond_group *bond_grp,
				struct net_device *upper_dev)
{
	struct net_device *net_dev;
	u8 slave_num = 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
		if (is_dev_bond_supported(bond_grp, net_dev)) {
			slave_num++;
			continue;
		}
		rcu_read_unlock();
		return false;
	}
	rcu_read_unlock();

	return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX);
}

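/*
 * Deferred worker scheduled from the netdev notifier. It re-validates
 * the bond, refreshes the slave info, and then applies whatever state
 * transition is pending.
 */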
static void hns_roce_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct hns_roce_bond_group *bond_grp =
		container_of(delayed_work, struct hns_roce_bond_group,
			     bond_work);
	enum hns_roce_bond_state bond_state;
	bool bond_ready;

	mutex_lock(&bond_grp->bond_mutex);
	bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev);
	hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev);
	bond_state = bond_grp->bond_state;
	bond_grp->bond_ready = bond_ready;
	mutex_unlock(&bond_grp->bond_mutex);

	ibdev_info(&bond_grp->main_hr_dev->ib_dev,
		   "bond work: bond_ready - %d, bond_state - %d.\n",
		   bond_ready, bond_state);

	if (!bond_ready) {
		hns_roce_clear_bond(bond_grp);
		return;
	}

	switch (bond_state) {
	case HNS_ROCE_BOND_NOT_BONDED:
		hns_roce_set_bond(bond_grp);
		/* In set_bond flow, we don't need to set bond netdev here as
		 * it has been done when bond_grp->main_hr_dev is registered.
		 */
		return;
	case HNS_ROCE_BOND_SLAVE_CHANGESTATE:
		hns_roce_slave_changestate(bond_grp);
		break;
	case HNS_ROCE_BOND_SLAVE_CHANGE_NUM:
		hns_roce_slave_change_num(bond_grp);
		break;
	default:
		return;
	}
	hns_roce_set_bond_netdev(bond_grp, bond_grp->main_hr_dev);
}

static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp,
				     struct hns_roce_dev *hr_dev,
				     struct net_device *upper_dev)
{
	bond_grp->upper_dev = upper_dev;
	bond_grp->main_hr_dev = hr_dev;
	bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
	bond_grp->bond_ready = false;
}

static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp)
{
	mutex_lock(&bond_grp->bond_mutex);

	cancel_delayed_work(&bond_grp->bond_work);
	bond_grp->upper_dev = NULL;
	bond_grp->main_hr_dev = NULL;
	bond_grp->bond_ready = false;
	bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
	bond_grp->slave_map = 0;
	memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info));

	mutex_unlock(&bond_grp->bond_mutex);
}

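/*
 * Clear the bond in hardware (when a main device still exists) and
 * return the group to the NOT_ATTACHED state.
 */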
void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp)
{
	int ret;

	ret = bond_grp->main_hr_dev ?
	      hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO;
	if (ret)
		BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
	else
		ibdev_info(&bond_grp->main_hr_dev->ib_dev,
			   "RoCE clear bond finished!\n");

	hns_roce_detach_bond_grp(bond_grp);
}

static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp,
				    struct net_device *net_dev)
{
	struct hns_roce_bond_group *bond_grp_tmp;

	bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num);
	return bond_grp_tmp == bond_grp;
}

static void lowerstate_event_setting(struct hns_roce_bond_group *bond_grp,
				     struct netdev_notifier_changelowerstate_info *info)
{
	mutex_lock(&bond_grp->bond_mutex);

	if (bond_grp->bond_ready &&
	    bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED)
		bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE;

	mutex_unlock(&bond_grp->bond_mutex);
}

static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp,
					   struct netdev_notifier_changelowerstate_info *info)
{
	struct net_device *net_dev =
		netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);

	if (!netif_is_lag_port(net_dev))
		return false;

	if (!lowerstate_event_filter(bond_grp, net_dev))
		return false;

	lowerstate_event_setting(bond_grp, info);

	return true;
}

static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info)
{
	if (!bond_info)
		return false;

	if (bond_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    bond_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
		return false;

	if (bond_info->tx_type == NETDEV_LAG_TX_TYPE_HASH &&
	    bond_info->hash_type > NETDEV_LAG_HASH_L23)
		return false;

	return true;
}

static void upper_event_setting(struct hns_roce_bond_group *bond_grp,
				struct netdev_notifier_changeupper_info *info)
{
	struct netdev_lag_upper_info *bond_upper_info = NULL;
	bool slave_inc = info->linking;

	if (slave_inc)
		bond_upper_info = info->upper_info;

	if (bond_upper_info) {
		bond_grp->tx_type = bond_upper_info->tx_type;
		bond_grp->hash_type = bond_upper_info->hash_type;
	}
}

static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp)
{
	struct net_device *net_dev;
	u8 slave_num = 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(bond_grp->upper_dev, net_dev) {
		if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
			slave_num++;
	}
	rcu_read_unlock();

	return (slave_num > 1);
}

static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info,
				       struct hns_roce_bond_group *bond_grp,
				       struct net_device *upper_dev)
{
	if (!is_bond_setting_supported(bond_info))
		return false;

	return check_slave_support(bond_grp, upper_dev);
}

static enum bond_support_type
	check_bond_support(struct hns_roce_bond_group *bond_grp,
			   struct net_device *upper_dev,
			   struct netdev_notifier_changeupper_info *info)
{
	bool bond_grp_exist = false;
	bool support;

	if (upper_dev == bond_grp->upper_dev)
		bond_grp_exist = true;

	if (!info->linking && !bond_grp_exist)
		return BOND_NOT_SUPPORT;

	if (info->linking)
		support = check_linking_bond_support(info->upper_info, bond_grp,
						     upper_dev);
	else
		support = check_unlinking_bond_support(bond_grp);

	if (support)
		return BOND_SUPPORT;

	return bond_grp_exist ? BOND_EXISTING_NOT_SUPPORT : BOND_NOT_SUPPORT;
}

static bool upper_event_filter(struct netdev_notifier_changeupper_info *info,
			       struct hns_roce_bond_group *bond_grp,
			       struct net_device *net_dev)
{
	struct net_device *upper_dev = info->upper_dev;
	struct hns_roce_bond_group *bond_grp_tmp;
	struct hns_roce_dev *hr_dev;
	bool ret = true;
	u8 bus_num;

	if (!info->linking ||
	    bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
		return bond_grp->upper_dev == upper_dev;

	hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
	if (!hr_dev)
		return false;

	bus_num = get_hr_bus_num(hr_dev);
	if (bond_grp->bus_num != bus_num) {
		ret = false;
		goto out;
	}

	bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bus_num);
	if (bond_grp_tmp && bond_grp_tmp != bond_grp)
		ret = false;
out:
	ib_device_put(&hr_dev->ib_dev);
	return ret;
}

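/*
 * Handle NETDEV_CHANGEUPPER for a lower device of a bond. Attaches the
 * group on the first qualifying event and marks the pending state change
 * for the worker. Returns true if bond_work should be scheduled.
 */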
static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp,
				      struct netdev_notifier_changeupper_info *info)
{
	struct net_device *net_dev =
		netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
	struct net_device *upper_dev = info->upper_dev;
	enum bond_support_type support = BOND_SUPPORT;
	struct hns_roce_dev *hr_dev;
	int slave_id;

	if (!upper_dev || !netif_is_lag_master(upper_dev))
		return false;

	if (!upper_event_filter(info, bond_grp, net_dev))
		return false;

	mutex_lock(&bond_grp->bond_mutex);
	support = check_bond_support(bond_grp, upper_dev, info);
	if (support == BOND_NOT_SUPPORT) {
		mutex_unlock(&bond_grp->bond_mutex);
		return false;
	}

	if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) {
		hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
		if (!hr_dev) {
			mutex_unlock(&bond_grp->bond_mutex);
			return false;
		}
		hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev);
		ib_device_put(&hr_dev->ib_dev);
	}

	/* In the case of netdev being unregistered, the roce
	 * instance shouldn't be inited.
	 */
	if (net_dev->reg_state >= NETREG_UNREGISTERING) {
		slave_id = get_netdev_bond_slave_id(net_dev, bond_grp);
		if (slave_id >= 0) {
			bond_grp->bond_func_info[slave_id].net_dev = NULL;
			bond_grp->bond_func_info[slave_id].handle = NULL;
		}
	}

	if (support == BOND_SUPPORT) {
		bond_grp->bond_ready = true;
		if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED)
			bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGE_NUM;
	}
	mutex_unlock(&bond_grp->bond_mutex);
	if (support == BOND_SUPPORT)
		upper_event_setting(bond_grp, info);

	return true;
}

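/*
 * Netdev notifier callback of a bond group. Heavy work is deferred to
 * bond_work with a one-second delay, presumably so that a burst of
 * bonding events collapses into a single pass.
 */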
static int hns_roce_bond_event(struct notifier_block *self,
			       unsigned long event, void *ptr)
{
	struct hns_roce_bond_group *bond_grp =
		container_of(self, struct hns_roce_bond_group, bond_nb);
	bool changed = false;

	if (event == NETDEV_CHANGEUPPER)
		changed = hns_roce_bond_upper_event(bond_grp, ptr);
	if (event == NETDEV_CHANGELOWERSTATE)
		changed = hns_roce_bond_lowerstate_event(bond_grp, ptr);

	if (changed)
		schedule_delayed_work(&bond_grp->bond_work, HZ);

	return NOTIFY_DONE;
}

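/*
 * Pre-allocate all bond groups for the die of @hr_dev and register one
 * netdev notifier per group. Subsequent PFs on the same bus find the
 * die_info already present and return early.
 */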
int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
	struct hns_roce_bond_group *bond_grp;
	u8 bus_num = get_hr_bus_num(hr_dev);
	int ret;
	int i;

	if (xa_load(&roce_bond_xa, bus_num))
		return 0;

	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
		bond_grp = kvzalloc(sizeof(*bond_grp), GFP_KERNEL);
		if (!bond_grp) {
			ret = -ENOMEM;
			goto mem_err;
		}

		mutex_init(&bond_grp->bond_mutex);
		INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_bond_work);

		bond_grp->bond_ready = false;
		bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
		bond_grp->bus_num = bus_num;

		ret = alloc_bond_id(bond_grp);
		if (ret) {
			dev_err(hr_dev->dev,
				"failed to alloc bond ID, ret = %d.\n", ret);
			goto alloc_id_err;
		}

		bond_grp->bond_nb.notifier_call = hns_roce_bond_event;
		ret = register_netdevice_notifier(&bond_grp->bond_nb);
		if (ret) {
			ibdev_err(&hr_dev->ib_dev,
				  "failed to register bond nb, ret = %d.\n", ret);
			goto register_nb_err;
		}
		bgrps[i] = bond_grp;
	}

	return 0;

register_nb_err:
	remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
alloc_id_err:
	mutex_destroy(&bond_grp->bond_mutex);
	kvfree(bond_grp);
mem_err:
	for (i--; i >= 0; i--) {
		unregister_netdevice_notifier(&bgrps[i]->bond_nb);
		cancel_delayed_work_sync(&bgrps[i]->bond_work);
		remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id);
		mutex_destroy(&bgrps[i]->bond_mutex);
		kvfree(bgrps[i]);
	}
	return ret;
}

void hns_roce_dealloc_bond_grp(void)
{
	struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
	struct hns_roce_bond_group *bond_grp;
	struct hns_roce_die_info *die_info;
	unsigned long id;
	int i;

	xa_for_each(&roce_bond_xa, id, die_info) {
		/* Snapshot the groups first: releasing the last bond ID
		 * on a die frees die_info via dealloc_die_info(), so it
		 * must not be dereferenced after that point.
		 */
		for (i = 0; i < ROCE_BOND_NUM_MAX; i++)
			bgrps[i] = die_info->bgrps[i];

		for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
			bond_grp = bgrps[i];
			if (!bond_grp)
				continue;
			unregister_netdevice_notifier(&bond_grp->bond_nb);
			cancel_delayed_work_sync(&bond_grp->bond_work);
			remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
			mutex_destroy(&bond_grp->bond_mutex);
			kvfree(bond_grp);
		}
	}
}

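/*
 * Restore bond state after the RoCE instance of @hr_dev is (re)created,
 * e.g. on reset recovery, and point the IB port at the proper slave
 * netdev. A non-NULL bond group appears to be a precondition here, since
 * bond_grp is not NULL-checked before use.
 */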
int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
{
	struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
	struct hns_roce_v2_priv *priv = hr_dev->priv;
	struct hns_roce_bond_group *bond_grp;
	u8 bus_num = get_hr_bus_num(hr_dev);
	int ret;

	bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);

	if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT) {
		ret = hns_roce_recover_bond(bond_grp, hr_dev);
		if (ret) {
			dev_err(hr_dev->dev,
				"failed to recover RoCE bond, ret = %d.\n", ret);
			return ret;
		}
	}

	return hns_roce_set_bond_netdev(bond_grp, hr_dev);
}

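/*
 * Suspend/resume hooks for reset handling: notifiers are unregistered
 * and pending work flushed on suspend, and re-registered on the last
 * matching resume. suspend_cnt makes nested calls idempotent.
 */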
void hns_roce_bond_suspend(struct hnae3_handle *handle)
{
	u8 bus_num = handle->pdev->bus->number;
	struct hns_roce_bond_group *bond_grp;
	struct hns_roce_die_info *die_info;
	int i;

	die_info = xa_load(&roce_bond_xa, bus_num);
	if (!die_info)
		return;

	mutex_lock(&die_info->die_mutex);

	/*
	 * Avoid duplicated processing when calling this function
	 * multiple times.
	 */
	if (die_info->suspend_cnt)
		goto out;

	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
		bond_grp = die_info->bgrps[i];
		if (!bond_grp)
			continue;
		unregister_netdevice_notifier(&bond_grp->bond_nb);
		cancel_delayed_work_sync(&bond_grp->bond_work);
	}

out:
	die_info->suspend_cnt++;
	mutex_unlock(&die_info->die_mutex);
}

void hns_roce_bond_resume(struct hnae3_handle *handle)
{
	u8 bus_num = handle->pdev->bus->number;
	struct hns_roce_bond_group *bond_grp;
	struct hns_roce_die_info *die_info;
	int i, ret;

	die_info = xa_load(&roce_bond_xa, bus_num);
	if (!die_info)
		return;

	mutex_lock(&die_info->die_mutex);

	die_info->suspend_cnt--;
	if (die_info->suspend_cnt)
		goto out;

	for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
		bond_grp = die_info->bgrps[i];
		if (!bond_grp)
			continue;
		ret = register_netdevice_notifier(&bond_grp->bond_nb);
		if (ret)
			dev_err(&handle->pdev->dev,
				"failed to resume bond notifier(bus_num = %u, id = %u), ret = %d.\n",
				bus_num, bond_grp->bond_id, ret);
	}

out:
	mutex_unlock(&die_info->die_mutex);
}