1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include <linux/netdevice.h>
5 #include <net/nexthop.h>
6 #include "lag/lag.h"
7 #include "eswitch.h"
8 #include "esw/acl/ofld.h"
9 #include "lib/events.h"
10
mlx5_mpesw_metadata_cleanup(struct mlx5_lag * ldev)11 static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev)
12 {
13 struct mlx5_core_dev *dev;
14 struct mlx5_eswitch *esw;
15 u32 pf_metadata;
16 int i;
17
18 mlx5_ldev_for_each(i, 0, ldev) {
19 dev = ldev->pf[i].dev;
20 esw = dev->priv.eswitch;
21 pf_metadata = ldev->lag_mpesw.pf_metadata[i];
22 if (!pf_metadata)
23 continue;
24 mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, 0);
25 mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW,
26 (void *)0);
27 mlx5_esw_match_metadata_free(esw, pf_metadata);
28 ldev->lag_mpesw.pf_metadata[i] = 0;
29 }
30 }
31
mlx5_mpesw_metadata_set(struct mlx5_lag * ldev)32 static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
33 {
34 struct mlx5_core_dev *dev;
35 struct mlx5_eswitch *esw;
36 u32 pf_metadata;
37 int i, err;
38
39 mlx5_ldev_for_each(i, 0, ldev) {
40 dev = ldev->pf[i].dev;
41 esw = dev->priv.eswitch;
42 pf_metadata = mlx5_esw_match_metadata_alloc(esw);
43 if (!pf_metadata) {
44 err = -ENOSPC;
45 goto err_metadata;
46 }
47
48 ldev->lag_mpesw.pf_metadata[i] = pf_metadata;
49 err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK,
50 pf_metadata);
51 if (err)
52 goto err_metadata;
53 }
54
55 mlx5_ldev_for_each(i, 0, ldev) {
56 dev = ldev->pf[i].dev;
57 mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW,
58 (void *)0);
59 }
60
61 return 0;
62
63 err_metadata:
64 mlx5_mpesw_metadata_cleanup(ldev);
65 return err;
66 }
67
mlx5_lag_enable_mpesw(struct mlx5_lag * ldev)68 static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
69 {
70 struct mlx5_core_dev *dev0;
71 int err;
72 int idx;
73 int i;
74
75 if (ldev->mode == MLX5_LAG_MODE_MPESW)
76 return 0;
77
78 if (ldev->mode != MLX5_LAG_MODE_NONE)
79 return -EINVAL;
80
81 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
82 if (idx < 0)
83 return -EINVAL;
84
85 dev0 = ldev->pf[idx].dev;
86 if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS ||
87 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) ||
88 !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) ||
89 !mlx5_lag_check_prereq(ldev) ||
90 !mlx5_lag_shared_fdb_supported(ldev))
91 return -EOPNOTSUPP;
92
93 err = mlx5_mpesw_metadata_set(ldev);
94 if (err)
95 return err;
96
97 mlx5_lag_remove_devices(ldev);
98
99 err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true);
100 if (err) {
101 mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err);
102 goto err_add_devices;
103 }
104
105 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
106 mlx5_rescan_drivers_locked(dev0);
107 mlx5_ldev_for_each(i, 0, ldev) {
108 err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
109 if (err)
110 goto err_rescan_drivers;
111 }
112
113 mlx5_lag_set_vports_agg_speed(ldev);
114
115 return 0;
116
117 err_rescan_drivers:
118 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
119 mlx5_rescan_drivers_locked(dev0);
120 mlx5_deactivate_lag(ldev);
121 err_add_devices:
122 mlx5_lag_add_devices(ldev);
123 mlx5_ldev_for_each(i, 0, ldev)
124 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
125 mlx5_mpesw_metadata_cleanup(ldev);
126 return err;
127 }
128
mlx5_lag_disable_mpesw(struct mlx5_lag * ldev)129 void mlx5_lag_disable_mpesw(struct mlx5_lag *ldev)
130 {
131 if (ldev->mode == MLX5_LAG_MODE_MPESW) {
132 mlx5_mpesw_metadata_cleanup(ldev);
133 mlx5_disable_lag(ldev);
134 }
135 }
136
mlx5_mpesw_work(struct work_struct * work)137 static void mlx5_mpesw_work(struct work_struct *work)
138 {
139 struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work);
140 struct mlx5_devcom_comp_dev *devcom;
141 struct mlx5_lag *ldev = mpesww->lag;
142
143 devcom = mlx5_lag_get_devcom_comp(ldev);
144 if (!devcom)
145 return;
146
147 mlx5_devcom_comp_lock(devcom);
148 mutex_lock(&ldev->lock);
149 if (ldev->mode_changes_in_progress) {
150 mpesww->result = -EAGAIN;
151 goto unlock;
152 }
153
154 if (mpesww->op == MLX5_MPESW_OP_ENABLE)
155 mpesww->result = mlx5_lag_enable_mpesw(ldev);
156 else if (mpesww->op == MLX5_MPESW_OP_DISABLE)
157 mlx5_lag_disable_mpesw(ldev);
158 unlock:
159 mutex_unlock(&ldev->lock);
160 mlx5_devcom_comp_unlock(devcom);
161 complete(&mpesww->comp);
162 }
163
mlx5_lag_mpesw_queue_work(struct mlx5_core_dev * dev,enum mpesw_op op)164 static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
165 enum mpesw_op op)
166 {
167 struct mlx5_lag *ldev = mlx5_lag_dev(dev);
168 struct mlx5_mpesw_work_st *work;
169 int err = 0;
170
171 if (!ldev)
172 return 0;
173
174 work = kzalloc_obj(*work);
175 if (!work)
176 return -ENOMEM;
177
178 INIT_WORK(&work->work, mlx5_mpesw_work);
179 init_completion(&work->comp);
180 work->op = op;
181 work->lag = ldev;
182
183 if (!queue_work(ldev->wq, &work->work)) {
184 mlx5_core_warn(dev, "failed to queue mpesw work\n");
185 err = -EINVAL;
186 goto out;
187 }
188 wait_for_completion(&work->comp);
189 err = work->result;
190 out:
191 kfree(work);
192 return err;
193 }
194
mlx5_lag_mpesw_disable(struct mlx5_core_dev * dev)195 void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev)
196 {
197 mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE);
198 }
199
mlx5_lag_mpesw_enable(struct mlx5_core_dev * dev)200 int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev)
201 {
202 return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
203 }
204
mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev * mdev,struct net_device * out_dev,struct netlink_ext_ack * extack)205 int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev,
206 struct net_device *out_dev,
207 struct netlink_ext_ack *extack)
208 {
209 struct mlx5_lag *ldev = mlx5_lag_dev(mdev);
210
211 if (!netif_is_bond_master(out_dev) || !ldev)
212 return 0;
213
214 if (ldev->mode != MLX5_LAG_MODE_MPESW)
215 return 0;
216
217 NL_SET_ERR_MSG_MOD(extack, "can't forward to bond in mpesw mode");
218 return -EOPNOTSUPP;
219 }
220
mlx5_lag_is_mpesw(struct mlx5_core_dev * dev)221 bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
222 {
223 struct mlx5_lag *ldev = mlx5_lag_dev(dev);
224
225 return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
226 }
227 EXPORT_SYMBOL(mlx5_lag_is_mpesw);
228
mlx5_mpesw_speed_update_work(struct work_struct * work)229 void mlx5_mpesw_speed_update_work(struct work_struct *work)
230 {
231 struct mlx5_lag *ldev = container_of(work, struct mlx5_lag,
232 speed_update_work);
233
234 mutex_lock(&ldev->lock);
235 if (ldev->mode == MLX5_LAG_MODE_MPESW) {
236 if (ldev->mode_changes_in_progress)
237 queue_work(ldev->wq, &ldev->speed_update_work);
238 else
239 mlx5_lag_set_vports_agg_speed(ldev);
240 }
241
242 mutex_unlock(&ldev->lock);
243 }
244
mlx5_lag_mpesw_port_change_event(struct notifier_block * nb,unsigned long event,void * data)245 int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
246 unsigned long event, void *data)
247 {
248 struct mlx5_nb *mlx5_nb = container_of(nb, struct mlx5_nb, nb);
249 struct lag_func *lag_func = container_of(mlx5_nb,
250 struct lag_func,
251 port_change_nb);
252 struct mlx5_core_dev *dev = lag_func->dev;
253 struct mlx5_lag *ldev = dev->priv.lag;
254 struct mlx5_eqe *eqe = data;
255
256 if (!ldev)
257 return NOTIFY_DONE;
258
259 if (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_DOWN ||
260 eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE)
261 queue_work(ldev->wq, &ldev->speed_update_work);
262
263 return NOTIFY_OK;
264 }
265