xref: /linux/drivers/infiniband/hw/mlx5/ib_rep.c (revision 1fd1dc41724319406b0aff221a352a400b0ddfc5)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
 */

#include <linux/mlx5/vport.h>
#include "ib_rep.h"
#include "srq.h"

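/*
 * Attach a representor to the IB device created for the uplink:
 * record the rep in the matching port slot and bind the rep's netdev
 * to that IB port.
 */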
static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
		      struct mlx5_eswitch_rep *rep,
		      int vport_index)
{
	struct mlx5_ib_dev *ibdev;
	struct net_device *ndev;

	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
	if (!ibdev)
		return -EINVAL;

	ibdev->port[vport_index].rep = rep;
	rep->rep_data[REP_IB].priv = ibdev;
	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);

	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
}

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);

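/*
 * With a shared FDB one IB device covers the vports of every LAG
 * peer. Grow @num_ports by each peer's vport count; unless the peer
 * runs MPESW, its uplink is folded into the single uplink port and is
 * not counted again.
 */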
static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
{
	struct mlx5_core_dev *peer_dev;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);

		if (mlx5_lag_is_mpesw(peer_dev))
			*num_ports += peer_num_ports;
		else
			/* Only 1 ib port is the representor for all uplinks */
			*num_ports += peer_num_ports - 1;
	}
}

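/*
 * Transfer ownership of the RDMA transport steering tables from
 * @cur_owner to @new_owner. A no-op unless @cur_owner supports RDMA
 * transport flow tables and @new_owner may manage them on another
 * eswitch. If moving the RX table fails, the TX table is handed back
 * to @cur_owner so no partial transfer is left behind.
 */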
static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
				       struct mlx5_core_dev *new_owner)
{
	int ret;

	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
		return 0;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_TX);
	if (ret)
		return ret;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_RX);
	if (ret) {
		mlx5_fs_set_root_dev(cur_owner, cur_owner,
				     FS_FT_RDMA_TRANSPORT_TX);
		return ret;
	}

	return 0;
}

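/*
 * Return each LAG peer's RDMA transport tables to the peer itself,
 * undoing mlx5_ib_take_transport().
 */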
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int i, ret;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
		WARN_ON_ONCE(ret);
	}
}

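/*
 * Take ownership of every LAG peer's RDMA transport tables on behalf
 * of @dev, rolling back all transfers if any single one fails.
 */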
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int ret;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
		if (ret) {
			mlx5_ib_release_transport(dev);
			return ret;
		}
	}

	return 0;
}

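/*
 * .load() callback for an IB vport representor. Only the uplink rep
 * that owns the IB device (the LAG master under a shared FDB, or a
 * standalone device) allocates one; every other rep, including a
 * non-master uplink under MPESW, is attached as an additional port of
 * the existing device via mlx5_ib_set_vport_rep().
 */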
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
	struct mlx5_core_dev *lag_master = dev;
	const struct mlx5_ib_profile *profile;
	struct mlx5_core_dev *peer_dev;
	struct mlx5_ib_dev *ibdev;
	bool new_uplink = false;
	int vport_index;
	int ret;
	int i;

	vport_index = rep->vport_index;

	if (mlx5_lag_is_shared_fdb(dev)) {
		if (mlx5_lag_is_master(dev)) {
			mlx5_ib_num_ports_update(dev, &num_ports);
		} else {
			if (rep->vport == MLX5_VPORT_UPLINK) {
				if (!mlx5_lag_is_mpesw(dev))
					return 0;
				new_uplink = true;
			}
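			/*
			 * Shift this rep's port index past the ports that
			 * lower-indexed LAG peers contribute to the master's
			 * IB device.
			 */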
			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

				if (mlx5_lag_is_master(peer_dev))
					lag_master = peer_dev;
				else if (!mlx5_lag_is_mpesw(dev))
					/* Only 1 ib port is the representor for all uplinks */
					peer_n_ports--;

				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
					vport_index += peer_n_ports;
			}
		}
	}

	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
		profile = &raw_eth_profile;
	else
		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);

	if (mlx5_lag_is_shared_fdb(dev)) {
		ret = mlx5_ib_take_transport(lag_master);
		if (ret)
			return ret;
	}

	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
					 mlx5_core_net(lag_master));
	if (!ibdev) {
		ret = -ENOMEM;
		goto release_transport;
	}

	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
	vport_index = rep->vport_index;
	ibdev->port[vport_index].rep = rep;
	ibdev->mdev = lag_master;
	ibdev->num_ports = num_ports;
	ibdev->ib_dev.phys_port_cnt = num_ports;
	ret = ib_device_set_netdev(&ibdev->ib_dev,
			mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
					       rep->vport),
			vport_index + 1);
	if (ret)
		goto fail_add;

	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_register_peer_vport_reps(lag_master);

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
release_transport:
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_release_transport(lag_master);

	return ret;
}

static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
	return rep->rep_data[REP_IB].priv;
}

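/*
 * .unload() counterpart of mlx5_ib_vport_rep_load(). On a non-master
 * shared-FDB device the rep sits in the master's port array, so its
 * slot is found by searching for the rep rather than trusting
 * rep->vport_index. Only the device-owning uplink rep tears the IB
 * device down, first unregistering the peers' reps and returning the
 * RDMA transport tables.
 */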
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
	int vport_index = rep->vport_index;
	struct mlx5_ib_port *port;
	int i;

	if (WARN_ON(!mdev))
		return;

	if (!dev)
		return;

	if (mlx5_lag_is_shared_fdb(mdev) &&
	    !mlx5_lag_is_master(mdev)) {
		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
			return;
		for (i = 0; i < dev->num_ports; i++) {
			if (dev->port[i].rep == rep)
				break;
		}
		if (WARN_ON(i == dev->num_ports))
			return;
		vport_index = i;
	}

	port = &dev->port[vport_index];

	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
	rep->rep_data[REP_IB].priv = NULL;
	port->rep = NULL;

	if (rep->vport == MLX5_VPORT_UPLINK) {
		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
			return;

		if (mlx5_lag_is_shared_fdb(mdev)) {
			struct mlx5_core_dev *peer_mdev;
			struct mlx5_eswitch *esw;

			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
				esw = peer_mdev->priv.eswitch;
				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
			}
			mlx5_ib_release_transport(mdev);
		}
		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
	}
}

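/* e-switch representor callbacks for the REP_IB protocol. */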
static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_core_dev *peer_mdev;
	struct mlx5_eswitch *esw;
	int i;

	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
		esw = peer_mdev->priv.eswitch;
		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	}
}

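/* Return the REP_ETH (netdev) representor registered for @vport_num. */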
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
					  u16 vport_num)
{
	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}

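/*
 * Install the e-switch rule that forwards traffic sent on @sq to the
 * representor's vport. @port is 1-based; non-representor devices (and
 * port 0) need no rule and get NULL.
 */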
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
						   u32 port)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	struct mlx5_eswitch_rep *rep;

	if (!dev->is_rep || !port)
		return NULL;

	if (!dev->port[port - 1].rep)
		return ERR_PTR(-EINVAL);

	rep = dev->port[port - 1].rep;

	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
}

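/*
 * Auxiliary driver glue: binding to mlx5 core's MLX5_ADEV_NAME
 * ".rdma-rep" auxiliary device (see mlx5r_rep_id_table below)
 * registers the REP_IB callbacks with the device's eswitch, and
 * removal unregisters them.
 */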
static int mlx5r_rep_probe(struct auxiliary_device *adev,
			   const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	return 0;
}

static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};

int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}

void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}