xref: /linux/drivers/infiniband/hw/mlx5/ib_rep.c (revision 22c55fb9eb92395d999b8404d73e58540d11bdd8)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
4  */
5 
6 #include <linux/mlx5/vport.h>
7 #include "ib_rep.h"
8 #include "srq.h"
9 
10 static int
11 mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
12 		      struct mlx5_eswitch_rep *rep,
13 		      int vport_index)
14 {
15 	struct mlx5_ib_dev *ibdev;
16 	struct net_device *ndev;
17 
18 	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
19 	if (!ibdev)
20 		return -EINVAL;
21 
22 	ibdev->port[vport_index].rep = rep;
23 	rep->rep_data[REP_IB].priv = ibdev;
24 	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
25 
26 	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
27 }
28 
/*
 * Forward declaration: the definition sits after rep_ops (which it passes to
 * the eswitch), but mlx5_ib_vport_rep_load() needs to call it earlier in the
 * file.
 */
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
30 
31 static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
32 {
33 	struct mlx5_core_dev *peer_dev;
34 	int i;
35 
36 	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
37 		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
38 
39 		if (mlx5_lag_is_mpesw(peer_dev))
40 			*num_ports += peer_num_ports;
41 		else
42 			/* Only 1 ib port is the representor for all uplinks */
43 			*num_ports += peer_num_ports - 1;
44 	}
45 }
46 
47 static int
48 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
49 {
50 	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
51 	struct mlx5_core_dev *lag_master = dev;
52 	const struct mlx5_ib_profile *profile;
53 	struct mlx5_core_dev *peer_dev;
54 	struct mlx5_ib_dev *ibdev;
55 	int new_uplink = false;
56 	int vport_index;
57 	int ret;
58 	int i;
59 
60 	vport_index = rep->vport_index;
61 
62 	if (mlx5_lag_is_shared_fdb(dev)) {
63 		if (mlx5_lag_is_master(dev)) {
64 			mlx5_ib_num_ports_update(dev, &num_ports);
65 		} else {
66 			if (rep->vport == MLX5_VPORT_UPLINK) {
67 				if (!mlx5_lag_is_mpesw(dev))
68 					return 0;
69 				new_uplink = true;
70 			}
71 			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
72 				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
73 
74 				if (mlx5_lag_is_master(peer_dev))
75 					lag_master = peer_dev;
76 				else if (!mlx5_lag_is_mpesw(dev))
77 				/* Only 1 ib port is the representor for all uplinks */
78 					peer_n_ports--;
79 
80 				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
81 					vport_index += peer_n_ports;
82 			}
83 		}
84 	}
85 
86 	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
87 		profile = &raw_eth_profile;
88 	else
89 		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
90 
91 	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
92 					 mlx5_core_net(lag_master));
93 	if (!ibdev)
94 		return -ENOMEM;
95 
96 	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
97 			      GFP_KERNEL);
98 	if (!ibdev->port) {
99 		ret = -ENOMEM;
100 		goto fail_port;
101 	}
102 
103 	ibdev->is_rep = true;
104 	vport_index = rep->vport_index;
105 	ibdev->port[vport_index].rep = rep;
106 	ibdev->mdev = lag_master;
107 	ibdev->num_ports = num_ports;
108 	ibdev->ib_dev.phys_port_cnt = num_ports;
109 	ret = ib_device_set_netdev(&ibdev->ib_dev,
110 			mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
111 					       rep->vport),
112 			vport_index + 1);
113 	if (ret)
114 		goto fail_add;
115 
116 	ret = __mlx5_ib_add(ibdev, profile);
117 	if (ret)
118 		goto fail_add;
119 
120 	rep->rep_data[REP_IB].priv = ibdev;
121 	if (mlx5_lag_is_shared_fdb(lag_master))
122 		mlx5_ib_register_peer_vport_reps(lag_master);
123 
124 	return 0;
125 
126 fail_add:
127 	kfree(ibdev->port);
128 fail_port:
129 	ib_dealloc_device(&ibdev->ib_dev);
130 	return ret;
131 }
132 
133 static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
134 {
135 	return rep->rep_data[REP_IB].priv;
136 }
137 
138 static void
139 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
140 {
141 	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
142 	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
143 	int vport_index = rep->vport_index;
144 	struct mlx5_ib_port *port;
145 	int i;
146 
147 	if (WARN_ON(!mdev))
148 		return;
149 
150 	if (!dev)
151 		return;
152 
153 	if (mlx5_lag_is_shared_fdb(mdev) &&
154 	    !mlx5_lag_is_master(mdev)) {
155 		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
156 			return;
157 		for (i = 0; i < dev->num_ports; i++) {
158 			if (dev->port[i].rep == rep)
159 				break;
160 		}
161 		if (WARN_ON(i == dev->num_ports))
162 			return;
163 		vport_index = i;
164 	}
165 
166 	port = &dev->port[vport_index];
167 
168 	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
169 	rep->rep_data[REP_IB].priv = NULL;
170 	port->rep = NULL;
171 
172 	if (rep->vport == MLX5_VPORT_UPLINK) {
173 
174 		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
175 			return;
176 
177 		if (mlx5_lag_is_shared_fdb(mdev)) {
178 			struct mlx5_core_dev *peer_mdev;
179 			struct mlx5_eswitch *esw;
180 
181 			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
182 				esw = peer_mdev->priv.eswitch;
183 				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
184 			}
185 		}
186 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
187 	}
188 }
189 
/*
 * REP_IB representor callbacks. Passed to mlx5_eswitch_register_vport_reps()
 * both at probe time and when registering LAG peers.
 */
static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};
195 
196 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
197 {
198 	struct mlx5_core_dev *peer_mdev;
199 	struct mlx5_eswitch *esw;
200 	int i;
201 
202 	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
203 		esw = peer_mdev->priv.eswitch;
204 		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
205 	}
206 }
207 
208 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
209 					  u16 vport_num)
210 {
211 	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
212 }
213 
214 struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
215 						   struct mlx5_ib_sq *sq,
216 						   u32 port)
217 {
218 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
219 	struct mlx5_eswitch_rep *rep;
220 
221 	if (!dev->is_rep || !port)
222 		return NULL;
223 
224 	if (!dev->port[port - 1].rep)
225 		return ERR_PTR(-EINVAL);
226 
227 	rep = dev->port[port - 1].rep;
228 
229 	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
230 }
231 
232 static int mlx5r_rep_probe(struct auxiliary_device *adev,
233 			   const struct auxiliary_device_id *id)
234 {
235 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
236 	struct mlx5_core_dev *mdev = idev->mdev;
237 	struct mlx5_eswitch *esw;
238 
239 	esw = mdev->priv.eswitch;
240 	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
241 	return 0;
242 }
243 
244 static void mlx5r_rep_remove(struct auxiliary_device *adev)
245 {
246 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
247 	struct mlx5_core_dev *mdev = idev->mdev;
248 	struct mlx5_eswitch *esw;
249 
250 	esw = mdev->priv.eswitch;
251 	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
252 }
253 
/* Auxiliary device IDs this driver binds to; the empty entry ends the table. */
static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
260 
/* Auxiliary driver registered by mlx5r_rep_init() and unregistered by mlx5r_rep_cleanup(). */
static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};
267 
268 int mlx5r_rep_init(void)
269 {
270 	return auxiliary_driver_register(&mlx5r_rep_driver);
271 }
272 
273 void mlx5r_rep_cleanup(void)
274 {
275 	auxiliary_driver_unregister(&mlx5r_rep_driver);
276 }
277