xref: /linux/drivers/infiniband/hw/mlx5/ib_rep.c (revision ae22a94997b8a03dcb3c922857c203246711f9d4)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
4  */
5 
6 #include <linux/mlx5/vport.h>
7 #include "ib_rep.h"
8 #include "srq.h"
9 
10 static int
11 mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
12 		      struct mlx5_eswitch_rep *rep,
13 		      int vport_index)
14 {
15 	struct mlx5_ib_dev *ibdev;
16 
17 	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
18 	if (!ibdev)
19 		return -EINVAL;
20 
21 	ibdev->port[vport_index].rep = rep;
22 	rep->rep_data[REP_IB].priv = ibdev;
23 	write_lock(&ibdev->port[vport_index].roce.netdev_lock);
24 	ibdev->port[vport_index].roce.netdev =
25 		mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
26 	write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
27 
28 	return 0;
29 }
30 
31 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
32 
33 static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
34 {
35 	struct mlx5_core_dev *peer_dev;
36 	int i;
37 
38 	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
39 		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
40 
41 		if (mlx5_lag_is_mpesw(peer_dev))
42 			*num_ports += peer_num_ports;
43 		else
44 			/* Only 1 ib port is the representor for all uplinks */
45 			*num_ports += peer_num_ports - 1;
46 	}
47 }
48 
49 static int
50 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
51 {
52 	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
53 	struct mlx5_core_dev *lag_master = dev;
54 	const struct mlx5_ib_profile *profile;
55 	struct mlx5_core_dev *peer_dev;
56 	struct mlx5_ib_dev *ibdev;
57 	int new_uplink = false;
58 	int vport_index;
59 	int ret;
60 	int i;
61 
62 	vport_index = rep->vport_index;
63 
64 	if (mlx5_lag_is_shared_fdb(dev)) {
65 		if (mlx5_lag_is_master(dev)) {
66 			mlx5_ib_num_ports_update(dev, &num_ports);
67 		} else {
68 			if (rep->vport == MLX5_VPORT_UPLINK) {
69 				if (!mlx5_lag_is_mpesw(dev))
70 					return 0;
71 				new_uplink = true;
72 			}
73 			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
74 				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
75 
76 				if (mlx5_lag_is_master(peer_dev))
77 					lag_master = peer_dev;
78 				else if (!mlx5_lag_is_mpesw(dev))
79 				/* Only 1 ib port is the representor for all uplinks */
80 					peer_n_ports--;
81 
82 				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
83 					vport_index += peer_n_ports;
84 			}
85 		}
86 	}
87 
88 	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
89 		profile = &raw_eth_profile;
90 	else
91 		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
92 
93 	ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
94 	if (!ibdev)
95 		return -ENOMEM;
96 
97 	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
98 			      GFP_KERNEL);
99 	if (!ibdev->port) {
100 		ret = -ENOMEM;
101 		goto fail_port;
102 	}
103 
104 	ibdev->is_rep = true;
105 	vport_index = rep->vport_index;
106 	ibdev->port[vport_index].rep = rep;
107 	ibdev->port[vport_index].roce.netdev =
108 		mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
109 	ibdev->mdev = lag_master;
110 	ibdev->num_ports = num_ports;
111 
112 	ret = __mlx5_ib_add(ibdev, profile);
113 	if (ret)
114 		goto fail_add;
115 
116 	rep->rep_data[REP_IB].priv = ibdev;
117 	if (mlx5_lag_is_shared_fdb(lag_master))
118 		mlx5_ib_register_peer_vport_reps(lag_master);
119 
120 	return 0;
121 
122 fail_add:
123 	kfree(ibdev->port);
124 fail_port:
125 	ib_dealloc_device(&ibdev->ib_dev);
126 	return ret;
127 }
128 
129 static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
130 {
131 	return rep->rep_data[REP_IB].priv;
132 }
133 
134 static void
135 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
136 {
137 	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
138 	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
139 	int vport_index = rep->vport_index;
140 	struct mlx5_ib_port *port;
141 	int i;
142 
143 	if (WARN_ON(!mdev))
144 		return;
145 
146 	if (!dev)
147 		return;
148 
149 	if (mlx5_lag_is_shared_fdb(mdev) &&
150 	    !mlx5_lag_is_master(mdev)) {
151 		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
152 			return;
153 		for (i = 0; i < dev->num_ports; i++) {
154 			if (dev->port[i].rep == rep)
155 				break;
156 		}
157 		if (WARN_ON(i == dev->num_ports))
158 			return;
159 		vport_index = i;
160 	}
161 
162 	port = &dev->port[vport_index];
163 	write_lock(&port->roce.netdev_lock);
164 	port->roce.netdev = NULL;
165 	write_unlock(&port->roce.netdev_lock);
166 	rep->rep_data[REP_IB].priv = NULL;
167 	port->rep = NULL;
168 
169 	if (rep->vport == MLX5_VPORT_UPLINK) {
170 
171 		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
172 			return;
173 
174 		if (mlx5_lag_is_shared_fdb(mdev)) {
175 			struct mlx5_core_dev *peer_mdev;
176 			struct mlx5_eswitch *esw;
177 
178 			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
179 				esw = peer_mdev->priv.eswitch;
180 				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
181 			}
182 		}
183 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
184 	}
185 }
186 
187 static const struct mlx5_eswitch_rep_ops rep_ops = {
188 	.load = mlx5_ib_vport_rep_load,
189 	.unload = mlx5_ib_vport_rep_unload,
190 	.get_proto_dev = mlx5_ib_rep_to_dev,
191 };
192 
193 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
194 {
195 	struct mlx5_core_dev *peer_mdev;
196 	struct mlx5_eswitch *esw;
197 	int i;
198 
199 	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
200 		esw = peer_mdev->priv.eswitch;
201 		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
202 	}
203 }
204 
205 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
206 					  u16 vport_num)
207 {
208 	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
209 }
210 
211 struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
212 						   struct mlx5_ib_sq *sq,
213 						   u32 port)
214 {
215 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
216 	struct mlx5_eswitch_rep *rep;
217 
218 	if (!dev->is_rep || !port)
219 		return NULL;
220 
221 	if (!dev->port[port - 1].rep)
222 		return ERR_PTR(-EINVAL);
223 
224 	rep = dev->port[port - 1].rep;
225 
226 	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
227 }
228 
229 static int mlx5r_rep_probe(struct auxiliary_device *adev,
230 			   const struct auxiliary_device_id *id)
231 {
232 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
233 	struct mlx5_core_dev *mdev = idev->mdev;
234 	struct mlx5_eswitch *esw;
235 
236 	esw = mdev->priv.eswitch;
237 	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
238 	return 0;
239 }
240 
241 static void mlx5r_rep_remove(struct auxiliary_device *adev)
242 {
243 	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
244 	struct mlx5_core_dev *mdev = idev->mdev;
245 	struct mlx5_eswitch *esw;
246 
247 	esw = mdev->priv.eswitch;
248 	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
249 }
250 
251 static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
252 	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
253 	{},
254 };
255 
256 MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
257 
258 static struct auxiliary_driver mlx5r_rep_driver = {
259 	.name = "rep",
260 	.probe = mlx5r_rep_probe,
261 	.remove = mlx5r_rep_remove,
262 	.id_table = mlx5r_rep_id_table,
263 };
264 
265 int mlx5r_rep_init(void)
266 {
267 	return auxiliary_driver_register(&mlx5r_rep_driver);
268 }
269 
270 void mlx5r_rep_cleanup(void)
271 {
272 	auxiliary_driver_unregister(&mlx5r_rep_driver);
273 }
274