xref: /freebsd/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c (revision 62ff619dcc3540659a319be71c9a489f1659e14a)
1 /*-
2  * Copyright (c) 2016-2020, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
32 
33 struct mlx5_ib_gsi_wr {
34 	struct ib_cqe cqe;
35 	struct ib_wc wc;
36 	int send_flags;
37 	bool completed:1;
38 };
39 
40 struct mlx5_ib_gsi_qp {
41 	struct ib_qp ibqp;
42 	struct ib_qp *rx_qp;
43 	u8 port_num;
44 	struct ib_qp_cap cap;
45 	enum ib_sig_type sq_sig_type;
46 	/* Serialize qp state modifications */
47 	struct mutex mutex;
48 	struct ib_cq *cq;
49 	struct mlx5_ib_gsi_wr *outstanding_wrs;
50 	u32 outstanding_pi, outstanding_ci;
51 	int num_qps;
52 	/* Protects access to the tx_qps. Post send operations synchronize
53 	 * with tx_qp creation in setup_qp(). Also protects the
54 	 * outstanding_wrs array and indices.
55 	 */
56 	spinlock_t lock;
57 	struct ib_qp **tx_qps;
58 };
59 
60 static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
61 {
62 	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
63 }
64 
65 static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
66 {
67 	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
68 }
69 
70 /* Call with gsi->lock locked */
71 static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
72 {
73 	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
74 	struct mlx5_ib_gsi_wr *wr;
75 	u32 index;
76 
77 	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
78 	     index++) {
79 		wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
80 
81 		if (!wr->completed)
82 			break;
83 
84 		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
85 		    wr->send_flags & IB_SEND_SIGNALED)
86 			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
87 
88 		wr->completed = false;
89 	}
90 
91 	gsi->outstanding_ci = index;
92 }
93 
94 static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
95 {
96 	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
97 	struct mlx5_ib_gsi_wr *wr =
98 		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
99 	u64 wr_id;
100 	unsigned long flags;
101 
102 	spin_lock_irqsave(&gsi->lock, flags);
103 	wr->completed = true;
104 	wr_id = wr->wc.wr_id;
105 	wr->wc = *wc;
106 	wr->wc.wr_id = wr_id;
107 	wr->wc.qp = &gsi->ibqp;
108 
109 	generate_completions(gsi);
110 	spin_unlock_irqrestore(&gsi->lock, flags);
111 }
112 
113 struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
114 				    struct ib_qp_init_attr *init_attr)
115 {
116 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
117 	struct mlx5_ib_gsi_qp *gsi;
118 	struct ib_qp_init_attr hw_init_attr = *init_attr;
119 	const u8 port_num = init_attr->port_num;
120 	const int num_pkeys = pd->device->attrs.max_pkeys;
121 	const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
122 	int ret;
123 
124 	mlx5_ib_dbg(dev, "creating GSI QP\n");
125 
126 	if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
127 		mlx5_ib_warn(dev,
128 			     "invalid port number %d during GSI QP creation\n",
129 			     port_num);
130 		return ERR_PTR(-EINVAL);
131 	}
132 
133 	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
134 	if (!gsi)
135 		return ERR_PTR(-ENOMEM);
136 
137 	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
138 	if (!gsi->tx_qps) {
139 		ret = -ENOMEM;
140 		goto err_free;
141 	}
142 
143 	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
144 				       sizeof(*gsi->outstanding_wrs),
145 				       GFP_KERNEL);
146 	if (!gsi->outstanding_wrs) {
147 		ret = -ENOMEM;
148 		goto err_free_tx;
149 	}
150 
151 	mutex_init(&gsi->mutex);
152 
153 	mutex_lock(&dev->devr.mutex);
154 
155 	if (dev->devr.ports[port_num - 1].gsi) {
156 		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
157 			     port_num);
158 		ret = -EBUSY;
159 		goto err_free_wrs;
160 	}
161 	gsi->num_qps = num_qps;
162 	spin_lock_init(&gsi->lock);
163 
164 	gsi->cap = init_attr->cap;
165 	gsi->sq_sig_type = init_attr->sq_sig_type;
166 	gsi->ibqp.qp_num = 1;
167 	gsi->port_num = port_num;
168 
169 	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
170 			      IB_POLL_SOFTIRQ);
171 	if (IS_ERR(gsi->cq)) {
172 		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
173 			     PTR_ERR(gsi->cq));
174 		ret = PTR_ERR(gsi->cq);
175 		goto err_free_wrs;
176 	}
177 
178 	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
179 	hw_init_attr.send_cq = gsi->cq;
180 	if (num_qps) {
181 		hw_init_attr.cap.max_send_wr = 0;
182 		hw_init_attr.cap.max_send_sge = 0;
183 		hw_init_attr.cap.max_inline_data = 0;
184 	}
185 	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
186 	if (IS_ERR(gsi->rx_qp)) {
187 		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
188 			     PTR_ERR(gsi->rx_qp));
189 		ret = PTR_ERR(gsi->rx_qp);
190 		goto err_destroy_cq;
191 	}
192 
193 	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
194 
195 	mutex_unlock(&dev->devr.mutex);
196 
197 	return &gsi->ibqp;
198 
199 err_destroy_cq:
200 	ib_free_cq(gsi->cq);
201 err_free_wrs:
202 	mutex_unlock(&dev->devr.mutex);
203 	kfree(gsi->outstanding_wrs);
204 err_free_tx:
205 	kfree(gsi->tx_qps);
206 err_free:
207 	kfree(gsi);
208 	return ERR_PTR(ret);
209 }
210 
211 int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
212 {
213 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
214 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
215 	const int port_num = gsi->port_num;
216 	int qp_index;
217 	int ret;
218 
219 	mlx5_ib_dbg(dev, "destroying GSI QP\n");
220 
221 	mutex_lock(&dev->devr.mutex);
222 	ret = ib_destroy_qp(gsi->rx_qp);
223 	if (ret) {
224 		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
225 			     ret);
226 		mutex_unlock(&dev->devr.mutex);
227 		return ret;
228 	}
229 	dev->devr.ports[port_num - 1].gsi = NULL;
230 	mutex_unlock(&dev->devr.mutex);
231 	gsi->rx_qp = NULL;
232 
233 	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
234 		if (!gsi->tx_qps[qp_index])
235 			continue;
236 		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
237 		gsi->tx_qps[qp_index] = NULL;
238 	}
239 
240 	ib_free_cq(gsi->cq);
241 
242 	kfree(gsi->outstanding_wrs);
243 	kfree(gsi->tx_qps);
244 	kfree(gsi);
245 
246 	return 0;
247 }
248 
249 static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
250 {
251 	struct ib_pd *pd = gsi->rx_qp->pd;
252 	struct ib_qp_init_attr init_attr = {
253 		.event_handler = gsi->rx_qp->event_handler,
254 		.qp_context = gsi->rx_qp->qp_context,
255 		.send_cq = gsi->cq,
256 		.recv_cq = gsi->rx_qp->recv_cq,
257 		.cap = {
258 			.max_send_wr = gsi->cap.max_send_wr,
259 			.max_send_sge = gsi->cap.max_send_sge,
260 			.max_inline_data = gsi->cap.max_inline_data,
261 		},
262 		.sq_sig_type = gsi->sq_sig_type,
263 		.qp_type = IB_QPT_UD,
264 		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
265 	};
266 
267 	return ib_create_qp(pd, &init_attr);
268 }
269 
270 static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
271 			 u16 qp_index)
272 {
273 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
274 	struct ib_qp_attr attr;
275 	int mask;
276 	int ret;
277 
278 	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
279 	attr.qp_state = IB_QPS_INIT;
280 	attr.pkey_index = qp_index;
281 	attr.qkey = IB_QP1_QKEY;
282 	attr.port_num = gsi->port_num;
283 	ret = ib_modify_qp(qp, &attr, mask);
284 	if (ret) {
285 		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
286 			    qp->qp_num, ret);
287 		return ret;
288 	}
289 
290 	attr.qp_state = IB_QPS_RTR;
291 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
292 	if (ret) {
293 		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
294 			    qp->qp_num, ret);
295 		return ret;
296 	}
297 
298 	attr.qp_state = IB_QPS_RTS;
299 	attr.sq_psn = 0;
300 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
301 	if (ret) {
302 		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
303 			    qp->qp_num, ret);
304 		return ret;
305 	}
306 
307 	return 0;
308 }
309 
310 static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
311 {
312 	struct ib_device *device = gsi->rx_qp->device;
313 	struct mlx5_ib_dev *dev = to_mdev(device);
314 	struct ib_qp *qp;
315 	unsigned long flags;
316 	u16 pkey;
317 	int ret;
318 
319 	ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
320 	if (ret) {
321 		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
322 			     gsi->port_num, qp_index);
323 		return;
324 	}
325 
326 	if (!pkey) {
327 		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d.  Skipping.\n",
328 			    gsi->port_num, qp_index);
329 		return;
330 	}
331 
332 	spin_lock_irqsave(&gsi->lock, flags);
333 	qp = gsi->tx_qps[qp_index];
334 	spin_unlock_irqrestore(&gsi->lock, flags);
335 	if (qp) {
336 		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
337 			    gsi->port_num, qp_index);
338 		return;
339 	}
340 
341 	qp = create_gsi_ud_qp(gsi);
342 	if (IS_ERR(qp)) {
343 		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
344 			     PTR_ERR(qp));
345 		return;
346 	}
347 
348 	ret = modify_to_rts(gsi, qp, qp_index);
349 	if (ret)
350 		goto err_destroy_qp;
351 
352 	spin_lock_irqsave(&gsi->lock, flags);
353 	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
354 	gsi->tx_qps[qp_index] = qp;
355 	spin_unlock_irqrestore(&gsi->lock, flags);
356 
357 	return;
358 
359 err_destroy_qp:
360 	WARN_ON_ONCE(qp);
361 }
362 
363 static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
364 {
365 	u16 qp_index;
366 
367 	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
368 		setup_qp(gsi, qp_index);
369 }
370 
371 int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
372 			  int attr_mask)
373 {
374 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
375 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
376 	int ret;
377 
378 	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
379 
380 	mutex_lock(&gsi->mutex);
381 	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
382 	if (ret) {
383 		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
384 		goto unlock;
385 	}
386 
387 	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
388 		setup_qps(gsi);
389 
390 unlock:
391 	mutex_unlock(&gsi->mutex);
392 
393 	return ret;
394 }
395 
396 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
397 			 int qp_attr_mask,
398 			 struct ib_qp_init_attr *qp_init_attr)
399 {
400 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
401 	int ret;
402 
403 	mutex_lock(&gsi->mutex);
404 	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
405 	qp_init_attr->cap = gsi->cap;
406 	mutex_unlock(&gsi->mutex);
407 
408 	return ret;
409 }
410 
411 /* Call with gsi->lock locked */
412 static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
413 				      struct ib_ud_wr *wr, struct ib_wc *wc)
414 {
415 	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
416 	struct mlx5_ib_gsi_wr *gsi_wr;
417 
418 	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
419 		mlx5_ib_warn(dev, "no available GSI work request.\n");
420 		return -ENOMEM;
421 	}
422 
423 	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
424 				       gsi->cap.max_send_wr];
425 	gsi->outstanding_pi++;
426 
427 	if (!wc) {
428 		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
429 		gsi_wr->wc.pkey_index = wr->pkey_index;
430 		gsi_wr->wc.wr_id = wr->wr.wr_id;
431 	} else {
432 		gsi_wr->wc = *wc;
433 		gsi_wr->completed = true;
434 	}
435 
436 	gsi_wr->cqe.done = &handle_single_completion;
437 	wr->wr.wr_cqe = &gsi_wr->cqe;
438 
439 	return 0;
440 }
441 
442 /* Call with gsi->lock locked */
443 static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
444 				    struct ib_ud_wr *wr)
445 {
446 	struct ib_wc wc = {
447 		{ .wr_id = wr->wr.wr_id },
448 		.status = IB_WC_SUCCESS,
449 		.opcode = IB_WC_SEND,
450 		.qp = &gsi->ibqp,
451 	};
452 	int ret;
453 
454 	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
455 	if (ret)
456 		return ret;
457 
458 	generate_completions(gsi);
459 
460 	return 0;
461 }
462 
463 /* Call with gsi->lock locked */
464 static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
465 {
466 	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
467 	int qp_index = wr->pkey_index;
468 
469 	if (!mlx5_ib_deth_sqpn_cap(dev))
470 		return gsi->rx_qp;
471 
472 	if (qp_index >= gsi->num_qps)
473 		return NULL;
474 
475 	return gsi->tx_qps[qp_index];
476 }
477 
478 int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
479 			  const struct ib_send_wr **bad_wr)
480 {
481 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
482 	struct ib_qp *tx_qp;
483 	unsigned long flags;
484 	int ret;
485 
486 	for (; wr; wr = wr->next) {
487 		struct ib_ud_wr cur_wr = *ud_wr(wr);
488 
489 		cur_wr.wr.next = NULL;
490 
491 		spin_lock_irqsave(&gsi->lock, flags);
492 		tx_qp = get_tx_qp(gsi, &cur_wr);
493 		if (!tx_qp) {
494 			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
495 			if (ret)
496 				goto err;
497 			spin_unlock_irqrestore(&gsi->lock, flags);
498 			continue;
499 		}
500 
501 		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
502 		if (ret)
503 			goto err;
504 
505 		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
506 		if (ret) {
507 			/* Undo the effect of adding the outstanding wr */
508 			gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
509 					      gsi->cap.max_send_wr;
510 			goto err;
511 		}
512 		spin_unlock_irqrestore(&gsi->lock, flags);
513 	}
514 
515 	return 0;
516 
517 err:
518 	spin_unlock_irqrestore(&gsi->lock, flags);
519 	*bad_wr = wr;
520 	return ret;
521 }
522 
523 int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
524 			  const struct ib_recv_wr **bad_wr)
525 {
526 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
527 
528 	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
529 }
530 
531 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
532 {
533 	if (!gsi)
534 		return;
535 
536 	mutex_lock(&gsi->mutex);
537 	setup_qps(gsi);
538 	mutex_unlock(&gsi->mutex);
539 }
540