/*-
 * Copyright (c) 2016-2020, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <dev/mlx5/mlx5_ib/mlx5_ib.h>

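/*
 * GSI (QP1) emulation.
 *
 * A single hardware GSI QP (rx_qp) receives all QP1 traffic.  When the
 * device can override the DETH source QPN (set_deth_sqpn), sends are
 * instead spread over one UD "tx" QP per P_Key table index, so that each
 * MAD leaves with the P_Key selected by its pkey_index.  Send completions
 * from the tx QPs are staged in the outstanding_wrs ring and replayed to
 * the caller's send CQ in posting order.
 */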
struct mlx5_ib_gsi_wr {
	struct ib_cqe cqe;
	struct ib_wc wc;
	int send_flags;
	bool completed:1;
};

struct mlx5_ib_gsi_qp {
	struct ib_qp ibqp;
	struct ib_qp *rx_qp;
	u8 port_num;
	struct ib_qp_cap cap;
	enum ib_sig_type sq_sig_type;
	/* Serialize qp state modifications */
	struct mutex mutex;
	struct ib_cq *cq;
	struct mlx5_ib_gsi_wr *outstanding_wrs;
	u32 outstanding_pi, outstanding_ci;
	int num_qps;
	/* Protects access to the tx_qps. Post send operations synchronize
	 * with tx_qp creation in setup_qp(). Also protects the
	 * outstanding_wrs array and indices.
	 */
	spinlock_t lock;
	struct ib_qp **tx_qps;
};

static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
{
	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
}

static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}

/* Call with gsi->lock locked */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
	struct mlx5_ib_gsi_wr *wr;
	u32 index;

	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
	     index++) {
		wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];

		if (!wr->completed)
			break;

		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
		    wr->send_flags & IB_SEND_SIGNALED)
			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));

		wr->completed = false;
	}

	gsi->outstanding_ci = index;
}

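/*
 * Completion handler for the internal send CQ shared by the tx QPs: mark
 * the matching outstanding work request completed and relay any newly
 * completable entries to the GSI QP's own send CQ.
 */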
static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
	struct mlx5_ib_gsi_wr *wr =
		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
	u64 wr_id;
	unsigned long flags;

	spin_lock_irqsave(&gsi->lock, flags);
	wr->completed = true;
	wr_id = wr->wc.wr_id;
	wr->wc = *wc;
	wr->wc.wr_id = wr_id;
	wr->wc.qp = &gsi->ibqp;

	generate_completions(gsi);
	spin_unlock_irqrestore(&gsi->lock, flags);
}

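/*
 * Create the GSI QP exposed to the consumer: allocate the per-P_Key tx QP
 * array and the outstanding-WR ring, a dedicated send CQ, and the hardware
 * GSI QP used for receive (and for send when the device cannot override the
 * DETH source QPN).
 */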
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
				    struct ib_qp_init_attr *init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_gsi_qp *gsi;
	struct ib_qp_init_attr hw_init_attr = *init_attr;
	const u8 port_num = init_attr->port_num;
	const int num_pkeys = pd->device->attrs.max_pkeys;
	const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
	int ret;

	mlx5_ib_dbg(dev, "creating GSI QP\n");

	if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
		mlx5_ib_warn(dev,
			     "invalid port number %d during GSI QP creation\n",
			     port_num);
		return ERR_PTR(-EINVAL);
	}

	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
	if (!gsi)
		return ERR_PTR(-ENOMEM);

	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
	if (!gsi->tx_qps) {
		ret = -ENOMEM;
		goto err_free;
	}

	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
				       sizeof(*gsi->outstanding_wrs),
				       GFP_KERNEL);
	if (!gsi->outstanding_wrs) {
		ret = -ENOMEM;
		goto err_free_tx;
	}

	mutex_init(&gsi->mutex);

	mutex_lock(&dev->devr.mutex);

	if (dev->devr.ports[port_num - 1].gsi) {
		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
			     port_num);
		ret = -EBUSY;
		goto err_free_wrs;
	}
	gsi->num_qps = num_qps;
	spin_lock_init(&gsi->lock);

	gsi->cap = init_attr->cap;
	gsi->sq_sig_type = init_attr->sq_sig_type;
	gsi->ibqp.qp_num = 1;
	gsi->port_num = port_num;

	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
			      IB_POLL_SOFTIRQ);
	if (IS_ERR(gsi->cq)) {
		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
			     PTR_ERR(gsi->cq));
		ret = PTR_ERR(gsi->cq);
		goto err_free_wrs;
	}

	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
	hw_init_attr.send_cq = gsi->cq;
	if (num_qps) {
		hw_init_attr.cap.max_send_wr = 0;
		hw_init_attr.cap.max_send_sge = 0;
		hw_init_attr.cap.max_inline_data = 0;
	}
	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
	if (IS_ERR(gsi->rx_qp)) {
		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
			     PTR_ERR(gsi->rx_qp));
		ret = PTR_ERR(gsi->rx_qp);
		goto err_destroy_cq;
	}

	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;

	mutex_unlock(&dev->devr.mutex);

	return &gsi->ibqp;

err_destroy_cq:
	ib_free_cq(gsi->cq);
err_free_wrs:
	mutex_unlock(&dev->devr.mutex);
	kfree(gsi->outstanding_wrs);
err_free_tx:
	kfree(gsi->tx_qps);
err_free:
	kfree(gsi);
	return ERR_PTR(ret);
}

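/* Tear down the GSI QP: the hardware rx QP first, then the per-P_Key tx QPs. */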
int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	const int port_num = gsi->port_num;
	int qp_index;
	int ret;

	mlx5_ib_dbg(dev, "destroying GSI QP\n");

	mutex_lock(&dev->devr.mutex);
	ret = ib_destroy_qp(gsi->rx_qp);
	if (ret) {
		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
			     ret);
		mutex_unlock(&dev->devr.mutex);
		return ret;
	}
	dev->devr.ports[port_num - 1].gsi = NULL;
	mutex_unlock(&dev->devr.mutex);
	gsi->rx_qp = NULL;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
		if (!gsi->tx_qps[qp_index])
			continue;
		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
		gsi->tx_qps[qp_index] = NULL;
	}

	ib_free_cq(gsi->cq);

	kfree(gsi->outstanding_wrs);
	kfree(gsi->tx_qps);
	kfree(gsi);

	return 0;
}

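/*
 * Create one UD tx QP that transmits with QP1's source QPN
 * (MLX5_IB_QP_CREATE_SQPN_QP1), inheriting the GSI QP's capabilities.
 */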
static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_pd *pd = gsi->rx_qp->pd;
	struct ib_qp_init_attr init_attr = {
		.event_handler = gsi->rx_qp->event_handler,
		.qp_context = gsi->rx_qp->qp_context,
		.send_cq = gsi->cq,
		.recv_cq = gsi->rx_qp->recv_cq,
		.cap = {
			.max_send_wr = gsi->cap.max_send_wr,
			.max_send_sge = gsi->cap.max_send_sge,
			.max_inline_data = gsi->cap.max_inline_data,
		},
		.sq_sig_type = gsi->sq_sig_type,
		.qp_type = IB_QPT_UD,
		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
	};

	return ib_create_qp(pd, &init_attr);
}

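/*
 * Walk a freshly created tx QP through INIT -> RTR -> RTS, binding it to
 * the P_Key table entry that matches its index.
 */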
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
			 u16 qp_index)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct ib_qp_attr attr;
	int mask;
	int ret;

	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = qp_index;
	attr.qkey = IB_QP1_QKEY;
	attr.port_num = gsi->port_num;
	ret = ib_modify_qp(qp, &attr, mask);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	return 0;
}

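/*
 * Create and activate the tx QP for one P_Key index, provided the index
 * holds a valid P_Key and the QP does not exist yet.
 */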
static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
	struct ib_device *device = gsi->rx_qp->device;
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct ib_qp *qp;
	unsigned long flags;
	u16 pkey;
	int ret;

	ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
	if (ret) {
		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
			     gsi->port_num, qp_index);
		return;
	}

	if (!pkey) {
		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n",
			    gsi->port_num, qp_index);
		return;
	}

	spin_lock_irqsave(&gsi->lock, flags);
	qp = gsi->tx_qps[qp_index];
	spin_unlock_irqrestore(&gsi->lock, flags);
	if (qp) {
		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
			    gsi->port_num, qp_index);
		return;
	}

	qp = create_gsi_ud_qp(gsi);
	if (IS_ERR(qp)) {
		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
			     PTR_ERR(qp));
		return;
	}

	ret = modify_to_rts(gsi, qp, qp_index);
	if (ret)
		goto err_destroy_qp;

	spin_lock_irqsave(&gsi->lock, flags);
	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
	gsi->tx_qps[qp_index] = qp;
	spin_unlock_irqrestore(&gsi->lock, flags);

	return;

err_destroy_qp:
	/* Release the tx QP that failed to reach RTS so it is not leaked. */
	WARN_ON_ONCE(ib_destroy_qp(qp));
}

static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
{
	u16 qp_index;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
		setup_qp(gsi, qp_index);
}

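/*
 * Modify the GSI QP by modifying the underlying hardware rx QP; once it
 * reaches RTS, bring up the per-P_Key tx QPs as well.
 */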
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);

	mutex_lock(&gsi->mutex);
	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
	if (ret) {
		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
		goto unlock;
	}

	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
		setup_qps(gsi);

unlock:
	mutex_unlock(&gsi->mutex);

	return ret;
}

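/* Query the hardware rx QP, but report the capabilities stored at creation. */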
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
			 int qp_attr_mask,
			 struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mutex_lock(&gsi->mutex);
	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
	qp_init_attr->cap = gsi->cap;
	mutex_unlock(&gsi->mutex);

	return ret;
}

/* Call with gsi->lock locked */
static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
				      struct ib_ud_wr *wr, struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_gsi_wr *gsi_wr;

	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
		mlx5_ib_warn(dev, "no available GSI work request.\n");
		return -ENOMEM;
	}

	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
				       gsi->cap.max_send_wr];
	gsi->outstanding_pi++;

	if (!wc) {
		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
		gsi_wr->wc.pkey_index = wr->pkey_index;
		gsi_wr->wc.wr_id = wr->wr.wr_id;
	} else {
		gsi_wr->wc = *wc;
		gsi_wr->completed = true;
	}

	gsi_wr->cqe.done = &handle_single_completion;
	wr->wr.wr_cqe = &gsi_wr->cqe;

	return 0;
}

/* Call with gsi->lock locked */
static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
				   struct ib_ud_wr *wr)
{
	struct ib_wc wc = {
		{ .wr_id = wr->wr.wr_id },
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.qp = &gsi->ibqp,
	};
	int ret;

	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
	if (ret)
		return ret;

	generate_completions(gsi);

	return 0;
}

/* Call with gsi->lock locked */
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	int qp_index = wr->pkey_index;

	if (!mlx5_ib_deth_sqpn_cap(dev))
		return gsi->rx_qp;

	if (qp_index >= gsi->num_qps)
		return NULL;

	return gsi->tx_qps[qp_index];
}

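/*
 * Post send WRs one at a time: record each WR in the outstanding ring and
 * steer it to the tx QP matching its pkey_index (or to the hardware GSI QP
 * when DETH SQPN overriding is unsupported).  WRs whose P_Key index has no
 * active tx QP are silently completed.
 */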
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
			  const struct ib_send_wr **bad_wr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	struct ib_qp *tx_qp;
	unsigned long flags;
	int ret;

	for (; wr; wr = wr->next) {
		struct ib_ud_wr cur_wr = *ud_wr(wr);

		cur_wr.wr.next = NULL;

		spin_lock_irqsave(&gsi->lock, flags);
		tx_qp = get_tx_qp(gsi, &cur_wr);
		if (!tx_qp) {
			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
			if (ret)
				goto err;
			spin_unlock_irqrestore(&gsi->lock, flags);
			continue;
		}

		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
		if (ret)
			goto err;

		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
		if (ret) {
			/* Undo the effect of adding the outstanding wr;
			 * outstanding_pi is a free-running counter, so just
			 * step it back instead of reducing it modulo
			 * max_send_wr.
			 */
			gsi->outstanding_pi--;
			goto err;
		}
		spin_unlock_irqrestore(&gsi->lock, flags);
	}

	return 0;

err:
	spin_unlock_irqrestore(&gsi->lock, flags);
	*bad_wr = wr;
	return ret;
}

int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
			  const struct ib_recv_wr **bad_wr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);

	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}

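/* P_Key table changed: (re)create tx QPs for any newly valid entries. */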
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
{
	if (!gsi)
		return;

	mutex_lock(&gsi->mutex);
	setup_qps(gsi);
	mutex_unlock(&gsi->mutex);
}