/*
 * Copyright (c) 2007 Cisco, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <infiniband/endian.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <errno.h>

#include "mlx4.h"
#include "mlx4-abi.h"
#include "wqe.h"

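/*
 * The firmware version comes back from the kernel packed into a 64-bit
 * word: bits 47:32 hold the major number, bits 31:16 the minor and
 * bits 15:0 the sub-minor.  It is unpacked here into the
 * "major.minor.sub_minor" string reported through ibv_query_device().
 */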
int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
	if (ret)
		return ret;

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->fw_ver, sizeof attr->fw_ver,
		 "%d.%d.%03d", major, minor, sub_minor);

	return 0;
}

int mlx4_query_device_ex(struct ibv_context *context,
			 const struct ibv_query_device_ex_input *input,
			 struct ibv_device_attr_ex *attr,
			 size_t attr_size)
{
	struct mlx4_context *mctx = to_mctx(context);
	struct mlx4_query_device_ex_resp resp = {};
	struct mlx4_query_device_ex cmd = {};
	uint64_t raw_fw_ver;
	unsigned sub_minor;
	unsigned major;
	unsigned minor;
	int err;

	err = ibv_cmd_query_device_ex(context, input, attr, attr_size,
				      &raw_fw_ver,
				      &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
				      &resp.ibv_resp, sizeof(resp.ibv_resp),
				      sizeof(resp));
	if (err)
		return err;

	if (resp.comp_mask & MLX4_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET) {
		mctx->core_clock.offset = resp.hca_core_clock_offset;
		mctx->core_clock.offset_valid = 1;
	}

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->orig_attr.fw_ver, sizeof attr->orig_attr.fw_ver,
		 "%d.%d.%03d", major, minor, sub_minor);

	return 0;
}

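/*
 * The HCA core clock is a free-running 64-bit counter exposed through a
 * read-only mapping at ctx->hca_core_clock, stored big-endian as a high
 * and a low 32-bit word.  The high word is sampled before and after the
 * low word; if it changed in between, the read raced a low-word rollover
 * and is retried once.
 */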
#define READL(ptr) (*((uint32_t *)(ptr)))
static int mlx4_read_clock(struct ibv_context *context, uint64_t *cycles)
{
	unsigned int clockhi, clocklo, clockhi1;
	int i;
	struct mlx4_context *ctx = to_mctx(context);

	if (!ctx->hca_core_clock)
		return -EOPNOTSUPP;

	/* Handle wraparound */
	for (i = 0; i < 2; i++) {
		clockhi = be32toh(READL(ctx->hca_core_clock));
		clocklo = be32toh(READL(ctx->hca_core_clock + 4));
		clockhi1 = be32toh(READL(ctx->hca_core_clock));
		if (clockhi == clockhi1)
			break;
	}

	*cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;

	return 0;
}

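/*
 * IBV_VALUES_MASK_RAW_CLOCK is the only value implemented here: the raw
 * cycle count is returned in raw_clock.tv_nsec with tv_sec zeroed, and
 * comp_mask reports which values were actually filled in.
 */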
int mlx4_query_rt_values(struct ibv_context *context,
			 struct ibv_values_ex *values)
{
	uint32_t comp_mask = 0;
	int err = 0;

	if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
		uint64_t cycles;

		err = mlx4_read_clock(context, &cycles);
		if (!err) {
			values->raw_clock.tv_sec = 0;
			values->raw_clock.tv_nsec = cycles;
			comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
		}
	}

	values->comp_mask = comp_mask;

	return err;
}

int mlx4_query_port(struct ibv_context *context, uint8_t port,
		    struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;
	int err;

	err = ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
	if (!err && port <= MLX4_PORTS_NUM && port > 0) {
		struct mlx4_context *mctx = to_mctx(context);

		if (!mctx->port_query_cache[port - 1].valid) {
			mctx->port_query_cache[port - 1].link_layer =
				attr->link_layer;
			mctx->port_query_cache[port - 1].caps =
				attr->port_cap_flags;
			mctx->port_query_cache[port - 1].valid = 1;
		}
	}

	return err;
}

/* Only the fields in the port cache will be valid */
static int query_port_cache(struct ibv_context *context, uint8_t port_num,
			    struct ibv_port_attr *port_attr)
{
	struct mlx4_context *mctx = to_mctx(context);

	if (port_num <= 0 || port_num > MLX4_PORTS_NUM)
		return -EINVAL;

	if (mctx->port_query_cache[port_num - 1].valid) {
		port_attr->link_layer =
			mctx->port_query_cache[port_num - 1].link_layer;
		port_attr->port_cap_flags =
			mctx->port_query_cache[port_num - 1].caps;
		return 0;
	}

	return mlx4_query_port(context, port_num,
			       (struct ibv_port_attr *)port_attr);
}

struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd cmd;
	struct mlx4_alloc_pd_resp resp;
	struct mlx4_pd *pd;

	pd = malloc(sizeof *pd);
	if (!pd)
		return NULL;

	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
			     &resp.ibv_resp, sizeof resp)) {
		free(pd);
		return NULL;
	}

	pd->pdn = resp.pdn;

	return &pd->ibv_pd;
}

int mlx4_free_pd(struct ibv_pd *pd)
{
	int ret;

	ret = ibv_cmd_dealloc_pd(pd);
	if (ret)
		return ret;

	free(to_mpd(pd));
	return 0;
}

struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
				struct ibv_xrcd_init_attr *attr)
{
	struct ibv_open_xrcd cmd;
	struct ibv_open_xrcd_resp resp;
	struct verbs_xrcd *xrcd;
	int ret;

	xrcd = calloc(1, sizeof *xrcd);
	if (!xrcd)
		return NULL;

	ret = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), attr,
				&cmd, sizeof cmd, &resp, sizeof resp);
	if (ret)
		goto err;

	return &xrcd->xrcd;

err:
	free(xrcd);
	return NULL;
}

int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
{
	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
	int ret;

	ret = ibv_cmd_close_xrcd(xrcd);
	if (!ret)
		free(xrcd);

	return ret;
}

struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
			   int access)
{
	struct ibv_mr *mr;
	struct ibv_reg_mr cmd;
	struct ibv_reg_mr_resp resp;
	int ret;

	mr = malloc(sizeof *mr);
	if (!mr)
		return NULL;

	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
			     access, mr, &cmd, sizeof cmd,
			     &resp, sizeof resp);
	if (ret) {
		free(mr);
		return NULL;
	}

	return mr;
}

int mlx4_rereg_mr(struct ibv_mr *mr,
		  int flags,
		  struct ibv_pd *pd, void *addr,
		  size_t length, int access)
{
	struct ibv_rereg_mr cmd;
	struct ibv_rereg_mr_resp resp;

	if (flags & IBV_REREG_MR_KEEP_VALID)
		return ENOTSUP;

	return ibv_cmd_rereg_mr(mr, flags, addr, length,
				(uintptr_t)addr,
				access, pd,
				&cmd, sizeof(cmd),
				&resp, sizeof(resp));
}

int mlx4_dereg_mr(struct ibv_mr *mr)
{
	int ret;

	ret = ibv_cmd_dereg_mr(mr);
	if (ret)
		return ret;

	free(mr);
	return 0;
}

struct ibv_mw *mlx4_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
{
	struct ibv_mw *mw;
	struct ibv_alloc_mw cmd;
	struct ibv_alloc_mw_resp resp;
	int ret;

	mw = calloc(1, sizeof(*mw));
	if (!mw)
		return NULL;

	ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd),
			       &resp, sizeof(resp));
	if (ret) {
		free(mw);
		return NULL;
	}

	return mw;
}

int mlx4_dealloc_mw(struct ibv_mw *mw)
{
	int ret;
	struct ibv_dealloc_mw cmd;

	ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	free(mw);
	return 0;
}

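/*
 * Memory window binding has no dedicated command path on mlx4: it is
 * implemented by posting an IBV_WR_BIND_MW work request on the QP's
 * send queue with a freshly incremented rkey, which becomes the MW's
 * new rkey once the post succeeds.
 */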
int mlx4_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
		 struct ibv_mw_bind *mw_bind)
{
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_send_wr wr = { };
	int ret;

	wr.opcode = IBV_WR_BIND_MW;
	wr.next = NULL;

	wr.wr_id = mw_bind->wr_id;
	wr.send_flags = mw_bind->send_flags;

	wr.bind_mw.mw = mw;
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
	wr.bind_mw.bind_info = mw_bind->bind_info;

	ret = mlx4_post_send(qp, &wr, &bad_wr);
	if (ret)
		return ret;

	/* updating the mw with the latest rkey. */
	mw->rkey = wr.bind_mw.rkey;

	return 0;
}

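/* Round a requested queue size up to the next power of two. */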
int align_queue_size(int req)
{
	int nent;

	for (nent = 1; nent < req; nent <<= 1)
		; /* nothing */

	return nent;
}

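/*
 * Work-completion flags, comp_mask bits and creation flags that this
 * provider accepts for extended CQ creation; create_cq() rejects
 * anything outside these masks.
 */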
enum {
	CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
				       IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
};

enum {
	CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS
};

enum {
	CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED
};

static int mlx4_cmd_create_cq(struct ibv_context *context,
			      struct ibv_cq_init_attr_ex *cq_attr,
			      struct mlx4_cq *cq)
{
	struct mlx4_create_cq cmd = {};
	struct mlx4_create_cq_resp resp = {};
	int ret;

	cmd.buf_addr = (uintptr_t) cq->buf.buf;
	cmd.db_addr = (uintptr_t) cq->set_ci_db;

	ret = ibv_cmd_create_cq(context, cq_attr->cqe, cq_attr->channel,
				cq_attr->comp_vector,
				ibv_cq_ex_to_cq(&cq->ibv_cq),
				&cmd.ibv_cmd, sizeof(cmd),
				&resp.ibv_resp, sizeof(resp));
	if (!ret)
		cq->cqn = resp.cqn;

	return ret;
}

static int mlx4_cmd_create_cq_ex(struct ibv_context *context,
				 struct ibv_cq_init_attr_ex *cq_attr,
				 struct mlx4_cq *cq)
{
	struct mlx4_create_cq_ex cmd = {};
	struct mlx4_create_cq_resp_ex resp = {};
	int ret;

	cmd.buf_addr = (uintptr_t) cq->buf.buf;
	cmd.db_addr = (uintptr_t) cq->set_ci_db;

	ret = ibv_cmd_create_cq_ex(context, cq_attr,
				   &cq->ibv_cq, &cmd.ibv_cmd,
				   sizeof(cmd.ibv_cmd),
				   sizeof(cmd),
				   &resp.ibv_resp,
				   sizeof(resp.ibv_resp),
				   sizeof(resp));
	if (!ret)
		cq->cqn = resp.cqn;

	return ret;
}

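/*
 * Common CQ creation path for both the legacy and the extended verbs:
 * validate the requested attributes against the supported masks,
 * allocate the CQE buffer and the set_ci/arm doorbell pair, issue the
 * create command, and (for extended CQs) install the ex-CQ poll
 * functions.
 */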
static struct ibv_cq_ex *create_cq(struct ibv_context *context,
				   struct ibv_cq_init_attr_ex *cq_attr,
				   int cq_alloc_flags)
{
	struct mlx4_cq *cq;
	int ret;
	struct mlx4_context *mctx = to_mctx(context);

	/* Sanity check CQ size before proceeding */
	if (cq_attr->cqe > 0x3fffff) {
		errno = EINVAL;
		return NULL;
	}

	if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
		errno = ENOTSUP;
		return NULL;
	}

	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
	    cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
		errno = ENOTSUP;
		return NULL;
	}

	if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) {
		errno = ENOTSUP;
		return NULL;
	}

	/* mlx4 devices don't support slid and sl in cqe when completion
	 * timestamp is enabled in the CQ
	 */
	if ((cq_attr->wc_flags & (IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL)) &&
	    (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)) {
		errno = ENOTSUP;
		return NULL;
	}

	cq = malloc(sizeof *cq);
	if (!cq)
		return NULL;

	cq->cons_index = 0;

	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	cq_attr->cqe = align_queue_size(cq_attr->cqe + 1);

	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cq_attr->cqe, mctx->cqe_size))
		goto err_spl;

	cq->cqe_size = mctx->cqe_size;
	cq->set_ci_db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
	if (!cq->set_ci_db)
		goto err_buf;

	cq->arm_db = cq->set_ci_db + 1;
	*cq->arm_db = 0;
	cq->arm_sn = 1;
	*cq->set_ci_db = 0;
	cq->flags = cq_alloc_flags;

	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
	    cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED)
		cq->flags |= MLX4_CQ_FLAGS_SINGLE_THREADED;

	--cq_attr->cqe;
	if (cq_alloc_flags & MLX4_CQ_FLAGS_EXTENDED)
		ret = mlx4_cmd_create_cq_ex(context, cq_attr, cq);
	else
		ret = mlx4_cmd_create_cq(context, cq_attr, cq);

	if (ret)
		goto err_db;

	if (cq_alloc_flags & MLX4_CQ_FLAGS_EXTENDED)
		mlx4_cq_fill_pfns(cq, cq_attr);

	return &cq->ibv_cq;

err_db:
	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_CQ, cq->set_ci_db);

err_buf:
	mlx4_free_buf(&cq->buf);

err_spl:
	pthread_spin_destroy(&cq->lock);

err:
	free(cq);

	return NULL;
}

struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
			      struct ibv_comp_channel *channel,
			      int comp_vector)
{
	struct ibv_cq_ex *cq;
	struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel,
					      .comp_vector = comp_vector,
					      .wc_flags = IBV_WC_STANDARD_FLAGS};

	cq = create_cq(context, &cq_attr, 0);
	return cq ? ibv_cq_ex_to_cq(cq) : NULL;
}

struct ibv_cq_ex *mlx4_create_cq_ex(struct ibv_context *context,
				    struct ibv_cq_init_attr_ex *cq_attr)
{
	/*
	 * Make local copy since some attributes might be adjusted
	 * for internal use.
	 */
	struct ibv_cq_init_attr_ex cq_attr_c = {.cqe = cq_attr->cqe,
						.channel = cq_attr->channel,
						.comp_vector = cq_attr->comp_vector,
						.wc_flags = cq_attr->wc_flags,
						.comp_mask = cq_attr->comp_mask,
						.flags = cq_attr->flags};

	return create_cq(context, &cq_attr_c, MLX4_CQ_FLAGS_EXTENDED);
}

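/*
 * Resizing allocates a new CQE buffer, asks the kernel to switch the CQ
 * over to it, and then copies any still-unpolled CQEs from the old
 * buffer into the new one while holding the CQ lock.
 */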
int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe)
{
	struct mlx4_cq *cq = to_mcq(ibcq);
	struct mlx4_resize_cq cmd;
	struct ibv_resize_cq_resp resp;
	struct mlx4_buf buf;
	int old_cqe, outst_cqe, ret;

	/* Sanity check CQ size before proceeding */
	if (cqe > 0x3fffff)
		return EINVAL;

	pthread_spin_lock(&cq->lock);

	cqe = align_queue_size(cqe + 1);
	if (cqe == ibcq->cqe + 1) {
		ret = 0;
		goto out;
	}

	/* Can't be smaller than the number of outstanding CQEs */
	outst_cqe = mlx4_get_outstanding_cqes(cq);
	if (cqe < outst_cqe + 1) {
		ret = EINVAL;
		goto out;
	}

	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe, cq->cqe_size);
	if (ret)
		goto out;

	old_cqe = ibcq->cqe;
	cmd.buf_addr = (uintptr_t) buf.buf;

	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
				&resp, sizeof resp);
	if (ret) {
		mlx4_free_buf(&buf);
		goto out;
	}

	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);

	mlx4_free_buf(&cq->buf);
	cq->buf = buf;
	mlx4_update_cons_index(cq);

out:
	pthread_spin_unlock(&cq->lock);
	return ret;
}

int mlx4_destroy_cq(struct ibv_cq *cq)
{
	int ret;

	ret = ibv_cmd_destroy_cq(cq);
	if (ret)
		return ret;

	verbs_cleanup_cq(cq);
	pthread_spin_destroy(&to_mcq(cq)->lock);
	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
	mlx4_free_buf(&to_mcq(cq)->buf);
	free(to_mcq(cq));

	return 0;
}

struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
				struct ibv_srq_init_attr *attr)
{
	struct mlx4_create_srq cmd;
	struct mlx4_create_srq_resp resp;
	struct mlx4_srq *srq;
	int ret;

	/* Sanity check SRQ size before proceeding */
	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
		return NULL;

	srq = malloc(sizeof *srq);
	if (!srq)
		return NULL;

	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	srq->max = align_queue_size(attr->attr.max_wr + 1);
	srq->max_gs = attr->attr.max_sge;
	srq->counter = 0;
	srq->ext_srq = 0;

	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
		goto err_spl;

	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
	if (!srq->db)
		goto err_free;

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr = (uintptr_t) srq->db;

	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
				 &cmd.ibv_cmd, sizeof cmd,
				 &resp.ibv_resp, sizeof resp);
	if (ret)
		goto err_db;

	return &srq->verbs_srq.srq;

err_db:
	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);

err_free:
	free(srq->wrid);
	mlx4_free_buf(&srq->buf);

err_spl:
	pthread_spin_destroy(&srq->lock);

err:
	free(srq);

	return NULL;
}

struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
				   struct ibv_srq_init_attr_ex *attr_ex)
{
	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
	    (attr_ex->srq_type == IBV_SRQT_BASIC))
		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
	else if (attr_ex->srq_type == IBV_SRQT_XRC)
		return mlx4_create_xrc_srq(context, attr_ex);

	return NULL;
}

int mlx4_modify_srq(struct ibv_srq *srq,
		    struct ibv_srq_attr *attr,
		    int attr_mask)
{
	struct ibv_modify_srq cmd;

	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
}

int mlx4_query_srq(struct ibv_srq *srq,
		   struct ibv_srq_attr *attr)
{
	struct ibv_query_srq cmd;

	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
}

int mlx4_destroy_srq(struct ibv_srq *srq)
{
	int ret;

	if (to_msrq(srq)->ext_srq)
		return mlx4_destroy_xrc_srq(srq);

	ret = ibv_cmd_destroy_srq(srq);
	if (ret)
		return ret;

	mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
	mlx4_free_buf(&to_msrq(srq)->buf);
	free(to_msrq(srq)->wrid);
	pthread_spin_destroy(&to_msrq(srq)->lock);
	free(to_msrq(srq));

	return 0;
}

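/*
 * Repack the legacy mlx4_create_qp command into the extended create-QP
 * command: the generic verbs fields (user_handle..is_srq) are copied
 * into the base command and the driver-private fields
 * (buf_addr..sq_no_prefetch) into the driver extension, then the ex2
 * command is issued.
 */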
static int mlx4_cmd_create_qp_ex(struct ibv_context *context,
				 struct ibv_qp_init_attr_ex *attr,
				 struct mlx4_create_qp *cmd,
				 struct mlx4_qp *qp)
{
	struct mlx4_create_qp_ex cmd_ex;
	struct mlx4_create_qp_resp_ex resp;
	int ret;

	memset(&cmd_ex, 0, sizeof(cmd_ex));
	memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle,
	       offsetof(typeof(cmd->ibv_cmd), is_srq) +
	       sizeof(cmd->ibv_cmd.is_srq) -
	       offsetof(typeof(cmd->ibv_cmd), user_handle));

	memcpy(&cmd_ex.drv_ex, &cmd->buf_addr,
	       offsetof(typeof(*cmd), sq_no_prefetch) +
	       sizeof(cmd->sq_no_prefetch) - sizeof(cmd->ibv_cmd));

	ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
				    sizeof(qp->verbs_qp), attr,
				    &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd),
				    sizeof(cmd_ex), &resp.ibv_resp,
				    sizeof(resp.ibv_resp), sizeof(resp));
	return ret;
}

enum {
	MLX4_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD |
					IBV_QP_INIT_ATTR_XRCD |
					IBV_QP_INIT_ATTR_CREATE_FLAGS),
};

enum {
	MLX4_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS),
};

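/*
 * QP creation: validate the requested capabilities against device
 * limits (or conservative defaults when mlx4_query_device has not
 * populated them), size the send and receive queues, allocate the WQE
 * buffer and receive doorbell, issue the create command, and register
 * the QP in the context's QP table so later completions can be matched
 * back to it.
 */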
struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
				 struct ibv_qp_init_attr_ex *attr)
{
	struct mlx4_context *ctx = to_mctx(context);
	struct mlx4_create_qp cmd;
	struct ibv_create_qp_resp resp;
	struct mlx4_qp *qp;
	int ret;

	/* Sanity check QP size before proceeding */
	if (ctx->max_qp_wr) { /* mlx4_query_device succeeded */
		if (attr->cap.max_send_wr > ctx->max_qp_wr ||
		    attr->cap.max_recv_wr > ctx->max_qp_wr ||
		    attr->cap.max_send_sge > ctx->max_sge ||
		    attr->cap.max_recv_sge > ctx->max_sge)
			return NULL;
	} else {
		if (attr->cap.max_send_wr > 65536 ||
		    attr->cap.max_recv_wr > 65536 ||
		    attr->cap.max_send_sge > 64 ||
		    attr->cap.max_recv_sge > 64)
			return NULL;
	}
	if (attr->cap.max_inline_data > 1024)
		return NULL;

	if (attr->comp_mask & ~MLX4_CREATE_QP_SUP_COMP_MASK)
		return NULL;

	qp = calloc(1, sizeof *qp);
	if (!qp)
		return NULL;

	if (attr->qp_type == IBV_QPT_XRC_RECV) {
		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
	} else {
		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
		/*
		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
		 * allow HW to prefetch.
		 */
		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
	}

	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
	    attr->qp_type == IBV_QPT_XRC_RECV) {
		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
	} else {
		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
		if (attr->cap.max_recv_sge < 1)
			attr->cap.max_recv_sge = 1;
		if (attr->cap.max_recv_wr < 1)
			attr->cap.max_recv_wr = 1;
	}

	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
		goto err;

	mlx4_init_qp_indices(qp);

	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE))
		goto err_free;
	if (pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
		goto err_sq_spl;

	if (attr->cap.max_recv_sge) {
		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
		if (!qp->db)
			goto err_rq_spl;

		*qp->db = 0;
		cmd.db_addr = (uintptr_t) qp->db;
	} else {
		cmd.db_addr = 0;
	}

	cmd.buf_addr = (uintptr_t) qp->buf.buf;
	cmd.log_sq_stride = qp->sq.wqe_shift;
	for (cmd.log_sq_bb_count = 0;
	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
	     ++cmd.log_sq_bb_count)
		; /* nothing */
	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
	memset(cmd.reserved, 0, sizeof cmd.reserved);
	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);

	if (attr->comp_mask & MLX4_CREATE_QP_EX2_COMP_MASK)
		ret = mlx4_cmd_create_qp_ex(context, attr, &cmd, qp);
	else
		ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp,
					   sizeof(qp->verbs_qp), attr,
					   &cmd.ibv_cmd, sizeof(cmd), &resp,
					   sizeof(resp));
	if (ret)
		goto err_rq_db;

	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
		if (ret)
			goto err_destroy;
	}
	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);

	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
	qp->rq.max_gs = attr->cap.max_recv_sge;
	if (attr->qp_type != IBV_QPT_XRC_RECV)
		mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);

	qp->doorbell_qpn = htobe32(qp->verbs_qp.qp.qp_num << 8);
	if (attr->sq_sig_all)
		qp->sq_signal_bits = htobe32(MLX4_WQE_CTRL_CQ_UPDATE);
	else
		qp->sq_signal_bits = 0;

	return &qp->verbs_qp.qp;

err_destroy:
	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);

err_rq_db:
	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
	if (attr->cap.max_recv_sge)
		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
err_rq_spl:
	pthread_spin_destroy(&qp->rq.lock);
err_sq_spl:
	pthread_spin_destroy(&qp->sq.lock);
err_free:
	free(qp->sq.wrid);
	if (qp->rq.wqe_cnt)
		free(qp->rq.wrid);
	mlx4_free_buf(&qp->buf);

err:
	free(qp);

	return NULL;
}

struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
{
	struct ibv_qp_init_attr_ex attr_ex;
	struct ibv_qp *qp;

	memcpy(&attr_ex, attr, sizeof *attr);
	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
	attr_ex.pd = pd;
	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
	if (qp)
		memcpy(attr, &attr_ex, sizeof *attr);
	return qp;
}

struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
{
	struct ibv_open_qp cmd;
	struct ibv_create_qp_resp resp;
	struct mlx4_qp *qp;
	int ret;

	qp = calloc(1, sizeof *qp);
	if (!qp)
		return NULL;

	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr,
			      &cmd, sizeof cmd, &resp, sizeof resp);
	if (ret)
		goto err;

	return &qp->verbs_qp.qp;

err:
	free(qp);
	return NULL;
}

int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
		  int attr_mask,
		  struct ibv_qp_init_attr *init_attr)
{
	struct ibv_query_qp cmd;
	struct mlx4_qp *qp = to_mqp(ibqp);
	int ret;

	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof cmd);
	if (ret)
		return ret;

	init_attr->cap.max_send_wr = qp->sq.max_post;
	init_attr->cap.max_send_sge = qp->sq.max_gs;
	init_attr->cap.max_inline_data = qp->max_inline_data;

	attr->cap = init_attr->cap;

	return 0;
}

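/*
 * Besides forwarding the modify command to the kernel, a port change
 * refreshes the cached link layer and checksum-offload capabilities for
 * the QP, and a transition to RESET cleans the QP's CQEs out of its CQs
 * and rewinds the software queue indices.
 */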
int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		   int attr_mask)
{
	struct ibv_modify_qp cmd = {};
	struct ibv_port_attr port_attr;
	struct mlx4_qp *mqp = to_mqp(qp);
	struct ibv_device_attr device_attr;
	int ret;

	memset(&device_attr, 0, sizeof(device_attr));
	if (attr_mask & IBV_QP_PORT) {
		ret = ibv_query_port(qp->context, attr->port_num,
				     &port_attr);
		if (ret)
			return ret;
		mqp->link_layer = port_attr.link_layer;

		ret = ibv_query_device(qp->context, &device_attr);
		if (ret)
			return ret;

		switch (qp->qp_type) {
		case IBV_QPT_UD:
			if ((mqp->link_layer == IBV_LINK_LAYER_INFINIBAND) &&
			    (device_attr.device_cap_flags & IBV_DEVICE_UD_IP_CSUM))
				mqp->qp_cap_cache |= MLX4_CSUM_SUPPORT_UD_OVER_IB |
						     MLX4_RX_CSUM_VALID;
			break;
		case IBV_QPT_RAW_PACKET:
			if ((mqp->link_layer == IBV_LINK_LAYER_ETHERNET) &&
			    (device_attr.device_cap_flags & IBV_DEVICE_RAW_IP_CSUM))
				mqp->qp_cap_cache |= MLX4_CSUM_SUPPORT_RAW_OVER_ETH |
						     MLX4_RX_CSUM_VALID;
			break;
		default:
			break;
		}
	}

	if (qp->state == IBV_QPS_RESET &&
	    attr_mask & IBV_QP_STATE &&
	    attr->qp_state == IBV_QPS_INIT) {
		mlx4_qp_init_sq_ownership(to_mqp(qp));
	}

	ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd);

	if (!ret &&
	    (attr_mask & IBV_QP_STATE) &&
	    attr->qp_state == IBV_QPS_RESET) {
		if (qp->recv_cq)
			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
				      qp->srq ? to_msrq(qp->srq) : NULL);
		if (qp->send_cq && qp->send_cq != qp->recv_cq)
			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);

		mlx4_init_qp_indices(to_mqp(qp));
		if (to_mqp(qp)->rq.wqe_cnt)
			*to_mqp(qp)->db = 0;
	}

	return ret;
}

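/*
 * Take the send and receive CQ locks for a QP.  When the two CQs differ
 * they are always locked in ascending CQN order (and released in the
 * reverse order below), so concurrent callers cannot deadlock against
 * each other.
 */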
static void mlx4_lock_cqs(struct ibv_qp *qp)
{
	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);

	if (!qp->send_cq || !qp->recv_cq) {
		if (qp->send_cq)
			pthread_spin_lock(&send_cq->lock);
		else if (qp->recv_cq)
			pthread_spin_lock(&recv_cq->lock);
	} else if (send_cq == recv_cq) {
		pthread_spin_lock(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_spin_lock(&send_cq->lock);
		pthread_spin_lock(&recv_cq->lock);
	} else {
		pthread_spin_lock(&recv_cq->lock);
		pthread_spin_lock(&send_cq->lock);
	}
}

static void mlx4_unlock_cqs(struct ibv_qp *qp)
{
	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);

	if (!qp->send_cq || !qp->recv_cq) {
		if (qp->send_cq)
			pthread_spin_unlock(&send_cq->lock);
		else if (qp->recv_cq)
			pthread_spin_unlock(&recv_cq->lock);
	} else if (send_cq == recv_cq) {
		pthread_spin_unlock(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_spin_unlock(&recv_cq->lock);
		pthread_spin_unlock(&send_cq->lock);
	} else {
		pthread_spin_unlock(&send_cq->lock);
		pthread_spin_unlock(&recv_cq->lock);
	}
}

int mlx4_destroy_qp(struct ibv_qp *ibqp)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	int ret;

	pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
	ret = ibv_cmd_destroy_qp(ibqp);
	if (ret) {
		pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
		return ret;
	}

	mlx4_lock_cqs(ibqp);

	if (ibqp->recv_cq)
		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);

	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);

	mlx4_unlock_cqs(ibqp);
	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);

	pthread_spin_destroy(&qp->rq.lock);
	pthread_spin_destroy(&qp->sq.lock);

	if (qp->rq.wqe_cnt) {
		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
		free(qp->rq.wrid);
	}
	if (qp->sq.wqe_cnt)
		free(qp->sq.wrid);
	mlx4_free_buf(&qp->buf);
	free(qp);

	return 0;
}

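/* Return 1 if the GID is an IPv6 link-local address (fe80::/64). */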
static int link_local_gid(const union ibv_gid *gid)
{
	uint32_t *tmp = (uint32_t *)gid->raw;
	uint32_t hi = tmp[0];
	uint32_t lo = tmp[1];

	if (hi == htobe32(0xfe800000) && lo == 0)
		return 1;

	return 0;
}

static int is_multicast_gid(const union ibv_gid *gid)
{
	return gid->raw[0] == 0xff;
}

static uint16_t get_vlan_id(union ibv_gid *gid)
{
	uint16_t vid;

	vid = gid->raw[11] << 8 | gid->raw[12];
	return vid < 0x1000 ? vid : 0xffff;
}

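/*
 * Derive the Ethernet L2 address for a RoCE destination from its GRH:
 * link-local GIDs carry an EUI-64, which is folded back into a unicast
 * MAC; multicast GIDs map onto the 33:33:xx:xx:xx:xx IPv6 multicast MAC
 * range.  A VLAN id embedded in the GID, if any, is copied into the
 * address vector.  Returns non-zero if the GID is neither form.
 */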
static int mlx4_resolve_grh_to_l2(struct ibv_pd *pd, struct mlx4_ah *ah,
				  struct ibv_ah_attr *attr)
{
	int err, i;
	uint16_t vid;
	union ibv_gid sgid;

	if (link_local_gid(&attr->grh.dgid)) {
		memcpy(ah->mac, &attr->grh.dgid.raw[8], 3);
		memcpy(ah->mac + 3, &attr->grh.dgid.raw[13], 3);
		ah->mac[0] ^= 2;

		vid = get_vlan_id(&attr->grh.dgid);
	} else if (is_multicast_gid(&attr->grh.dgid)) {
		ah->mac[0] = 0x33;
		ah->mac[1] = 0x33;
		for (i = 2; i < 6; ++i)
			ah->mac[i] = attr->grh.dgid.raw[i + 10];

		err = ibv_query_gid(pd->context, attr->port_num,
				    attr->grh.sgid_index, &sgid);
		if (err)
			return err;

		ah->av.dlid = htobe16(0xc000);
		ah->av.port_pd |= htobe32(1 << 31);

		vid = get_vlan_id(&sgid);
	} else
		return 1;

	if (vid != 0xffff) {
		ah->av.port_pd |= htobe32(1 << 29);
		ah->vlan = vid | ((attr->sl & 7) << 13);
	}

	return 0;
}

struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
	struct mlx4_ah *ah;
	struct ibv_port_attr port_attr;

	if (query_port_cache(pd->context, attr->port_num, &port_attr))
		return NULL;

	ah = malloc(sizeof *ah);
	if (!ah)
		return NULL;

	memset(&ah->av, 0, sizeof ah->av);

	ah->av.port_pd = htobe32(to_mpd(pd)->pdn | (attr->port_num << 24));

	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
		ah->av.g_slid = attr->src_path_bits;
		ah->av.dlid = htobe16(attr->dlid);
		ah->av.sl_tclass_flowlabel = htobe32(attr->sl << 28);
	} else
		ah->av.sl_tclass_flowlabel = htobe32(attr->sl << 29);

	if (attr->static_rate) {
		ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET;
		/* XXX check rate cap? */
	}
	if (attr->is_global) {
		ah->av.g_slid |= 0x80;
		ah->av.gid_index = attr->grh.sgid_index;
		ah->av.hop_limit = attr->grh.hop_limit;
		ah->av.sl_tclass_flowlabel |=
			htobe32((attr->grh.traffic_class << 20) |
				attr->grh.flow_label);
		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
	}

	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
		if (port_attr.port_cap_flags & IBV_PORT_IP_BASED_GIDS) {
			uint16_t vid;

			if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
							ah->mac, &vid)) {
				free(ah);
				return NULL;
			}

			if (vid <= 0xfff) {
				ah->av.port_pd |= htobe32(1 << 29);
				ah->vlan = vid |
					((attr->sl & 7) << 13);
			}
		} else {
			if (mlx4_resolve_grh_to_l2(pd, ah, attr)) {
				free(ah);
				return NULL;
			}
		}
	}

	return &ah->ibv_ah;
}

int mlx4_destroy_ah(struct ibv_ah *ah)
{
	free(to_mah(ah));

	return 0;
}