1 /*
2 * Copyright (c) 2007 Cisco, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <config.h>
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/mman.h>
40 #include <pthread.h>
41 #include <string.h>
42
43 #include "mlx4.h"
44 #include "mlx4-abi.h"
45
#ifndef PCI_VENDOR_ID_MELLANOX
#define PCI_VENDOR_ID_MELLANOX			0x15b3
#endif

/*
 * Expands to one hca_table initializer.  `v` is the suffix of a
 * PCI_VENDOR_ID_* macro, `d` is the PCI device ID.
 */
#define HCA(v, d) \
	{ .vendor = PCI_VENDOR_ID_##v,	\
	  .device = d }

/*
 * PCI vendor/device ID pairs accepted by this driver.  mlx4_driver_init()
 * compares the IDs it reads from sysfs against these entries.  The table is
 * only ever read, so it is declared const.
 */
static const struct {
	unsigned		vendor;
	unsigned		device;
} hca_table[] = {
	HCA(MELLANOX, 0x6340),	/* MT25408 "Hermon" SDR */
	HCA(MELLANOX, 0x634a),	/* MT25408 "Hermon" DDR */
	HCA(MELLANOX, 0x6354),	/* MT25408 "Hermon" QDR */
	HCA(MELLANOX, 0x6732),	/* MT25408 "Hermon" DDR PCIe gen2 */
	HCA(MELLANOX, 0x673c),	/* MT25408 "Hermon" QDR PCIe gen2 */
	HCA(MELLANOX, 0x6368),	/* MT25408 "Hermon" EN 10GigE */
	HCA(MELLANOX, 0x6750),	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	HCA(MELLANOX, 0x6372),	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
	HCA(MELLANOX, 0x675a),	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	HCA(MELLANOX, 0x6764),	/* MT26468 ConnectX EN 10GigE PCIe gen2 */
	HCA(MELLANOX, 0x6746),	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	HCA(MELLANOX, 0x676e),	/* MT26478 ConnectX2 40GigE PCIe gen2 */
	HCA(MELLANOX, 0x1002),	/* MT25400 Family [ConnectX-2 Virtual Function] */
	HCA(MELLANOX, 0x1003),	/* MT27500 Family [ConnectX-3] */
	HCA(MELLANOX, 0x1004),	/* MT27500 Family [ConnectX-3 Virtual Function] */
	HCA(MELLANOX, 0x1005),	/* MT27510 Family */
	HCA(MELLANOX, 0x1006),	/* MT27511 Family */
	HCA(MELLANOX, 0x1007),	/* MT27520 Family */
	HCA(MELLANOX, 0x1008),	/* MT27521 Family */
	HCA(MELLANOX, 0x1009),	/* MT27530 Family */
	HCA(MELLANOX, 0x100a),	/* MT27531 Family */
	HCA(MELLANOX, 0x100b),	/* MT27540 Family */
	HCA(MELLANOX, 0x100c),	/* MT27541 Family */
	HCA(MELLANOX, 0x100d),	/* MT27550 Family */
	HCA(MELLANOX, 0x100e),	/* MT27551 Family */
	HCA(MELLANOX, 0x100f),	/* MT27560 Family */
	HCA(MELLANOX, 0x1010),	/* MT27561 Family */
};
86
87 static struct ibv_context_ops mlx4_ctx_ops = {
88 .query_device = mlx4_query_device,
89 .query_port = mlx4_query_port,
90 .alloc_pd = mlx4_alloc_pd,
91 .dealloc_pd = mlx4_free_pd,
92 .reg_mr = mlx4_reg_mr,
93 .rereg_mr = mlx4_rereg_mr,
94 .dereg_mr = mlx4_dereg_mr,
95 .alloc_mw = mlx4_alloc_mw,
96 .dealloc_mw = mlx4_dealloc_mw,
97 .bind_mw = mlx4_bind_mw,
98 .create_cq = mlx4_create_cq,
99 .poll_cq = mlx4_poll_cq,
100 .req_notify_cq = mlx4_arm_cq,
101 .cq_event = mlx4_cq_event,
102 .resize_cq = mlx4_resize_cq,
103 .destroy_cq = mlx4_destroy_cq,
104 .create_srq = mlx4_create_srq,
105 .modify_srq = mlx4_modify_srq,
106 .query_srq = mlx4_query_srq,
107 .destroy_srq = mlx4_destroy_srq,
108 .post_srq_recv = mlx4_post_srq_recv,
109 .create_qp = mlx4_create_qp,
110 .query_qp = mlx4_query_qp,
111 .modify_qp = mlx4_modify_qp,
112 .destroy_qp = mlx4_destroy_qp,
113 .post_send = mlx4_post_send,
114 .post_recv = mlx4_post_recv,
115 .create_ah = mlx4_create_ah,
116 .destroy_ah = mlx4_destroy_ah,
117 .attach_mcast = ibv_cmd_attach_mcast,
118 .detach_mcast = ibv_cmd_detach_mcast
119 };
120
mlx4_map_internal_clock(struct mlx4_device * mdev,struct ibv_context * ibv_ctx)121 static int mlx4_map_internal_clock(struct mlx4_device *mdev,
122 struct ibv_context *ibv_ctx)
123 {
124 struct mlx4_context *context = to_mctx(ibv_ctx);
125 void *hca_clock_page;
126
127 hca_clock_page = mmap(NULL, mdev->page_size,
128 PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd,
129 mdev->page_size * 3);
130
131 if (hca_clock_page == MAP_FAILED) {
132 fprintf(stderr, PFX
133 "Warning: Timestamp available,\n"
134 "but failed to mmap() hca core clock page.\n");
135 return -1;
136 }
137
138 context->hca_core_clock = hca_clock_page +
139 (context->core_clock.offset & (mdev->page_size - 1));
140 return 0;
141 }
142
mlx4_init_context(struct verbs_device * v_device,struct ibv_context * ibv_ctx,int cmd_fd)143 static int mlx4_init_context(struct verbs_device *v_device,
144 struct ibv_context *ibv_ctx, int cmd_fd)
145 {
146 struct mlx4_context *context;
147 struct ibv_get_context cmd;
148 struct mlx4_alloc_ucontext_resp resp;
149 int i;
150 int ret;
151 struct mlx4_alloc_ucontext_resp_v3 resp_v3;
152 __u16 bf_reg_size;
153 struct mlx4_device *dev = to_mdev(&v_device->device);
154 struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
155 struct ibv_device_attr_ex dev_attrs;
156
157 /* memory footprint of mlx4_context and verbs_context share
158 * struct ibv_context.
159 */
160 context = to_mctx(ibv_ctx);
161 ibv_ctx->cmd_fd = cmd_fd;
162
163 if (dev->abi_version <= MLX4_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
164 if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
165 &resp_v3.ibv_resp, sizeof resp_v3))
166 return errno;
167
168 context->num_qps = resp_v3.qp_tab_size;
169 bf_reg_size = resp_v3.bf_reg_size;
170 context->cqe_size = sizeof (struct mlx4_cqe);
171 } else {
172 if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
173 &resp.ibv_resp, sizeof resp))
174 return errno;
175
176 context->num_qps = resp.qp_tab_size;
177 bf_reg_size = resp.bf_reg_size;
178 if (resp.dev_caps & MLX4_USER_DEV_CAP_64B_CQE)
179 context->cqe_size = resp.cqe_size;
180 else
181 context->cqe_size = sizeof (struct mlx4_cqe);
182 }
183
184 context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
185 context->qp_table_mask = (1 << context->qp_table_shift) - 1;
186 for (i = 0; i < MLX4_PORTS_NUM; ++i)
187 context->port_query_cache[i].valid = 0;
188
189 ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
190 if (ret)
191 return ret;
192 for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
193 context->qp_table[i].refcnt = 0;
194
195 for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
196 context->db_list[i] = NULL;
197
198 ret = mlx4_init_xsrq_table(&context->xsrq_table, context->num_qps);
199 if (ret)
200 goto err;
201
202 ret = pthread_mutex_init(&context->db_list_mutex, NULL);
203 if (ret)
204 goto err_xsrq;
205
206 context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
207 MAP_SHARED, cmd_fd, 0);
208 if (context->uar == MAP_FAILED)
209 return errno;
210
211 if (bf_reg_size) {
212 context->bf_page = mmap(NULL, dev->page_size,
213 PROT_WRITE, MAP_SHARED, cmd_fd,
214 dev->page_size);
215 if (context->bf_page == MAP_FAILED) {
216 fprintf(stderr, PFX "Warning: BlueFlame available, "
217 "but failed to mmap() BlueFlame page.\n");
218 context->bf_page = NULL;
219 context->bf_buf_size = 0;
220 } else {
221 context->bf_buf_size = bf_reg_size / 2;
222 context->bf_offset = 0;
223 ret = pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE);
224 if (ret)
225 goto err_db_list;
226 }
227 } else {
228 context->bf_page = NULL;
229 context->bf_buf_size = 0;
230 }
231
232 ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
233 if (ret)
234 goto err_bf_lock;
235 ibv_ctx->ops = mlx4_ctx_ops;
236
237 context->hca_core_clock = NULL;
238 memset(&dev_attrs, 0, sizeof(dev_attrs));
239 if (!mlx4_query_device_ex(ibv_ctx, NULL, &dev_attrs,
240 sizeof(struct ibv_device_attr_ex))) {
241 context->max_qp_wr = dev_attrs.orig_attr.max_qp_wr;
242 context->max_sge = dev_attrs.orig_attr.max_sge;
243 if (context->core_clock.offset_valid)
244 mlx4_map_internal_clock(dev, ibv_ctx);
245 }
246
247 verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
248 VERBS_CONTEXT_QP;
249 verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd);
250 verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd);
251 verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex);
252 verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
253 verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
254 verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
255 verbs_set_ctx_op(verbs_ctx, ibv_create_flow, ibv_cmd_create_flow);
256 verbs_set_ctx_op(verbs_ctx, ibv_destroy_flow, ibv_cmd_destroy_flow);
257 verbs_set_ctx_op(verbs_ctx, create_cq_ex, mlx4_create_cq_ex);
258 verbs_set_ctx_op(verbs_ctx, query_device_ex, mlx4_query_device_ex);
259 verbs_set_ctx_op(verbs_ctx, query_rt_values, mlx4_query_rt_values);
260
261 return 0;
262
263 err_bf_lock:
264 if (context->bf_buf_size)
265 pthread_spin_destroy(&context->bf_lock);
266 err_db_list:
267 pthread_mutex_destroy(&context->db_list_mutex);
268 err_xsrq:
269 mlx4_cleanup_xsrq_table(&context->xsrq_table);
270 err:
271 pthread_mutex_destroy(&context->qp_table_mutex);
272
273 return ret;
274 }
275
mlx4_uninit_context(struct verbs_device * v_device,struct ibv_context * ibv_ctx)276 static void mlx4_uninit_context(struct verbs_device *v_device,
277 struct ibv_context *ibv_ctx)
278 {
279 struct mlx4_context *context = to_mctx(ibv_ctx);
280
281 pthread_mutex_destroy(&context->qp_table_mutex);
282 mlx4_cleanup_xsrq_table(&context->xsrq_table);
283 pthread_mutex_destroy(&context->db_list_mutex);
284 pthread_spin_destroy(&context->bf_lock);
285 pthread_spin_destroy(&context->uar_lock);
286
287 munmap(context->uar, to_mdev(&v_device->device)->page_size);
288 if (context->bf_page)
289 munmap(context->bf_page, to_mdev(&v_device->device)->page_size);
290 if (context->hca_core_clock)
291 munmap(context->hca_core_clock - context->core_clock.offset,
292 to_mdev(&v_device->device)->page_size);
293 }
294
295 static struct verbs_device_ops mlx4_dev_ops = {
296 .init_context = mlx4_init_context,
297 .uninit_context = mlx4_uninit_context,
298 };
299
mlx4_driver_init(const char * uverbs_sys_path,int abi_version)300 static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path, int abi_version)
301 {
302 char value[8];
303 struct mlx4_device *dev;
304 unsigned vendor, device;
305 int i;
306
307 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
308 value, sizeof value) < 0)
309 return NULL;
310 vendor = strtol(value, NULL, 16);
311
312 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
313 value, sizeof value) < 0)
314 return NULL;
315 device = strtol(value, NULL, 16);
316
317 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
318 if (vendor == hca_table[i].vendor &&
319 device == hca_table[i].device)
320 goto found;
321
322 return NULL;
323
324 found:
325 if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION ||
326 abi_version > MLX4_UVERBS_MAX_ABI_VERSION) {
327 fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported "
328 "(min supported %d, max supported %d)\n",
329 abi_version, uverbs_sys_path,
330 MLX4_UVERBS_MIN_ABI_VERSION,
331 MLX4_UVERBS_MAX_ABI_VERSION);
332 return NULL;
333 }
334
335 dev = calloc(1, sizeof *dev);
336 if (!dev) {
337 fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
338 uverbs_sys_path);
339 return NULL;
340 }
341
342 dev->page_size = sysconf(_SC_PAGESIZE);
343 dev->abi_version = abi_version;
344
345 dev->verbs_dev.ops = &mlx4_dev_ops;
346 dev->verbs_dev.sz = sizeof(*dev);
347 dev->verbs_dev.size_of_context =
348 sizeof(struct mlx4_context) - sizeof(struct ibv_context);
349
350 return &dev->verbs_dev;
351 }
352
mlx4_register_driver(void)353 static __attribute__((constructor)) void mlx4_register_driver(void)
354 {
355 verbs_register_driver("mlx4", mlx4_driver_init);
356 }
357