/*
 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <pthread.h>
#include <string.h>

#include "mlx4.h"
#include "mlx4-abi.h"

#ifndef PCI_VENDOR_ID_MELLANOX
#define PCI_VENDOR_ID_MELLANOX	0x15b3
#endif

#define HCA(v, d) \
	{ .vendor = PCI_VENDOR_ID_##v,	\
	  .device = d }

static struct {
	unsigned vendor;
	unsigned device;
} hca_table[] = {
	HCA(MELLANOX, 0x6340),	/* MT25408 "Hermon" SDR */
	HCA(MELLANOX, 0x634a),	/* MT25408 "Hermon" DDR */
	HCA(MELLANOX, 0x6354),	/* MT25408 "Hermon" QDR */
	HCA(MELLANOX, 0x6732),	/* MT25408 "Hermon" DDR PCIe gen2 */
	HCA(MELLANOX, 0x673c),	/* MT25408 "Hermon" QDR PCIe gen2 */
	HCA(MELLANOX, 0x6368),	/* MT25408 "Hermon" EN 10GigE */
	HCA(MELLANOX, 0x6750),	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	HCA(MELLANOX, 0x6372),	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
	HCA(MELLANOX, 0x675a),	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	HCA(MELLANOX, 0x6764),	/* MT26468 ConnectX EN 10GigE PCIe gen2 */
	HCA(MELLANOX, 0x6746),	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	HCA(MELLANOX, 0x676e),	/* MT26478 ConnectX2 40GigE PCIe gen2 */
	HCA(MELLANOX, 0x1002),	/* MT25400 Family [ConnectX-2 Virtual Function] */
	HCA(MELLANOX, 0x1003),	/* MT27500 Family [ConnectX-3] */
	HCA(MELLANOX, 0x1004),	/* MT27500 Family [ConnectX-3 Virtual Function] */
	HCA(MELLANOX, 0x1005),	/* MT27510 Family */
	HCA(MELLANOX, 0x1006),	/* MT27511 Family */
	HCA(MELLANOX, 0x1007),	/* MT27520 Family */
	HCA(MELLANOX, 0x1008),	/* MT27521 Family */
	HCA(MELLANOX, 0x1009),	/* MT27530 Family */
	HCA(MELLANOX, 0x100a),	/* MT27531 Family */
	HCA(MELLANOX, 0x100b),	/* MT27540 Family */
	HCA(MELLANOX, 0x100c),	/* MT27541 Family */
	HCA(MELLANOX, 0x100d),	/* MT27550 Family */
	HCA(MELLANOX, 0x100e),	/* MT27551 Family */
	HCA(MELLANOX, 0x100f),	/* MT27560 Family */
	HCA(MELLANOX, 0x1010),	/* MT27561 Family */
};
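/*
 * Dispatch table for the core verbs.  libibverbs calls through these
 * pointers, so every data-path and resource operation lands on its
 * mlx4-specific implementation; multicast attach/detach keep no
 * driver-private state and go straight to the generic ibv_cmd helpers.
 */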
static struct ibv_context_ops mlx4_ctx_ops = {
	.query_device  = mlx4_query_device,
	.query_port    = mlx4_query_port,
	.alloc_pd      = mlx4_alloc_pd,
	.dealloc_pd    = mlx4_free_pd,
	.reg_mr        = mlx4_reg_mr,
	.rereg_mr      = mlx4_rereg_mr,
	.dereg_mr      = mlx4_dereg_mr,
	.alloc_mw      = mlx4_alloc_mw,
	.dealloc_mw    = mlx4_dealloc_mw,
	.bind_mw       = mlx4_bind_mw,
	.create_cq     = mlx4_create_cq,
	.poll_cq       = mlx4_poll_cq,
	.req_notify_cq = mlx4_arm_cq,
	.cq_event      = mlx4_cq_event,
	.resize_cq     = mlx4_resize_cq,
	.destroy_cq    = mlx4_destroy_cq,
	.create_srq    = mlx4_create_srq,
	.modify_srq    = mlx4_modify_srq,
	.query_srq     = mlx4_query_srq,
	.destroy_srq   = mlx4_destroy_srq,
	.post_srq_recv = mlx4_post_srq_recv,
	.create_qp     = mlx4_create_qp,
	.query_qp      = mlx4_query_qp,
	.modify_qp     = mlx4_modify_qp,
	.destroy_qp    = mlx4_destroy_qp,
	.post_send     = mlx4_post_send,
	.post_recv     = mlx4_post_recv,
	.create_ah     = mlx4_create_ah,
	.destroy_ah    = mlx4_destroy_ah,
	.attach_mcast  = ibv_cmd_attach_mcast,
	.detach_mcast  = ibv_cmd_detach_mcast
};

static int mlx4_map_internal_clock(struct mlx4_device *mdev,
				   struct ibv_context *ibv_ctx)
{
	struct mlx4_context *context = to_mctx(ibv_ctx);
	void *hca_clock_page;

	/* The HCA core clock is exposed at offset 3 * page_size of the
	 * uverbs command fd mapping; failure is non-fatal, timestamps
	 * simply become unavailable. */
	hca_clock_page = mmap(NULL, mdev->page_size,
			      PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd,
			      mdev->page_size * 3);
	if (hca_clock_page == MAP_FAILED) {
		fprintf(stderr, PFX
			"Warning: Timestamp available,\n"
			"but failed to mmap() hca core clock page.\n");
		return -1;
	}

	context->hca_core_clock = hca_clock_page +
		(context->core_clock.offset & (mdev->page_size - 1));
	return 0;
}
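/*
 * Context setup: exchange the get_context command with the kernel
 * (using the older v3 response layout on pre-dev-caps ABIs), size the
 * QP and XSRQ tables from the response, then mmap() the UAR doorbell
 * page and, when present, the BlueFlame page and HCA core clock page.
 */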
static int mlx4_init_context(struct verbs_device *v_device,
			     struct ibv_context *ibv_ctx, int cmd_fd)
{
	struct mlx4_context *context;
	struct ibv_get_context cmd;
	struct mlx4_alloc_ucontext_resp resp;
	int i;
	int ret;
	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
	__u16 bf_reg_size;
	struct mlx4_device *dev = to_mdev(&v_device->device);
	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
	struct ibv_device_attr_ex dev_attrs;

	/* The memory footprints of mlx4_context and verbs_context
	 * overlap: both are laid out around the same struct ibv_context,
	 * so converting ibv_ctx is enough to reach the private context.
	 */
	context = to_mctx(ibv_ctx);
	ibv_ctx->cmd_fd = cmd_fd;

	if (dev->abi_version <= MLX4_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
		/* Old kernel ABI: no device-capability flags in the
		 * response, so assume the compact CQE layout. */
		if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
					&resp_v3.ibv_resp, sizeof resp_v3))
			return errno;

		context->num_qps  = resp_v3.qp_tab_size;
		bf_reg_size	  = resp_v3.bf_reg_size;
		context->cqe_size = sizeof (struct mlx4_cqe);
	} else {
		if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
					&resp.ibv_resp, sizeof resp))
			return errno;

		context->num_qps  = resp.qp_tab_size;
		bf_reg_size	  = resp.bf_reg_size;
		if (resp.dev_caps & MLX4_USER_DEV_CAP_64B_CQE)
			context->cqe_size = resp.cqe_size;
		else
			context->cqe_size = sizeof (struct mlx4_cqe);
	}

	/* num_qps reported by the kernel is a power of two, so
	 * ffs(num_qps) - 1 == log2(num_qps). */
	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
	for (i = 0; i < MLX4_PORTS_NUM; ++i)
		context->port_query_cache[i].valid = 0;

	ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
	if (ret)
		return ret;
	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
		context->qp_table[i].refcnt = 0;

	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
		context->db_list[i] = NULL;

	ret = mlx4_init_xsrq_table(&context->xsrq_table, context->num_qps);
	if (ret)
		goto err;

	ret = pthread_mutex_init(&context->db_list_mutex, NULL);
	if (ret)
		goto err_xsrq;

	context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
			    MAP_SHARED, cmd_fd, 0);
	if (context->uar == MAP_FAILED) {
		/* Don't leak the mutexes and xsrq table set up above. */
		ret = errno;
		goto err_db_list;
	}

	if (bf_reg_size) {
		context->bf_page = mmap(NULL, dev->page_size,
					PROT_WRITE, MAP_SHARED, cmd_fd,
					dev->page_size);
		if (context->bf_page == MAP_FAILED) {
			fprintf(stderr, PFX "Warning: BlueFlame available, "
				"but failed to mmap() BlueFlame page.\n");
			context->bf_page     = NULL;
			context->bf_buf_size = 0;
		} else {
			/* Each process gets half of the BlueFlame register. */
			context->bf_buf_size = bf_reg_size / 2;
			context->bf_offset   = 0;
			ret = pthread_spin_init(&context->bf_lock,
						PTHREAD_PROCESS_PRIVATE);
			if (ret)
				goto err_uar;
		}
	} else {
		context->bf_page     = NULL;
		context->bf_buf_size = 0;
	}

	ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
	if (ret)
		goto err_bf_lock;
	ibv_ctx->ops = mlx4_ctx_ops;

	context->hca_core_clock = NULL;
	memset(&dev_attrs, 0, sizeof(dev_attrs));
	if (!mlx4_query_device_ex(ibv_ctx, NULL, &dev_attrs,
				  sizeof(struct ibv_device_attr_ex))) {
		context->max_qp_wr = dev_attrs.orig_attr.max_qp_wr;
		context->max_sge   = dev_attrs.orig_attr.max_sge;
		if (context->core_clock.offset_valid)
			mlx4_map_internal_clock(dev, ibv_ctx);
	}

	verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
				   VERBS_CONTEXT_QP;
	verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd);
	verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd);
	verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex);
	verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
	verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
	verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
	verbs_set_ctx_op(verbs_ctx, ibv_create_flow, ibv_cmd_create_flow);
	verbs_set_ctx_op(verbs_ctx, ibv_destroy_flow, ibv_cmd_destroy_flow);
	verbs_set_ctx_op(verbs_ctx, create_cq_ex, mlx4_create_cq_ex);
	verbs_set_ctx_op(verbs_ctx, query_device_ex, mlx4_query_device_ex);
	verbs_set_ctx_op(verbs_ctx, query_rt_values, mlx4_query_rt_values);

	return 0;

err_bf_lock:
	if (context->bf_buf_size)
		pthread_spin_destroy(&context->bf_lock);
err_uar:
	if (context->bf_page)
		munmap(context->bf_page, dev->page_size);
	munmap(context->uar, dev->page_size);
err_db_list:
	pthread_mutex_destroy(&context->db_list_mutex);
err_xsrq:
	mlx4_cleanup_xsrq_table(&context->xsrq_table);
err:
	pthread_mutex_destroy(&context->qp_table_mutex);

	return ret;
}
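/*
 * Teardown mirrors mlx4_init_context: destroy the locks, release the
 * XSRQ table, and unmap the UAR, BlueFlame, and core clock pages.
 */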
static void mlx4_uninit_context(struct verbs_device *v_device,
				struct ibv_context *ibv_ctx)
{
	struct mlx4_context *context = to_mctx(ibv_ctx);

	pthread_mutex_destroy(&context->qp_table_mutex);
	mlx4_cleanup_xsrq_table(&context->xsrq_table);
	pthread_mutex_destroy(&context->db_list_mutex);
	/* bf_lock is only initialized when BlueFlame is in use. */
	if (context->bf_buf_size)
		pthread_spin_destroy(&context->bf_lock);
	pthread_spin_destroy(&context->uar_lock);

	munmap(context->uar, to_mdev(&v_device->device)->page_size);
	if (context->bf_page)
		munmap(context->bf_page, to_mdev(&v_device->device)->page_size);
	if (context->hca_core_clock)
		/* Unmap the page itself, not the offset pointer into it;
		 * mask the offset the same way mlx4_map_internal_clock()
		 * did when it computed hca_core_clock. */
		munmap(context->hca_core_clock -
		       (context->core_clock.offset &
			(to_mdev(&v_device->device)->page_size - 1)),
		       to_mdev(&v_device->device)->page_size);
}

static struct verbs_device_ops mlx4_dev_ops = {
	.init_context	= mlx4_init_context,
	.uninit_context	= mlx4_uninit_context,
};

static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path,
					     int abi_version)
{
	char value[8];
	struct mlx4_device *dev;
	unsigned vendor, device;
	int i;

	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
				value, sizeof value) < 0)
		return NULL;
	vendor = strtol(value, NULL, 16);

	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
				value, sizeof value) < 0)
		return NULL;
	device = strtol(value, NULL, 16);

	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
		if (vendor == hca_table[i].vendor &&
		    device == hca_table[i].device)
			goto found;

	return NULL;

found:
	if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION ||
	    abi_version > MLX4_UVERBS_MAX_ABI_VERSION) {
		fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported "
			"(min supported %d, max supported %d)\n",
			abi_version, uverbs_sys_path,
			MLX4_UVERBS_MIN_ABI_VERSION,
			MLX4_UVERBS_MAX_ABI_VERSION);
		return NULL;
	}

	dev = calloc(1, sizeof *dev);
	if (!dev) {
		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
			uverbs_sys_path);
		return NULL;
	}

	dev->page_size	 = sysconf(_SC_PAGESIZE);
	dev->abi_version = abi_version;

	dev->verbs_dev.ops = &mlx4_dev_ops;
	dev->verbs_dev.sz = sizeof(*dev);
	dev->verbs_dev.size_of_context =
		sizeof(struct mlx4_context) - sizeof(struct ibv_context);

	return &dev->verbs_dev;
}

static __attribute__((constructor)) void mlx4_register_driver(void)
{
	verbs_register_driver("mlx4", mlx4_driver_init);
}
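/*
 * Usage sketch (standard libibverbs API only; nothing in this file is
 * called directly by applications): mlx4_register_driver() runs as an
 * ELF constructor when libibverbs loads this provider, and a later
 * ibv_open_device() on a matching HCA is what reaches
 * mlx4_driver_init() and then mlx4_init_context():
 *
 *	struct ibv_device **list = ibv_get_device_list(NULL);
 *	struct ibv_context *ctx  = ibv_open_device(list[0]);
 *	...
 *	ibv_close_device(ctx);		// ends up in mlx4_uninit_context()
 *	ibv_free_device_list(list);
 */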