1 /* 2 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <config.h> 33 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <unistd.h> 37 #include <errno.h> 38 #include <sys/mman.h> 39 #include <pthread.h> 40 #include <string.h> 41 #include <signal.h> 42 #include <stdbool.h> 43 44 #include "libcxgb4.h" 45 #include "cxgb4-abi.h" 46 47 #define PCI_VENDOR_ID_CHELSIO 0x1425 48 49 /* 50 * Macros needed to support the PCI Device ID Table ... 51 */ 52 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ 53 static struct { \ 54 unsigned vendor; \ 55 unsigned device; \ 56 } hca_table[] = { 57 58 #define CH_PCI_DEVICE_ID_FUNCTION \ 59 0x4 60 61 #define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \ 62 { \ 63 .vendor = PCI_VENDOR_ID_CHELSIO, \ 64 .device = (__DeviceID), \ 65 } 66 67 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \ 68 } 69 70 #include "t4_chip_type.h" 71 #include "t4_pci_id_tbl.h" 72 73 unsigned long c4iw_page_size; 74 unsigned long c4iw_page_shift; 75 unsigned long c4iw_page_mask; 76 int ma_wr; 77 int t5_en_wc = 1; 78 79 static TAILQ_HEAD(,c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices); 80 81 static struct ibv_context_ops c4iw_ctx_ops = { 82 .query_device = c4iw_query_device, 83 .query_port = c4iw_query_port, 84 .alloc_pd = c4iw_alloc_pd, 85 .dealloc_pd = c4iw_free_pd, 86 .reg_mr = c4iw_reg_mr, 87 .dereg_mr = c4iw_dereg_mr, 88 .create_cq = c4iw_create_cq, 89 .resize_cq = c4iw_resize_cq, 90 .destroy_cq = c4iw_destroy_cq, 91 .create_srq = c4iw_create_srq, 92 .modify_srq = c4iw_modify_srq, 93 .destroy_srq = c4iw_destroy_srq, 94 .create_qp = c4iw_create_qp, 95 .modify_qp = c4iw_modify_qp, 96 .destroy_qp = c4iw_destroy_qp, 97 .query_qp = c4iw_query_qp, 98 .create_ah = c4iw_create_ah, 99 .destroy_ah = c4iw_destroy_ah, 100 .attach_mcast = c4iw_attach_mcast, 101 .detach_mcast = c4iw_detach_mcast, 102 .post_srq_recv = c4iw_post_srq_recv, 103 .req_notify_cq = c4iw_arm_cq, 104 }; 105 106 static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev, 107 int cmd_fd) 108 { 109 struct c4iw_context *context; 110 struct ibv_get_context cmd; 111 struct c4iw_alloc_ucontext_resp resp; 112 struct c4iw_dev *rhp = to_c4iw_dev(ibdev); 113 struct ibv_query_device qcmd; 114 uint64_t raw_fw_ver; 115 struct ibv_device_attr attr; 116 117 context = malloc(sizeof *context); 118 if (!context) 119 return NULL; 120 121 memset(context, 0, sizeof *context); 122 context->ibv_ctx.cmd_fd = cmd_fd; 123 124 resp.status_page_size = 0; 125 resp.reserved = 0; 126 if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, 127 &resp.ibv_resp, sizeof resp)) 128 goto err_free; 129 130 if (resp.reserved) 131 PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n", 132 __FUNCTION__); 133 134 context->status_page_size = resp.status_page_size; 135 if (resp.status_page_size) { 136 context->status_page = mmap(NULL, resp.status_page_size, 137 PROT_READ, MAP_SHARED, cmd_fd, 138 resp.status_page_key); 139 if (context->status_page == MAP_FAILED) 140 goto err_free; 141 } 142 143 context->ibv_ctx.device = ibdev; 144 context->ibv_ctx.ops = c4iw_ctx_ops; 145 146 switch (rhp->chip_version) { 147 case CHELSIO_T6: 148 PDBG("%s T6/T5/T4 device\n", __FUNCTION__); 149 case CHELSIO_T5: 150 PDBG("%s T5/T4 device\n", __FUNCTION__); 151 case CHELSIO_T4: 152 PDBG("%s T4 device\n", __FUNCTION__); 153 context->ibv_ctx.ops.async_event = c4iw_async_event; 154 context->ibv_ctx.ops.post_send = c4iw_post_send; 155 context->ibv_ctx.ops.post_recv = c4iw_post_receive; 156 context->ibv_ctx.ops.poll_cq = c4iw_poll_cq; 157 context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq; 158 break; 159 default: 160 PDBG("%s unknown hca type %d\n", __FUNCTION__, 161 rhp->chip_version); 162 goto err_unmap; 163 break; 164 } 165 166 if (!rhp->mmid2ptr) { 167 int ret; 168 169 ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd, 170 sizeof qcmd); 171 if (ret) 172 goto err_unmap; 173 rhp->max_mr = attr.max_mr; 174 rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *)); 175 if (!rhp->mmid2ptr) { 176 goto err_unmap; 177 } 178 if (rhp->abi_version < 3) { 179 fprintf(stderr, "Warning: iw_cxgb4 driver is of older version" 180 " than libcxgb4:: %d\n", rhp->abi_version); 181 rhp->max_qp = T4_QID_BASE + attr.max_qp; 182 } else { 183 rhp->max_qp = context->status_page->qp_start + 184 context->status_page->qp_size; 185 } 186 rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *)); 187 if (!rhp->qpid2ptr) { 188 goto err_unmap; 189 } 190 if (rhp->abi_version < 3) 191 rhp->max_cq = T4_QID_BASE + attr.max_cq; 192 else 193 rhp->max_cq = context->status_page->cq_start + 194 context->status_page->cq_size; 195 rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *)); 196 if (!rhp->cqid2ptr) 197 goto err_unmap; 198 199 /* Disable userspace WC if architecture/adapter does not 200 * support WC. 201 * Note: To forcefully disable WC in kernel driver use the 202 * loader tunable "hw.cxl.write_combine=0" 203 */ 204 if (t5_en_wc && !context->status_page->wc_supported) { 205 fprintf(stderr, "iw_cxgb4 driver doesn't support Write " 206 "Combine, so regular DB writes will be used\n"); 207 t5_en_wc = 0; 208 } 209 } 210 211 return &context->ibv_ctx; 212 213 err_unmap: 214 munmap(context->status_page, context->status_page_size); 215 err_free: 216 if (rhp->cqid2ptr) 217 free(rhp->cqid2ptr); 218 if (rhp->qpid2ptr) 219 free(rhp->cqid2ptr); 220 if (rhp->mmid2ptr) 221 free(rhp->cqid2ptr); 222 free(context); 223 return NULL; 224 } 225 226 static void c4iw_free_context(struct ibv_context *ibctx) 227 { 228 struct c4iw_context *context = to_c4iw_context(ibctx); 229 230 if (context->status_page_size) 231 munmap(context->status_page, context->status_page_size); 232 free(context); 233 } 234 235 static struct verbs_device_ops c4iw_dev_ops = { 236 .alloc_context = c4iw_alloc_context, 237 .free_context = c4iw_free_context 238 }; 239 240 #ifdef STALL_DETECTION 241 242 int stall_to; 243 244 static void dump_cq(struct c4iw_cq *chp) 245 { 246 int i; 247 248 fprintf(stderr, 249 "CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d " 250 "cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp, 251 chp->cq.cqid, chp->cq.queue, chp->cq.cidx, 252 chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use, 253 chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64toh(chp->cq.bits_type_ts), 254 t4_cq_notempty(&chp->cq)); 255 256 for (i=0; i < chp->cq.size; i++) { 257 u64 *p = (u64 *)(chp->cq.queue + i); 258 259 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1])); 260 if (i == chp->cq.cidx) 261 fprintf(stderr, " <-- cidx\n"); 262 else 263 fprintf(stderr, "\n"); 264 p+= 2; 265 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1])); 266 p+= 2; 267 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1])); 268 p+= 2; 269 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1])); 270 p+= 2; 271 } 272 } 273 274 static void dump_qp(struct c4iw_qp *qhp) 275 { 276 int i; 277 int j; 278 struct t4_swsqe *swsqe; 279 struct t4_swrqe *swrqe; 280 u16 cidx, pidx; 281 u64 *p; 282 283 fprintf(stderr, 284 "QP: %p id %u error %d flushed %d qid_mask 0x%x\n" 285 " SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n" 286 " RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n", 287 qhp, 288 qhp->wq.sq.qid, 289 qhp->wq.error, 290 qhp->wq.flushed, 291 qhp->wq.qid_mask, 292 qhp->wq.sq.qid, 293 qhp->wq.sq.queue, 294 qhp->wq.sq.sw_sq, 295 qhp->wq.sq.cidx, 296 qhp->wq.sq.pidx, 297 qhp->wq.sq.in_use, 298 qhp->wq.sq.wq_pidx, 299 qhp->wq.sq.size, 300 qhp->wq.sq.flags, 301 qhp->wq.sq.flush_cidx, 302 qhp->wq.rq.qid, 303 qhp->wq.rq.queue, 304 qhp->wq.rq.sw_rq, 305 qhp->wq.rq.cidx, 306 qhp->wq.rq.pidx, 307 qhp->wq.rq.in_use, 308 qhp->wq.rq.size); 309 cidx = qhp->wq.sq.cidx; 310 pidx = qhp->wq.sq.pidx; 311 if (cidx != pidx) 312 fprintf(stderr, "SQ: \n"); 313 while (cidx != pidx) { 314 swsqe = &qhp->wq.sq.sw_sq[cidx]; 315 fprintf(stderr, "%04u: wr_id %016" PRIx64 316 " sq_wptr %08x read_len %u opcode 0x%x " 317 "complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n", 318 cidx, 319 swsqe->wr_id, 320 swsqe->idx, 321 swsqe->read_len, 322 swsqe->opcode, 323 swsqe->complete, 324 swsqe->signaled, 325 htobe64(((uint64_t *)&swsqe->cqe)[0]), 326 htobe64(((uint64_t *)&swsqe->cqe)[1]), 327 htobe64(((uint64_t *)&swsqe->cqe)[2]), 328 htobe64(((uint64_t *)&swsqe->cqe)[3])); 329 if (++cidx == qhp->wq.sq.size) 330 cidx = 0; 331 } 332 333 fprintf(stderr, "SQ WQ: \n"); 334 p = (u64 *)qhp->wq.sq.queue; 335 for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) { 336 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) { 337 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ", 338 i, be64toh(p[0]), be64toh(p[1])); 339 if (j == 0 && i == qhp->wq.sq.wq_pidx) 340 fprintf(stderr, " <-- pidx"); 341 fprintf(stderr, "\n"); 342 p += 2; 343 } 344 } 345 cidx = qhp->wq.rq.cidx; 346 pidx = qhp->wq.rq.pidx; 347 if (cidx != pidx) 348 fprintf(stderr, "RQ: \n"); 349 while (cidx != pidx) { 350 swrqe = &qhp->wq.rq.sw_rq[cidx]; 351 fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n", 352 cidx, 353 swrqe->wr_id ); 354 if (++cidx == qhp->wq.rq.size) 355 cidx = 0; 356 } 357 358 fprintf(stderr, "RQ WQ: \n"); 359 p = (u64 *)qhp->wq.rq.queue; 360 for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) { 361 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) { 362 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ", 363 i, be64toh(p[0]), be64toh(p[1])); 364 if (j == 0 && i == qhp->wq.rq.pidx) 365 fprintf(stderr, " <-- pidx"); 366 if (j == 0 && i == qhp->wq.rq.cidx) 367 fprintf(stderr, " <-- cidx"); 368 fprintf(stderr, "\n"); 369 p+=2; 370 } 371 } 372 } 373 374 void dump_state(void) 375 { 376 struct c4iw_dev *dev; 377 int i; 378 379 fprintf(stderr, "STALL DETECTED:\n"); 380 TAILQ_FOREACH(dev, &devices, list) { 381 //pthread_spin_lock(&dev->lock); 382 fprintf(stderr, "Device %s\n", dev->ibv_dev.name); 383 for (i=0; i < dev->max_cq; i++) { 384 if (dev->cqid2ptr[i]) { 385 struct c4iw_cq *chp = dev->cqid2ptr[i]; 386 //pthread_spin_lock(&chp->lock); 387 dump_cq(chp); 388 //pthread_spin_unlock(&chp->lock); 389 } 390 } 391 for (i=0; i < dev->max_qp; i++) { 392 if (dev->qpid2ptr[i]) { 393 struct c4iw_qp *qhp = dev->qpid2ptr[i]; 394 //pthread_spin_lock(&qhp->lock); 395 dump_qp(qhp); 396 //pthread_spin_unlock(&qhp->lock); 397 } 398 } 399 //pthread_spin_unlock(&dev->lock); 400 } 401 fprintf(stderr, "DUMP COMPLETE:\n"); 402 fflush(stderr); 403 } 404 #endif /* end of STALL_DETECTION */ 405 406 /* 407 * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library 408 * can know if the driver supports the kernel mode db ringing. 409 */ 410 int c4iw_abi_version = 1; 411 412 static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path, 413 int abi_version) 414 { 415 char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp; 416 char dev_str[IBV_SYSFS_PATH_MAX]; 417 struct c4iw_dev *dev; 418 unsigned vendor, device, fw_maj, fw_min; 419 int i; 420 char devnum; 421 char ib_param[16]; 422 423 #ifndef __linux__ 424 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", 425 ibdev, sizeof ibdev) < 0) 426 return NULL; 427 428 devnum = atoi(&ibdev[5]); 429 430 if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' && 431 strstr(&ibdev[2], "nex") && devnum >= 0) { 432 snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1], 433 devnum); 434 } else 435 return NULL; 436 437 if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0) 438 return NULL; 439 else { 440 if (strstr(value, "vendor=")) { 441 strncpy(ib_param, strstr(value, "vendor=") + 442 strlen("vendor="), 6); 443 sscanf(ib_param, "%i", &vendor); 444 } 445 446 if (strstr(value, "device=")) { 447 strncpy(ib_param, strstr(value, "device=") + 448 strlen("device="), 6); 449 sscanf(ib_param, "%i", &device); 450 } 451 } 452 #else 453 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", 454 value, sizeof value) < 0) 455 return NULL; 456 sscanf(value, "%i", &vendor); 457 458 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", 459 value, sizeof value) < 0) 460 return NULL; 461 sscanf(value, "%i", &device); 462 #endif 463 464 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) 465 if (vendor == hca_table[i].vendor && 466 device == hca_table[i].device) 467 goto found; 468 469 return NULL; 470 471 found: 472 c4iw_abi_version = abi_version; 473 474 #ifndef __linux__ 475 if (ibv_read_sysfs_file(dev_str, "firmware_version", 476 value, sizeof value) < 0) 477 return NULL; 478 #else 479 /* 480 * Verify that the firmware major number matches. Major number 481 * mismatches are fatal. Minor number mismatches are tolerated. 482 */ 483 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", 484 ibdev, sizeof ibdev) < 0) 485 return NULL; 486 487 memset(devstr, 0, sizeof devstr); 488 snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s", 489 ibv_get_sysfs_path(), ibdev); 490 if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0) 491 return NULL; 492 #endif 493 494 cp = strtok(value+1, "."); 495 sscanf(cp, "%i", &fw_maj); 496 cp = strtok(NULL, "."); 497 sscanf(cp, "%i", &fw_min); 498 499 if ((signed int)fw_maj < FW_MAJ) { 500 fprintf(stderr, "libcxgb4: Fatal firmware version mismatch. " 501 "Firmware major number is %u and libcxgb4 needs %u.\n", 502 fw_maj, FW_MAJ); 503 fflush(stderr); 504 return NULL; 505 } 506 507 DBGLOG("libcxgb4"); 508 509 if ((signed int)fw_min < FW_MIN) { 510 PDBG("libcxgb4: non-fatal firmware version mismatch. " 511 "Firmware minor number is %u and libcxgb4 needs %u.\n", 512 fw_min, FW_MIN); 513 fflush(stderr); 514 } 515 516 PDBG("%s found vendor %d device %d type %d\n", 517 __FUNCTION__, vendor, device, CHELSIO_CHIP_VERSION(hca_table[i].device >> 8)); 518 519 dev = calloc(1, sizeof *dev); 520 if (!dev) { 521 return NULL; 522 } 523 524 pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE); 525 dev->ibv_dev.ops = &c4iw_dev_ops; 526 dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8); 527 dev->abi_version = abi_version; 528 529 PDBG("%s device claimed\n", __FUNCTION__); 530 TAILQ_INSERT_TAIL(&devices, dev, list); 531 #ifdef STALL_DETECTION 532 { 533 char *c = getenv("CXGB4_STALL_TIMEOUT"); 534 if (c) { 535 stall_to = strtol(c, NULL, 0); 536 if (errno || stall_to < 0) 537 stall_to = 0; 538 } 539 } 540 #endif 541 { 542 char *c = getenv("CXGB4_MA_WR"); 543 if (c) { 544 ma_wr = strtol(c, NULL, 0); 545 if (ma_wr != 1) 546 ma_wr = 0; 547 } 548 } 549 { 550 char *c = getenv("T5_ENABLE_WC"); 551 if (c) { 552 t5_en_wc = strtol(c, NULL, 0); 553 if (t5_en_wc != 1) 554 t5_en_wc = 0; 555 } 556 } 557 558 return &dev->ibv_dev; 559 } 560 561 static __attribute__((constructor)) void cxgb4_register_driver(void) 562 { 563 c4iw_page_size = sysconf(_SC_PAGESIZE); 564 c4iw_page_shift = long_log2(c4iw_page_size); 565 c4iw_page_mask = ~(c4iw_page_size - 1); 566 verbs_register_driver("cxgb4", cxgb4_driver_init); 567 } 568 569 #ifdef STATS 570 void __attribute__ ((destructor)) cs_fini(void); 571 void __attribute__ ((destructor)) cs_fini(void) 572 { 573 syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu " 574 "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n", 575 c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read, 576 c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe, 577 c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq); 578 } 579 #endif 580