/*
 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <pthread.h>
#include <string.h>
#include <signal.h>
#include <stdbool.h>

#include "libcxgb4.h"
#include "cxgb4-abi.h"

#define PCI_VENDOR_ID_CHELSIO 0x1425

/*
 * Macros needed to support the PCI Device ID Table ...
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static struct { \
		unsigned vendor; \
		unsigned device; \
	} hca_table[] = {

#define CH_PCI_DEVICE_ID_FUNCTION \
		0x4

#define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \
	{ \
		.vendor = PCI_VENDOR_ID_CHELSIO, \
		.device = (__DeviceID), \
	}

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
	}

#include "t4_chip_type.h"
#include "t4_pci_id_tbl.h"

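/*
 * Illustrative sketch only (the real entries come from t4_pci_id_tbl.h):
 * with the macros above, including t4_pci_id_tbl.h expands to roughly
 *
 *	static struct {
 *		unsigned vendor;
 *		unsigned device;
 *	} hca_table[] = {
 *		{ .vendor = PCI_VENDOR_ID_CHELSIO, .device = <DeviceID> },
 *		...
 *	};
 *
 * cxgb4_driver_init() below scans this table to decide whether a given
 * PCI vendor/device pair belongs to a supported Chelsio adapter.
 */
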
unsigned long c4iw_page_size;
unsigned long c4iw_page_shift;
unsigned long c4iw_page_mask;
int ma_wr;
int t5_en_wc = 1;

static TAILQ_HEAD(,c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices);

static struct ibv_context_ops c4iw_ctx_ops = {
	.query_device = c4iw_query_device,
	.query_port = c4iw_query_port,
	.alloc_pd = c4iw_alloc_pd,
	.dealloc_pd = c4iw_free_pd,
	.reg_mr = c4iw_reg_mr,
	.dereg_mr = c4iw_dereg_mr,
	.create_cq = c4iw_create_cq,
	.resize_cq = c4iw_resize_cq,
	.destroy_cq = c4iw_destroy_cq,
	.create_srq = c4iw_create_srq,
	.modify_srq = c4iw_modify_srq,
	.destroy_srq = c4iw_destroy_srq,
	.create_qp = c4iw_create_qp,
	.modify_qp = c4iw_modify_qp,
	.destroy_qp = c4iw_destroy_qp,
	.query_qp = c4iw_query_qp,
	.create_ah = c4iw_create_ah,
	.destroy_ah = c4iw_destroy_ah,
	.attach_mcast = c4iw_attach_mcast,
	.detach_mcast = c4iw_detach_mcast,
	.post_srq_recv = c4iw_post_srq_recv,
	.req_notify_cq = c4iw_arm_cq,
};

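/*
 * c4iw_alloc_context - create a device context for a user process.
 *
 * Requests a ucontext from the iw_cxgb4 kernel driver, mmap()s the
 * read-only status page the kernel exports (when one is advertised),
 * installs the chip-specific fast-path ops, and allocates the per-device
 * MR/QP/CQ id-to-pointer lookup tables on first use.
 */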
static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
					      int cmd_fd)
{
	struct c4iw_context *context;
	struct ibv_get_context cmd;
	struct c4iw_alloc_ucontext_resp resp;
	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
	struct ibv_query_device qcmd;
	uint64_t raw_fw_ver;
	struct ibv_device_attr attr;

	context = malloc(sizeof *context);
	if (!context)
		return NULL;

	memset(context, 0, sizeof *context);
	context->ibv_ctx.cmd_fd = cmd_fd;

	resp.status_page_size = 0;
	resp.reserved = 0;
	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp))
		goto err_free;

	if (resp.reserved)
		PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
		     __FUNCTION__);

	context->status_page_size = resp.status_page_size;
	if (resp.status_page_size) {
		context->status_page = mmap(NULL, resp.status_page_size,
					    PROT_READ, MAP_SHARED, cmd_fd,
					    resp.status_page_key);
		if (context->status_page == MAP_FAILED)
			goto err_free;
	}

	context->ibv_ctx.device = ibdev;
	context->ibv_ctx.ops = c4iw_ctx_ops;

	switch (rhp->chip_version) {
	case CHELSIO_T6:
		PDBG("%s T6/T5/T4 device\n", __FUNCTION__);
		/* FALLTHROUGH */
	case CHELSIO_T5:
		PDBG("%s T5/T4 device\n", __FUNCTION__);
		/* FALLTHROUGH */
	case CHELSIO_T4:
		PDBG("%s T4 device\n", __FUNCTION__);
		context->ibv_ctx.ops.async_event = c4iw_async_event;
		context->ibv_ctx.ops.post_send = c4iw_post_send;
		context->ibv_ctx.ops.post_recv = c4iw_post_receive;
		context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
		context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
		break;
	default:
		PDBG("%s unknown hca type %d\n", __FUNCTION__,
		     rhp->chip_version);
		goto err_unmap;
		break;
	}

	if (!rhp->mmid2ptr) {
		int ret;

		ret = ibv_cmd_query_device(&context->ibv_ctx, &attr,
					   &raw_fw_ver, &qcmd, sizeof qcmd);
		if (ret)
			goto err_unmap;
		rhp->max_mr = attr.max_mr;
		rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
		if (!rhp->mmid2ptr) {
			goto err_unmap;
		}
		if (rhp->abi_version < 3) {
			fprintf(stderr, "Warning: iw_cxgb4 driver is of older version"
				" than libcxgb4: %d\n", rhp->abi_version);
			rhp->max_qp = T4_QID_BASE + attr.max_qp;
		} else {
			rhp->max_qp = context->status_page->qp_start +
				      context->status_page->qp_size;
		}
		rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *));
		if (!rhp->qpid2ptr) {
			goto err_unmap;
		}
		if (rhp->abi_version < 3)
			rhp->max_cq = T4_QID_BASE + attr.max_cq;
		else
			rhp->max_cq = context->status_page->cq_start +
				      context->status_page->cq_size;
		rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
		if (!rhp->cqid2ptr)
			goto err_unmap;

		/* Disable userspace WC if architecture/adapter does not
		 * support WC.
		 * Note: To forcefully disable WC in kernel driver use the
		 * loader tunable "hw.cxl.write_combine=0"
		 */
		if (t5_en_wc && !context->status_page->wc_supported) {
			t5_en_wc = 0;
		}
	}

	return &context->ibv_ctx;

err_unmap:
	if (context->status_page_size)
		munmap(context->status_page, context->status_page_size);
err_free:
	/* Free (and reset) any partially built id-to-pointer tables. */
	if (rhp->cqid2ptr) {
		free(rhp->cqid2ptr);
		rhp->cqid2ptr = NULL;
	}
	if (rhp->qpid2ptr) {
		free(rhp->qpid2ptr);
		rhp->qpid2ptr = NULL;
	}
	if (rhp->mmid2ptr) {
		free(rhp->mmid2ptr);
		rhp->mmid2ptr = NULL;
	}
	free(context);
	return NULL;
}

static void c4iw_free_context(struct ibv_context *ibctx)
{
	struct c4iw_context *context = to_c4iw_context(ibctx);

	if (context->status_page_size)
		munmap(context->status_page, context->status_page_size);
	free(context);
}

static struct verbs_device_ops c4iw_dev_ops = {
	.alloc_context = c4iw_alloc_context,
	.free_context = c4iw_free_context
};

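/*
 * Optional stall-detection debug support.  When built with STALL_DETECTION,
 * dump_state() walks every registered device and dumps the software and
 * hardware queue state of each CQ and QP to stderr.  The timeout used by
 * the stall detector is read from the CXGB4_STALL_TIMEOUT environment
 * variable in cxgb4_driver_init() below.
 */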
#ifdef STALL_DETECTION

int stall_to;

static void dump_cq(struct c4iw_cq *chp)
{
	int i;

	fprintf(stderr,
		"CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
		"cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
		chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
		chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
		chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc,
		be64toh(chp->cq.bits_type_ts),
		t4_cq_notempty(&chp->cq));

	for (i = 0; i < chp->cq.size; i++) {
		u64 *p = (u64 *)(chp->cq.queue + i);

		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1]));
		if (i == chp->cq.cidx)
			fprintf(stderr, " <-- cidx\n");
		else
			fprintf(stderr, "\n");
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
	}
}

static void dump_qp(struct c4iw_qp *qhp)
{
	int i;
	int j;
	struct t4_swsqe *swsqe;
	struct t4_swrqe *swrqe;
	u16 cidx, pidx;
	u64 *p;

	fprintf(stderr,
		"QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
		" SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
		" RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
		qhp,
		qhp->wq.sq.qid,
		qhp->wq.error,
		qhp->wq.flushed,
		qhp->wq.qid_mask,
		qhp->wq.sq.qid,
		qhp->wq.sq.queue,
		qhp->wq.sq.sw_sq,
		qhp->wq.sq.cidx,
		qhp->wq.sq.pidx,
		qhp->wq.sq.in_use,
		qhp->wq.sq.wq_pidx,
		qhp->wq.sq.size,
		qhp->wq.sq.flags,
		qhp->wq.sq.flush_cidx,
		qhp->wq.rq.qid,
		qhp->wq.rq.queue,
		qhp->wq.rq.sw_rq,
		qhp->wq.rq.cidx,
		qhp->wq.rq.pidx,
		qhp->wq.rq.in_use,
		qhp->wq.rq.size);
	cidx = qhp->wq.sq.cidx;
	pidx = qhp->wq.sq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "SQ: \n");
	while (cidx != pidx) {
		swsqe = &qhp->wq.sq.sw_sq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64
			" sq_wptr %08x read_len %u opcode 0x%x "
			"complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
			cidx,
			swsqe->wr_id,
			swsqe->idx,
			swsqe->read_len,
			swsqe->opcode,
			swsqe->complete,
			swsqe->signaled,
			htobe64(((uint64_t *)&swsqe->cqe)[0]),
			htobe64(((uint64_t *)&swsqe->cqe)[1]),
			htobe64(((uint64_t *)&swsqe->cqe)[2]),
			htobe64(((uint64_t *)&swsqe->cqe)[3]));
		if (++cidx == qhp->wq.sq.size)
			cidx = 0;
	}

	fprintf(stderr, "SQ WQ: \n");
	p = (u64 *)qhp->wq.sq.queue;
	for (i = 0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
		for (j = 0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.sq.wq_pidx)
				fprintf(stderr, " <-- pidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
	cidx = qhp->wq.rq.cidx;
	pidx = qhp->wq.rq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "RQ: \n");
	while (cidx != pidx) {
		swrqe = &qhp->wq.rq.sw_rq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
			cidx,
			swrqe->wr_id);
		if (++cidx == qhp->wq.rq.size)
			cidx = 0;
	}

	fprintf(stderr, "RQ WQ: \n");
	p = (u64 *)qhp->wq.rq.queue;
	for (i = 0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
		for (j = 0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.rq.pidx)
				fprintf(stderr, " <-- pidx");
			if (j == 0 && i == qhp->wq.rq.cidx)
				fprintf(stderr, " <-- cidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
}

void dump_state(void)
{
	struct c4iw_dev *dev;
	int i;

	fprintf(stderr, "STALL DETECTED:\n");
	TAILQ_FOREACH(dev, &devices, list) {
		//pthread_spin_lock(&dev->lock);
		fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
		for (i = 0; i < dev->max_cq; i++) {
			if (dev->cqid2ptr[i]) {
				struct c4iw_cq *chp = dev->cqid2ptr[i];
				//pthread_spin_lock(&chp->lock);
				dump_cq(chp);
				//pthread_spin_unlock(&chp->lock);
			}
		}
		for (i = 0; i < dev->max_qp; i++) {
			if (dev->qpid2ptr[i]) {
				struct c4iw_qp *qhp = dev->qpid2ptr[i];
				//pthread_spin_lock(&qhp->lock);
				dump_qp(qhp);
				//pthread_spin_unlock(&qhp->lock);
			}
		}
		//pthread_spin_unlock(&dev->lock);
	}
	fprintf(stderr, "DUMP COMPLETE:\n");
	fflush(stderr);
}
#endif /* end of STALL_DETECTION */

/*
 * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library
 * can know if the driver supports the kernel mode db ringing.
 */
int c4iw_abi_version = 1;

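/*
 * cxgb4_driver_init - libibverbs device probe entry point.
 *
 * Reads the PCI vendor/device ids for the uverbs device (from sysfs on
 * Linux, from the t4/t5/t6 nexus pnpinfo on other platforms), matches them
 * against hca_table, verifies the firmware major version, and, on success,
 * allocates and registers a struct c4iw_dev.  Behaviour can be tuned with
 * the CXGB4_STALL_TIMEOUT, CXGB4_MA_WR and T5_ENABLE_WC environment
 * variables.
 */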
static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
					      int abi_version)
{
	char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
	char dev_str[IBV_SYSFS_PATH_MAX];
	struct c4iw_dev *dev;
	unsigned vendor, device, fw_maj, fw_min;
	int i;
	char devnum;
	char ib_param[16];

#ifndef __linux__
	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
				ibdev, sizeof ibdev) < 0)
		return NULL;

	devnum = atoi(&ibdev[5]);

	if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' &&
	    strstr(&ibdev[2], "nex") && devnum >= 0) {
		snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1],
			 devnum);
	} else
		return NULL;

	if (ibv_read_sysfs_file(dev_str, "%pnpinfo", value, sizeof value) < 0)
		return NULL;
	else {
		if (strstr(value, "vendor=")) {
			strncpy(ib_param, strstr(value, "vendor=") +
				strlen("vendor="), 6);
			ib_param[6] = '\0';
			sscanf(ib_param, "%i", &vendor);
		}

		if (strstr(value, "device=")) {
			strncpy(ib_param, strstr(value, "device=") +
				strlen("device="), 6);
			ib_param[6] = '\0';
			sscanf(ib_param, "%i", &device);
		}
	}
#else
	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
				value, sizeof value) < 0)
		return NULL;
	sscanf(value, "%i", &vendor);

	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
				value, sizeof value) < 0)
		return NULL;
	sscanf(value, "%i", &device);
#endif

	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
		if (vendor == hca_table[i].vendor &&
		    device == hca_table[i].device)
			goto found;

	return NULL;

found:
	c4iw_abi_version = abi_version;

#ifndef __linux__
	if (ibv_read_sysfs_file(dev_str, "firmware_version",
				value, sizeof value) < 0)
		return NULL;
#else
	/*
	 * Verify that the firmware major number matches.  Major number
	 * mismatches are fatal.  Minor number mismatches are tolerated.
	 */
	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
				ibdev, sizeof ibdev) < 0)
		return NULL;

	memset(devstr, 0, sizeof devstr);
	snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
		 ibv_get_sysfs_path(), ibdev);
	if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
		return NULL;
#endif

	cp = strtok(value + 1, ".");
	sscanf(cp, "%i", &fw_maj);
	cp = strtok(NULL, ".");
	sscanf(cp, "%i", &fw_min);

	if ((signed int)fw_maj < FW_MAJ) {
		fprintf(stderr, "libcxgb4: Fatal firmware version mismatch. "
			"Firmware major number is %u and libcxgb4 needs %u.\n",
			fw_maj, FW_MAJ);
		fflush(stderr);
		return NULL;
	}

	DBGLOG("libcxgb4");

	if ((signed int)fw_min < FW_MIN) {
		PDBG("libcxgb4: non-fatal firmware version mismatch. "
		     "Firmware minor number is %u and libcxgb4 needs %u.\n",
		     fw_min, FW_MIN);
		fflush(stderr);
	}

	PDBG("%s found vendor %d device %d type %d\n",
	     __FUNCTION__, vendor, device,
	     CHELSIO_CHIP_VERSION(hca_table[i].device >> 8));

	dev = calloc(1, sizeof *dev);
	if (!dev) {
		return NULL;
	}

	pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE);
	dev->ibv_dev.ops = &c4iw_dev_ops;
	dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8);
	dev->abi_version = abi_version;

	PDBG("%s device claimed\n", __FUNCTION__);
	TAILQ_INSERT_TAIL(&devices, dev, list);
#ifdef STALL_DETECTION
	{
		char *c = getenv("CXGB4_STALL_TIMEOUT");
		if (c) {
			errno = 0;
			stall_to = strtol(c, NULL, 0);
			if (errno || stall_to < 0)
				stall_to = 0;
		}
	}
#endif
	{
		char *c = getenv("CXGB4_MA_WR");
		if (c) {
			ma_wr = strtol(c, NULL, 0);
			if (ma_wr != 1)
				ma_wr = 0;
		}
	}
	{
		char *c = getenv("T5_ENABLE_WC");
		if (c) {
			t5_en_wc = strtol(c, NULL, 0);
			if (t5_en_wc != 1)
				t5_en_wc = 0;
		}
	}

	return &dev->ibv_dev;
}

static __attribute__((constructor)) void cxgb4_register_driver(void)
{
	c4iw_page_size = sysconf(_SC_PAGESIZE);
	c4iw_page_shift = long_log2(c4iw_page_size);
	c4iw_page_mask = ~(c4iw_page_size - 1);
	verbs_register_driver("cxgb4", cxgb4_driver_init);
}

#ifdef STATS
void __attribute__ ((destructor)) cs_fini(void);
void __attribute__ ((destructor)) cs_fini(void)
{
	syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu "
	       "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
	       c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read,
	       c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe,
	       c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq);
}
#endif