/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "ipoib.h"
#include <sys/eventhandler.h>

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>

#include <linux/if_vlan.h>

#include <net/infiniband.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");

int ipoib_sendq_size = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size = IPOIB_RX_RING_SIZE;

module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level = 1;

module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif

struct ipoib_path_iter {
	struct ipoib_dev_priv *priv;
	struct ipoib_path path;
};

static const u8 ipv4_bcast_addr[] = {
	0x00, 0xff, 0xff, 0xff,
	0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
};

struct workqueue_struct *ipoib_workqueue;

struct ib_sa_client ipoib_sa_client;

static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static struct net_device *ipoib_get_net_dev_by_params(
		struct ib_device *dev, u8 port, u16 pkey,
		const union ib_gid *gid, const struct sockaddr *addr,
		void *client_data);
static void ipoib_start(struct ifnet *dev);
static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);

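/*
 * Unit number space for IPoIB interface names (ib0, ib1, ...).  The
 * allocator is created before the module loads and torn down after it
 * unloads, so unit numbers stay valid for the driver's whole lifetime.
 */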
static struct unrhdr *ipoib_unrhdr;

static void
ipoib_unrhdr_init(void *arg)
{

	ipoib_unrhdr = new_unrhdr(0, 65535, NULL);
}
SYSINIT(ipoib_unrhdr_init, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_init, NULL);

static void
ipoib_unrhdr_uninit(void *arg)
{

	if (ipoib_unrhdr != NULL) {
		struct unrhdr *hdr;

		hdr = ipoib_unrhdr;
		ipoib_unrhdr = NULL;

		delete_unrhdr(hdr);
	}
}
SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);

static struct ib_client ipoib_client = {
	.name   = "ipoib",
	.add    = ipoib_add_one,
	.remove = ipoib_remove_one,
	.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};

int
ipoib_open(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev = priv->dev;

	ipoib_dbg(priv, "bringing up interface\n");

	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	if (ipoib_pkey_dev_delay_open(priv))
		return 0;

	if (ipoib_ib_dev_open(priv))
		goto err_disable;

	if (ipoib_ib_dev_up(priv))
		goto err_stop;

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring up any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
				ipoib_open(cpriv);
		mutex_unlock(&priv->vlan_mutex);
	}
	dev->if_drv_flags |= IFF_DRV_RUNNING;
	dev->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

err_stop:
	ipoib_ib_dev_stop(priv, 1);

err_disable:
	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	return -EINVAL;
}

static void
ipoib_init(void *arg)
{
	struct ifnet *dev;
	struct ipoib_dev_priv *priv;

	priv = arg;
	dev = priv->dev;
	if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
		ipoib_open(priv);
	queue_work(ipoib_workqueue, &priv->flush_light);
}

static int
ipoib_stop(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev = priv->dev;

	ipoib_dbg(priv, "stopping interface\n");

	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	ipoib_ib_dev_down(priv, 0);
	ipoib_ib_dev_stop(priv, 0);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring down any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0)
				ipoib_stop(cpriv);
		mutex_unlock(&priv->vlan_mutex);
	}

	return 0;
}

static int
ipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu,
    bool propagate)
{
	struct ifnet *ifp;
	struct ifreq ifr;
	int error;

	ifp = priv->dev;
	if (ifp->if_mtu == new_mtu)
		return (0);
	if (propagate) {
		strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
		ifr.ifr_mtu = new_mtu;
		CURVNET_SET(ifp->if_vnet);
		error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread);
		CURVNET_RESTORE();
	} else {
		ifp->if_mtu = new_mtu;
		error = 0;
	}
	return (error);
}

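/*
 * Validate and apply a new MTU.  In connected mode the limit is the CM
 * MTU; in datagram mode it is the port's UD MTU, and the effective
 * ifnet MTU is additionally capped by the multicast group MTU.
 */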
int
ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate)
{
	int error, prev_admin_mtu;

	/* dev->if_mtu > 2K ==> connected mode */
	if (ipoib_cm_admin_enabled(priv)) {
		if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)))
			return -EINVAL;

		if (new_mtu > priv->mcast_mtu)
			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
			    priv->mcast_mtu);

		return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate));
	}

	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
		return -EINVAL;

	prev_admin_mtu = priv->admin_mtu;
	priv->admin_mtu = new_mtu;
	error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu,
	    priv->admin_mtu), propagate);
	if (error == 0) {
		/* check for MTU change to avoid infinite loop */
		if (prev_admin_mtu != new_mtu)
			queue_work(ipoib_workqueue, &priv->flush_light);
	} else
		priv->admin_mtu = prev_admin_mtu;
	return (error);
}

static int
ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct ipoib_dev_priv *priv = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *) data;
	struct ifreq *ifr = (struct ifreq *) data;
	int error = 0;

	/* check if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
				error = -ipoib_open(priv);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				ipoib_stop(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			queue_work(ipoib_workqueue, &priv->restart_task);
		break;
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;

		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
			arp_ifinit(ifp, ifa);
			break;
#endif
		default:
			ifp->if_init(ifp->if_softc);
			break;
		}
		break;

	case SIOCGIFADDR:
		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
		    INFINIBAND_ALEN);
		break;

	case SIOCSIFMTU:
		/*
		 * Set the interface MTU.
		 */
		error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

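/*
 * Unicast paths are kept in an rb-tree keyed by destination GID and
 * protected by priv->lock; __path_find() and __path_add() implement
 * lookup and insertion on that tree.
 */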
static struct ipoib_path *
__path_find(struct ipoib_dev_priv *priv, void *gid)
{
	struct rb_node *n = priv->path_tree.rb_node;
	struct ipoib_path *path;
	int ret;

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		ret = memcmp(gid, path->pathrec.dgid.raw,
		    sizeof (union ib_gid));

		if (ret < 0)
			n = n->rb_left;
		else if (ret > 0)
			n = n->rb_right;
		else
			return path;
	}

	return NULL;
}

static int
__path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{
	struct rb_node **n = &priv->path_tree.rb_node;
	struct rb_node *pn = NULL;
	struct ipoib_path *tpath;
	int ret;

	while (*n) {
		pn = *n;
		tpath = rb_entry(pn, struct ipoib_path, rb_node);

		ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
		    sizeof (union ib_gid));
		if (ret < 0)
			n = &pn->rb_left;
		else if (ret > 0)
			n = &pn->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&path->rb_node, pn, n);
	rb_insert_color(&path->rb_node, &priv->path_tree);

	list_add_tail(&path->list, &priv->path_list);

	return 0;
}

void
ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{

	_IF_DRAIN(&path->queue);

	if (path->ah)
		ipoib_put_ah(path->ah);
	if (ipoib_cm_get(path))
		ipoib_cm_destroy_tx(ipoib_cm_get(path));

	kfree(path);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

struct ipoib_path_iter *
ipoib_path_iter_init(struct ipoib_dev_priv *priv)
{
	struct ipoib_path_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->priv = priv;
	memset(iter->path.pathrec.dgid.raw, 0, 16);

	if (ipoib_path_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

int
ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
	struct ipoib_dev_priv *priv = iter->priv;
	struct rb_node *n;
	struct ipoib_path *path;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->path_tree);

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
		    sizeof (union ib_gid)) < 0) {
			iter->path = *path;
			ret = 0;
			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}

void
ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path)
{
	*path = iter->path;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */

void
ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv)
{
	struct ipoib_path *path, *tp;

	spin_lock_irq(&priv->lock);

	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
		ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n",
		    be16_to_cpu(path->pathrec.dlid),
		    path->pathrec.dgid.raw, ":");
		path->valid = 0;
	}

	spin_unlock_irq(&priv->lock);
}

void
ipoib_flush_paths(struct ipoib_dev_priv *priv)
{
	struct ipoib_path *path, *tp;
	LIST_HEAD(remove_list);
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	list_splice_init(&priv->path_list, &remove_list);

	list_for_each_entry(path, &remove_list, list)
		rb_erase(&path->rb_node, &priv->path_tree);

	list_for_each_entry_safe(path, tp, &remove_list, list) {
		if (path->query)
			ib_sa_cancel_query(path->query_id, path->query);
		spin_unlock_irqrestore(&priv->lock, flags);
		wait_for_completion(&path->done);
		ipoib_path_free(priv, path);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}

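/*
 * SA path record query callback: on success, construct an address
 * handle from the returned record, mark the path valid, and retransmit
 * any mbufs that were queued while the lookup was in flight.
 */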
static void
path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr)
{
	struct ipoib_path *path = path_ptr;
	struct ipoib_dev_priv *priv = path->priv;
	struct ifnet *dev = priv->dev;
	struct ipoib_ah *ah = NULL;
	struct ipoib_ah *old_ah = NULL;
	struct epoch_tracker et;
	struct ifqueue mbqueue;
	struct mbuf *mb;
	unsigned long flags;

	if (!status)
		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n",
		    be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":");
	else
		ipoib_dbg(priv, "PathRec status %d for GID %16D\n",
		    status, path->pathrec.dgid.raw, ":");

	bzero(&mbqueue, sizeof(mbqueue));

	if (!status) {
		struct ib_ah_attr av;

		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
			ah = ipoib_create_ah(priv, priv->pd, &av);
	}

	spin_lock_irqsave(&priv->lock, flags);

	if (ah) {
		path->pathrec = *pathrec;

		old_ah = path->ah;
		path->ah = ah;

		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
		    ah, be16_to_cpu(pathrec->dlid), pathrec->sl);

		for (;;) {
			_IF_DEQUEUE(&path->queue, mb);
			if (mb == NULL)
				break;
			_IF_ENQUEUE(&mbqueue, mb);
		}

#ifdef CONFIG_INFINIBAND_IPOIB_CM
		if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path))
			ipoib_cm_set(path, ipoib_cm_create_tx(priv, path));
#endif

		path->valid = 1;
	}

	path->query = NULL;
	complete(&path->done);

	spin_unlock_irqrestore(&priv->lock, flags);

	if (old_ah)
		ipoib_put_ah(old_ah);

	NET_EPOCH_ENTER(et);
	for (;;) {
		_IF_DEQUEUE(&mbqueue, mb);
		if (mb == NULL)
			break;
		mb->m_pkthdr.rcvif = dev;
		if (dev->if_transmit(dev, mb))
			ipoib_warn(priv, "dev_queue_xmit failed "
			    "to requeue packet\n");
	}
	NET_EPOCH_EXIT(et);
}

static struct ipoib_path *
path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr)
{
	struct ipoib_path *path;

	if (!priv->broadcast)
		return NULL;

	path = kzalloc(sizeof *path, GFP_ATOMIC);
	if (!path)
		return NULL;

	path->priv = priv;

	bzero(&path->queue, sizeof(path->queue));

#ifdef CONFIG_INFINIBAND_IPOIB_CM
	memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN);
#endif
	memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid));
	path->pathrec.sgid = priv->local_gid;
	path->pathrec.pkey = cpu_to_be16(priv->pkey);
	path->pathrec.numb_path = 1;
	path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;

	return path;
}

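/*
 * Kick off an SA path record query for this path.  The query asks for
 * a path MTU strictly greater than the IB MTU enum one step below the
 * size the interface needs; sizes with no mapping wildcard the MTU.
 */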
static int
path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{
	struct ifnet *dev = priv->dev;

	ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
	struct ib_sa_path_rec p_rec;

	p_rec = path->pathrec;
	p_rec.mtu_selector = IB_SA_GT;

	switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) {
	case 512:
		p_rec.mtu = IB_MTU_256;
		break;
	case 1024:
		p_rec.mtu = IB_MTU_512;
		break;
	case 2048:
		p_rec.mtu = IB_MTU_1024;
		break;
	case 4096:
		p_rec.mtu = IB_MTU_2048;
		break;
	default:
		/* Wildcard everything */
		comp_mask = 0;
		p_rec.mtu = 0;
		p_rec.mtu_selector = 0;
	}

	ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n",
	    p_rec.dgid.raw, ":",
	    comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);

	init_completion(&path->done);

	path->query_id =
	    ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
		&p_rec, comp_mask |
		IB_SA_PATH_REC_DGID |
		IB_SA_PATH_REC_SGID |
		IB_SA_PATH_REC_NUMB_PATH |
		IB_SA_PATH_REC_TRAFFIC_CLASS |
		IB_SA_PATH_REC_PKEY,
		1000, GFP_ATOMIC,
		path_rec_completion,
		path, &path->query);
	if (path->query_id < 0) {
		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
		path->query = NULL;
		complete(&path->done);
		return path->query_id;
	}

	return 0;
}

static void
ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh)
{
	struct ipoib_path *path;

	path = __path_find(priv, eh->hwaddr + 4);
	if (!path || !path->valid) {
		int new_path = 0;

		if (!path) {
			path = path_rec_create(priv, eh->hwaddr);
			new_path = 1;
		}
		if (path) {
			if (_IF_QLEN(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
				_IF_ENQUEUE(&path->queue, mb);
			else {
				if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
				m_freem(mb);
			}

			if (!path->query && path_rec_start(priv, path)) {
				if (new_path)
					ipoib_path_free(priv, path);
				return;
			} else
				__path_add(priv, path);
		} else {
			if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
		}

		return;
	}

	if (ipoib_cm_get(path) && ipoib_cm_up(path)) {
		ipoib_cm_send(priv, mb, ipoib_cm_get(path));
	} else if (path->ah) {
		ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr));
	} else if ((path->query || !path_rec_start(priv, path)) &&
	    path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
		_IF_ENQUEUE(&path->queue, mb);
	} else {
		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
		m_freem(mb);
	}
}

static int
ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
{
	struct ipoib_header *eh;

	eh = mtod(mb, struct ipoib_header *);
	if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
		/* Add in the P_Key for multicast */
		eh->hwaddr[8] = (priv->pkey >> 8) & 0xff;
		eh->hwaddr[9] = priv->pkey & 0xff;

		ipoib_mcast_send(priv, eh->hwaddr + 4, mb);
	} else
		ipoib_unicast_send(mb, priv, eh);

	return 0;
}

void
ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv)
{
	struct mbuf *mb;

	assert_spin_locked(&priv->lock);

	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) &&
	    (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
		if (mb == NULL)
			break;
		INFINIBAND_BPF_MTAP(dev, mb);
		ipoib_send_one(priv, mb);
	}
}

static void
_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
{

	if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	spin_lock(&priv->lock);
	ipoib_start_locked(dev, priv);
	spin_unlock(&priv->lock);
}

static void
ipoib_start(struct ifnet *dev)
{
	_ipoib_start(dev, dev->if_softc);
}

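/*
 * Transmit handler for pkey (vlan) subinterfaces.  If the subinterface
 * has lost its IPoIB cookie, drain and drop its send queue instead of
 * transmitting.
 */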
static void
ipoib_vlan_start(struct ifnet *dev)
{
	struct ipoib_dev_priv *priv;
	struct mbuf *mb;

	priv = VLAN_COOKIE(dev);
	if (priv != NULL)
		return _ipoib_start(dev, priv);
	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) {
		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
		if (mb == NULL)
			break;
		m_freem(mb);
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
	}
}

int
ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port)
{

	/* Allocate RX/TX "rings" to hold queued mbs */
	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
	    GFP_KERNEL);
	if (!priv->rx_ring) {
		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
		    ca->name, ipoib_recvq_size);
		goto out;
	}

	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL);
	if (!priv->tx_ring) {
		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
		    ca->name, ipoib_sendq_size);
		goto out_rx_ring_cleanup;
	}
	memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);

	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */

	if (ipoib_ib_dev_init(priv, ca, port))
		goto out_tx_ring_cleanup;

	return 0;

out_tx_ring_cleanup:
	kfree(priv->tx_ring);

out_rx_ring_cleanup:
	kfree(priv->rx_ring);

out:
	return -ENOMEM;
}

static void
ipoib_detach(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev;

	dev = priv->dev;
	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		priv->gone = 1;
		infiniband_ifdetach(dev);
		if_free(dev);
		free_unr(ipoib_unrhdr, priv->unit);
	} else
		VLAN_SETCOOKIE(priv->dev, NULL);

	free(priv, M_TEMP);
}

void
ipoib_dev_cleanup(struct ipoib_dev_priv *priv)
{
	struct ipoib_dev_priv *cpriv, *tcpriv;

	/* Delete any child interfaces first */
	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
		ipoib_dev_cleanup(cpriv);
		ipoib_detach(cpriv);
	}

	ipoib_ib_dev_cleanup(priv);

	kfree(priv->rx_ring);
	kfree(priv->tx_ring);

	priv->rx_ring = NULL;
	priv->tx_ring = NULL;
}

static struct ipoib_dev_priv *
ipoib_priv_alloc(void)
{
	struct ipoib_dev_priv *priv;

	priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
	spin_lock_init(&priv->lock);
	spin_lock_init(&priv->drain_lock);
	mutex_init(&priv->vlan_mutex);
	INIT_LIST_HEAD(&priv->path_list);
	INIT_LIST_HEAD(&priv->child_intfs);
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);
	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
	INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
	INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
	INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
	INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
	memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN);

	return (priv);
}

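/*
 * Allocate the softc and its ifnet, assign a unit number, and attach
 * the interface to the InfiniBand link layer.  The caller is expected
 * to query the HCA and complete device initialization.
 */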
struct ipoib_dev_priv *
ipoib_intf_alloc(const char *name)
{
	struct ipoib_dev_priv *priv;
	struct ifnet *dev;

	priv = ipoib_priv_alloc();
	dev = priv->dev = if_alloc(IFT_INFINIBAND);
	if (!dev) {
		free(priv, M_TEMP);
		return NULL;
	}
	dev->if_softc = priv;
	priv->unit = alloc_unr(ipoib_unrhdr);
	if (priv->unit == -1) {
		if_free(dev);
		free(priv, M_TEMP);
		return NULL;
	}
	if_initname(dev, name, priv->unit);
	dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;

	infiniband_ifattach(dev, NULL, priv->broadcastaddr);

	dev->if_init = ipoib_init;
	dev->if_ioctl = ipoib_ioctl;
	dev->if_start = ipoib_start;

	dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;

	priv->dev = dev;
	if_link_state_change(dev, LINK_STATE_DOWN);

	return dev->if_softc;
}

int
ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
	struct ib_device_attr *device_attr = &hca->attrs;

	priv->hca_caps = device_attr->device_cap_flags;

	priv->dev->if_hwassist = 0;
	priv->dev->if_capabilities = 0;

#ifndef CONFIG_INFINIBAND_IPOIB_CM
	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
		priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
		priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

#if 0
	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) {
		priv->dev->if_capabilities |= IFCAP_TSO4;
		priv->dev->if_hwassist |= CSUM_TSO;
	}
#endif
#endif
	priv->dev->if_capabilities |=
	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
	priv->dev->if_capenable = priv->dev->if_capabilities;

	return 0;
}

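/*
 * Create and initialize an IPoIB interface for one HCA port: query the
 * port MTU, P_Key and GID, initialize the rings, and register for
 * asynchronous IB events.
 */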
static struct ifnet *
ipoib_add_port(const char *format, struct ib_device *hca, u8 port)
{
	struct ipoib_dev_priv *priv;
	struct ib_port_attr attr;
	int result = -ENOMEM;

	priv = ipoib_intf_alloc(format);
	if (!priv)
		goto alloc_mem_failed;

	if (!ib_query_port(hca, port, &attr))
		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
	else {
		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
		    hca->name, port);
		goto device_init_failed;
	}

	/* MTU will be reset when mcast join happens */
	priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
	priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu;

	result = ib_query_pkey(hca, port, 0, &priv->pkey);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
		    hca->name, port, result);
		goto device_init_failed;
	}

	if (ipoib_set_dev_features(priv, hca))
		goto device_init_failed;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	priv->pkey |= 0x8000;

	priv->broadcastaddr[8] = priv->pkey >> 8;
	priv->broadcastaddr[9] = priv->pkey & 0xff;

	result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
		    hca->name, port, result);
		goto device_init_failed;
	}
	memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));

	result = ipoib_dev_init(priv, hca, port);
	if (result < 0) {
		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
		    hca->name, port, result);
		goto device_init_failed;
	}
	if (ipoib_cm_admin_enabled(priv))
		priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));

	INIT_IB_EVENT_HANDLER(&priv->event_handler,
	    priv->ca, ipoib_event);
	result = ib_register_event_handler(&priv->event_handler);
	if (result < 0) {
		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
		    "port %d (ret = %d)\n",
		    hca->name, port, result);
		goto event_failed;
	}
	if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port);

	return priv->dev;

event_failed:
	ipoib_dev_cleanup(priv);

device_init_failed:
	ipoib_detach(priv);

alloc_mem_failed:
	return ERR_PTR(result);
}

static void
ipoib_add_one(struct ib_device *device)
{
	struct list_head *dev_list;
	struct ifnet *dev;
	struct ipoib_dev_priv *priv;
	int s, e, p;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
	if (!dev_list)
		return;

	INIT_LIST_HEAD(dev_list);

	if (device->node_type == RDMA_NODE_IB_SWITCH) {
		s = 0;
		e = 0;
	} else {
		s = 1;
		e = device->phys_port_cnt;
	}

	for (p = s; p <= e; ++p) {
		if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND)
			continue;
		dev = ipoib_add_port("ib", device, p);
		if (!IS_ERR(dev)) {
			priv = dev->if_softc;
			list_add_tail(&priv->list, dev_list);
		}
	}

	ib_set_client_data(device, &ipoib_client, dev_list);
}

static void
ipoib_remove_one(struct ib_device *device, void *client_data)
{
	struct ipoib_dev_priv *priv, *tmp;
	struct list_head *dev_list = client_data;

	if (!dev_list)
		return;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	list_for_each_entry_safe(priv, tmp, dev_list, list) {
		if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
			continue;

		ipoib_stop(priv);

		ib_unregister_event_handler(&priv->event_handler);

		/* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */

		flush_workqueue(ipoib_workqueue);

		ipoib_dev_cleanup(priv);
		ipoib_detach(priv);
	}

	kfree(dev_list);
}

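/*
 * Return non-zero if @addr matches one of the interface's configured
 * addresses.  The address list is walked under the net epoch.
 */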
static int
ipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev)
{
	struct epoch_tracker et;
	struct ifaddr *ifa;
	int retval = 0;

	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
		if (ifa->ifa_addr == NULL ||
		    ifa->ifa_addr->sa_family != addr->sa_family ||
		    ifa->ifa_addr->sa_len != addr->sa_len) {
			continue;
		}
		if (memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0) {
			retval = 1;
			break;
		}
	}
	NET_EPOCH_EXIT(et);

	return (retval);
}

/*
 * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on
 * top of a given ipoib device matching a pkey_index and address, if one
 * exists.
 *
 * @found_net_dev: contains a matching net_device if the return value
 * >= 1, with a reference held.
 */
static int
ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
    const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr,
    struct net_device **found_net_dev)
{
	struct ipoib_dev_priv *child_priv;
	int matches = 0;

	if (priv->pkey_index == pkey_index &&
	    (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) {
		if (addr == NULL || ipoib_match_dev_addr(addr, priv->dev) != 0) {
			if (*found_net_dev == NULL) {
				struct net_device *net_dev;

				if (priv->parent != NULL)
					net_dev = priv->parent;
				else
					net_dev = priv->dev;
				*found_net_dev = net_dev;
				dev_hold(net_dev);
			}
			matches++;
		}
	}

	/* Check child interfaces */
	mutex_lock(&priv->vlan_mutex);
	list_for_each_entry(child_priv, &priv->child_intfs, list) {
		matches += ipoib_match_gid_pkey_addr(child_priv, gid,
		    pkey_index, addr, found_net_dev);
		if (matches > 1)
			break;
	}
	mutex_unlock(&priv->vlan_mutex);

	return matches;
}

/*
 * __ipoib_get_net_dev_by_params - returns the number of matching
 * net_devs found (between 0 and 2). Also return the matching
 * net_device in the @net_dev parameter, holding a reference to the
 * net_device, if the number of matches >= 1
 */
static int
__ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
    u16 pkey_index, const union ib_gid *gid,
    const struct sockaddr *addr, struct net_device **net_dev)
{
	struct ipoib_dev_priv *priv;
	int matches = 0;

	*net_dev = NULL;

	list_for_each_entry(priv, dev_list, list) {
		if (priv->port != port)
			continue;

		matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index,
		    addr, net_dev);

		if (matches > 1)
			break;
	}

	return matches;
}

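/*
 * ib_client callback: map (device, port, pkey, gid, addr) to an ifnet.
 * Look for a unique match on the L2 parameters first and fall back to
 * the L3 address only when that is ambiguous.
 */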
static struct net_device *
ipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey,
    const union ib_gid *gid, const struct sockaddr *addr, void *client_data)
{
	struct net_device *net_dev;
	struct list_head *dev_list = client_data;
	u16 pkey_index;
	int matches;
	int ret;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index);
	if (ret)
		return NULL;

	if (!dev_list)
		return NULL;

	/* See if we can find a unique device matching the L2 parameters */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
	    gid, NULL, &net_dev);

	switch (matches) {
	case 0:
		return NULL;
	case 1:
		return net_dev;
	}

	dev_put(net_dev);

	/* Couldn't find a unique device with L2 parameters only. Use L3
	 * address to uniquely match the net device */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
	    gid, addr, &net_dev);
	switch (matches) {
	case 0:
		return NULL;
	default:
		dev_warn_ratelimited(&dev->dev,
		    "duplicate IP address detected\n");
		/* Fall through */
	case 1:
		return net_dev;
	}
}

static void
ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct ipoib_dev_priv *parent;
	struct ipoib_dev_priv *priv;
	struct epoch_tracker et;
	struct ifnet *dev;
	uint16_t pkey;
	int error;

	if (ifp->if_type != IFT_INFINIBAND)
		return;
	NET_EPOCH_ENTER(et);
	dev = VLAN_DEVAT(ifp, vtag);
	NET_EPOCH_EXIT(et);
	if (dev == NULL)
		return;
	priv = NULL;
	error = 0;
	parent = ifp->if_softc;
	/* We only support 15 bits of pkey. */
	if (vtag & 0x8000)
		return;
	pkey = vtag | 0x8000;	/* Set full membership bit. */
	if (pkey == parent->pkey)
		return;
	/* Check for dups */
	mutex_lock(&parent->vlan_mutex);
	list_for_each_entry(priv, &parent->child_intfs, list) {
		if (priv->pkey == pkey) {
			priv = NULL;
			error = EBUSY;
			goto out;
		}
	}
	priv = ipoib_priv_alloc();
	priv->dev = dev;
	priv->max_ib_mtu = parent->max_ib_mtu;
	priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu;
	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
	error = ipoib_set_dev_features(priv, parent->ca);
	if (error)
		goto out;
	priv->pkey = pkey;
	priv->broadcastaddr[8] = pkey >> 8;
	priv->broadcastaddr[9] = pkey & 0xff;
	dev->if_broadcastaddr = priv->broadcastaddr;
	error = ipoib_dev_init(priv, parent->ca, parent->port);
	if (error)
		goto out;
	priv->parent = parent->dev;
	list_add_tail(&priv->list, &parent->child_intfs);
	VLAN_SETCOOKIE(dev, priv);
	dev->if_start = ipoib_vlan_start;
	dev->if_drv_flags &= ~IFF_DRV_RUNNING;
	dev->if_hdrlen = IPOIB_HEADER_LEN;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		ipoib_open(priv);
	mutex_unlock(&parent->vlan_mutex);
	return;
out:
	mutex_unlock(&parent->vlan_mutex);
	if (priv)
		free(priv, M_TEMP);
	if (error)
		ipoib_warn(parent,
		    "failed to initialize subinterface: device %s, port %d vtag 0x%X",
		    parent->ca->name, parent->port, vtag);
	return;
}

static void
ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct ipoib_dev_priv *parent;
	struct ipoib_dev_priv *priv;
	struct epoch_tracker et;
	struct ifnet *dev;
	uint16_t pkey;

	if (ifp->if_type != IFT_INFINIBAND)
		return;

	NET_EPOCH_ENTER(et);
	dev = VLAN_DEVAT(ifp, vtag);
	NET_EPOCH_EXIT(et);
	if (dev)
		VLAN_SETCOOKIE(dev, NULL);
	pkey = vtag | 0x8000;
	parent = ifp->if_softc;
	mutex_lock(&parent->vlan_mutex);
	list_for_each_entry(priv, &parent->child_intfs, list) {
		if (priv->pkey == pkey) {
			ipoib_dev_cleanup(priv);
			list_del(&priv->list);
			break;
		}
	}
	mutex_unlock(&parent->vlan_mutex);
}

eventhandler_tag ipoib_vlan_attach;
eventhandler_tag ipoib_vlan_detach;

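/*
 * Module load: clamp the queue sizes to powers of two within the
 * supported range, hook the vlan configuration events used for pkey
 * subinterfaces, and register with the SA and ib_core layers.
 */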
static int __init
ipoib_init_module(void)
{
	int ret;

	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);

	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
	    IPOIB_MIN_QUEUE_SIZE));
#ifdef CONFIG_INFINIBAND_IPOIB_CM
	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif

	ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST);
	ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST);

	/*
	 * We create our own workqueue mainly because we want to be
	 * able to flush it when devices are being removed.  We can't
	 * use schedule_work()/flush_scheduled_work() because both
	 * unregister_netdev() and linkwatch_event take the rtnl lock,
	 * so flush_scheduled_work() can deadlock during device
	 * removal.
	 */
	ipoib_workqueue = create_singlethread_workqueue("ipoib");
	if (!ipoib_workqueue) {
		ret = -ENOMEM;
		goto err_fs;
	}

	ib_sa_register_client(&ipoib_sa_client);

	ret = ib_register_client(&ipoib_client);
	if (ret)
		goto err_sa;

	return 0;

err_sa:
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);

err_fs:
	return ret;
}

static void __exit
ipoib_cleanup_module(void)
{

	EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach);
	EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach);
	ib_unregister_client(&ipoib_client);
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);
}
module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);

static int
ipoib_evhand(module_t mod, int event, void *arg)
{
	return (0);
}

static moduledata_t ipoib_mod = {
	.name = "ipoib",
	.evhand = ipoib_evhand,
};

DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);