1 /* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 * 34 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ 35 */ 36 37 #include <linux/skbuff.h> 38 #include <linux/rtnetlink.h> 39 #include <linux/ip.h> 40 #include <linux/in.h> 41 #include <linux/igmp.h> 42 #include <linux/inetdevice.h> 43 #include <linux/delay.h> 44 #include <linux/completion.h> 45 46 #include <net/dst.h> 47 48 #include "ipoib.h" 49 50 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 51 static int mcast_debug_level; 52 53 module_param(mcast_debug_level, int, 0644); 54 MODULE_PARM_DESC(mcast_debug_level, 55 "Enable multicast debug tracing if > 0"); 56 #endif 57 58 static DEFINE_MUTEX(mcast_mutex); 59 60 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ 61 struct ipoib_mcast { 62 struct ib_sa_mcmember_rec mcmember; 63 struct ipoib_ah *ah; 64 65 struct rb_node rb_node; 66 struct list_head list; 67 struct completion done; 68 69 int query_id; 70 struct ib_sa_query *query; 71 72 unsigned long created; 73 unsigned long backoff; 74 75 unsigned long flags; 76 unsigned char logcount; 77 78 struct list_head neigh_list; 79 80 struct sk_buff_head pkt_queue; 81 82 struct net_device *dev; 83 }; 84 85 struct ipoib_mcast_iter { 86 struct net_device *dev; 87 union ib_gid mgid; 88 unsigned long created; 89 unsigned int queuelen; 90 unsigned int complete; 91 unsigned int send_only; 92 }; 93 94 static void ipoib_mcast_free(struct ipoib_mcast *mcast) 95 { 96 struct net_device *dev = mcast->dev; 97 struct ipoib_dev_priv *priv = netdev_priv(dev); 98 struct ipoib_neigh *neigh, *tmp; 99 unsigned long flags; 100 int tx_dropped = 0; 101 102 ipoib_dbg_mcast(netdev_priv(dev), 103 "deleting multicast group " IPOIB_GID_FMT "\n", 104 IPOIB_GID_ARG(mcast->mcmember.mgid)); 105 106 spin_lock_irqsave(&priv->lock, flags); 107 108 list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { 109 /* 110 * It's safe to call ipoib_put_ah() inside priv->lock 111 * here, because we know that mcast->ah will always 112 * hold one more reference, so ipoib_put_ah() will 113 * never do more than decrement the ref count. 114 */ 115 if (neigh->ah) 116 ipoib_put_ah(neigh->ah); 117 *to_ipoib_neigh(neigh->neighbour) = NULL; 118 kfree(neigh); 119 } 120 121 spin_unlock_irqrestore(&priv->lock, flags); 122 123 if (mcast->ah) 124 ipoib_put_ah(mcast->ah); 125 126 while (!skb_queue_empty(&mcast->pkt_queue)) { 127 ++tx_dropped; 128 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); 129 } 130 131 spin_lock_irqsave(&priv->tx_lock, flags); 132 priv->stats.tx_dropped += tx_dropped; 133 spin_unlock_irqrestore(&priv->tx_lock, flags); 134 135 kfree(mcast); 136 } 137 138 static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, 139 int can_sleep) 140 { 141 struct ipoib_mcast *mcast; 142 143 mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); 144 if (!mcast) 145 return NULL; 146 147 mcast->dev = dev; 148 mcast->created = jiffies; 149 mcast->backoff = 1; 150 151 INIT_LIST_HEAD(&mcast->list); 152 INIT_LIST_HEAD(&mcast->neigh_list); 153 skb_queue_head_init(&mcast->pkt_queue); 154 155 return mcast; 156 } 157 158 static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid) 159 { 160 struct ipoib_dev_priv *priv = netdev_priv(dev); 161 struct rb_node *n = priv->multicast_tree.rb_node; 162 163 while (n) { 164 struct ipoib_mcast *mcast; 165 int ret; 166 167 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 168 169 ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw, 170 sizeof (union ib_gid)); 171 if (ret < 0) 172 n = n->rb_left; 173 else if (ret > 0) 174 n = n->rb_right; 175 else 176 return mcast; 177 } 178 179 return NULL; 180 } 181 182 static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) 183 { 184 struct ipoib_dev_priv *priv = netdev_priv(dev); 185 struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; 186 187 while (*n) { 188 struct ipoib_mcast *tmcast; 189 int ret; 190 191 pn = *n; 192 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); 193 194 ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, 195 sizeof (union ib_gid)); 196 if (ret < 0) 197 n = &pn->rb_left; 198 else if (ret > 0) 199 n = &pn->rb_right; 200 else 201 return -EEXIST; 202 } 203 204 rb_link_node(&mcast->rb_node, pn, n); 205 rb_insert_color(&mcast->rb_node, &priv->multicast_tree); 206 207 return 0; 208 } 209 210 static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, 211 struct ib_sa_mcmember_rec *mcmember) 212 { 213 struct net_device *dev = mcast->dev; 214 struct ipoib_dev_priv *priv = netdev_priv(dev); 215 struct ipoib_ah *ah; 216 int ret; 217 218 mcast->mcmember = *mcmember; 219 220 /* Set the cached Q_Key before we attach if it's the broadcast group */ 221 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 222 sizeof (union ib_gid))) { 223 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 224 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 225 } 226 227 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 228 if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 229 ipoib_warn(priv, "multicast group " IPOIB_GID_FMT 230 " already attached\n", 231 IPOIB_GID_ARG(mcast->mcmember.mgid)); 232 233 return 0; 234 } 235 236 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), 237 &mcast->mcmember.mgid); 238 if (ret < 0) { 239 ipoib_warn(priv, "couldn't attach QP to multicast group " 240 IPOIB_GID_FMT "\n", 241 IPOIB_GID_ARG(mcast->mcmember.mgid)); 242 243 clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); 244 return ret; 245 } 246 } 247 248 { 249 struct ib_ah_attr av = { 250 .dlid = be16_to_cpu(mcast->mcmember.mlid), 251 .port_num = priv->port, 252 .sl = mcast->mcmember.sl, 253 .ah_flags = IB_AH_GRH, 254 .grh = { 255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label), 256 .hop_limit = mcast->mcmember.hop_limit, 257 .sgid_index = 0, 258 .traffic_class = mcast->mcmember.traffic_class 259 } 260 }; 261 int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate); 262 263 av.grh.dgid = mcast->mcmember.mgid; 264 265 if (path_rate > 0 && priv->local_rate > path_rate) 266 av.static_rate = (priv->local_rate - 1) / path_rate; 267 268 ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n", 269 av.static_rate, priv->local_rate, 270 ib_sa_rate_enum_to_int(mcast->mcmember.rate)); 271 272 ah = ipoib_create_ah(dev, priv->pd, &av); 273 if (!ah) { 274 ipoib_warn(priv, "ib_address_create failed\n"); 275 } else { 276 ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT 277 " AV %p, LID 0x%04x, SL %d\n", 278 IPOIB_GID_ARG(mcast->mcmember.mgid), 279 mcast->ah->ah, 280 be16_to_cpu(mcast->mcmember.mlid), 281 mcast->mcmember.sl); 282 } 283 284 spin_lock_irq(&priv->lock); 285 mcast->ah = ah; 286 spin_unlock_irq(&priv->lock); 287 } 288 289 /* actually send any queued packets */ 290 spin_lock_irq(&priv->tx_lock); 291 while (!skb_queue_empty(&mcast->pkt_queue)) { 292 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 293 spin_unlock_irq(&priv->tx_lock); 294 295 skb->dev = dev; 296 297 if (!skb->dst || !skb->dst->neighbour) { 298 /* put pseudoheader back on for next time */ 299 skb_push(skb, sizeof (struct ipoib_pseudoheader)); 300 } 301 302 if (dev_queue_xmit(skb)) 303 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); 304 spin_lock_irq(&priv->tx_lock); 305 } 306 spin_unlock_irq(&priv->tx_lock); 307 308 return 0; 309 } 310 311 static void 312 ipoib_mcast_sendonly_join_complete(int status, 313 struct ib_sa_mcmember_rec *mcmember, 314 void *mcast_ptr) 315 { 316 struct ipoib_mcast *mcast = mcast_ptr; 317 struct net_device *dev = mcast->dev; 318 struct ipoib_dev_priv *priv = netdev_priv(dev); 319 320 if (!status) 321 ipoib_mcast_join_finish(mcast, mcmember); 322 else { 323 if (mcast->logcount++ < 20) 324 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " 325 IPOIB_GID_FMT ", status %d\n", 326 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 327 328 /* Flush out any queued packets */ 329 spin_lock_irq(&priv->tx_lock); 330 while (!skb_queue_empty(&mcast->pkt_queue)) { 331 ++priv->stats.tx_dropped; 332 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); 333 } 334 spin_unlock_irq(&priv->tx_lock); 335 336 /* Clear the busy flag so we try again */ 337 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 338 } 339 340 complete(&mcast->done); 341 } 342 343 static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) 344 { 345 struct net_device *dev = mcast->dev; 346 struct ipoib_dev_priv *priv = netdev_priv(dev); 347 struct ib_sa_mcmember_rec rec = { 348 #if 0 /* Some SMs don't support send-only yet */ 349 .join_state = 4 350 #else 351 .join_state = 1 352 #endif 353 }; 354 int ret = 0; 355 356 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { 357 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); 358 return -ENODEV; 359 } 360 361 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { 362 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); 363 return -EBUSY; 364 } 365 366 rec.mgid = mcast->mcmember.mgid; 367 rec.port_gid = priv->local_gid; 368 rec.pkey = cpu_to_be16(priv->pkey); 369 370 init_completion(&mcast->done); 371 372 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, 373 IB_SA_MCMEMBER_REC_MGID | 374 IB_SA_MCMEMBER_REC_PORT_GID | 375 IB_SA_MCMEMBER_REC_PKEY | 376 IB_SA_MCMEMBER_REC_JOIN_STATE, 377 1000, GFP_ATOMIC, 378 ipoib_mcast_sendonly_join_complete, 379 mcast, &mcast->query); 380 if (ret < 0) { 381 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", 382 ret); 383 } else { 384 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT 385 ", starting join\n", 386 IPOIB_GID_ARG(mcast->mcmember.mgid)); 387 388 mcast->query_id = ret; 389 } 390 391 return ret; 392 } 393 394 static void ipoib_mcast_join_complete(int status, 395 struct ib_sa_mcmember_rec *mcmember, 396 void *mcast_ptr) 397 { 398 struct ipoib_mcast *mcast = mcast_ptr; 399 struct net_device *dev = mcast->dev; 400 struct ipoib_dev_priv *priv = netdev_priv(dev); 401 402 ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT 403 " (status %d)\n", 404 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 405 406 if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { 407 mcast->backoff = 1; 408 mutex_lock(&mcast_mutex); 409 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 410 queue_work(ipoib_workqueue, &priv->mcast_task); 411 mutex_unlock(&mcast_mutex); 412 complete(&mcast->done); 413 return; 414 } 415 416 if (status == -EINTR) { 417 complete(&mcast->done); 418 return; 419 } 420 421 if (status && mcast->logcount++ < 20) { 422 if (status == -ETIMEDOUT || status == -EINTR) { 423 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT 424 ", status %d\n", 425 IPOIB_GID_ARG(mcast->mcmember.mgid), 426 status); 427 } else { 428 ipoib_warn(priv, "multicast join failed for " 429 IPOIB_GID_FMT ", status %d\n", 430 IPOIB_GID_ARG(mcast->mcmember.mgid), 431 status); 432 } 433 } 434 435 mcast->backoff *= 2; 436 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 437 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 438 439 mutex_lock(&mcast_mutex); 440 441 spin_lock_irq(&priv->lock); 442 mcast->query = NULL; 443 444 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { 445 if (status == -ETIMEDOUT) 446 queue_work(ipoib_workqueue, &priv->mcast_task); 447 else 448 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 449 mcast->backoff * HZ); 450 } else 451 complete(&mcast->done); 452 spin_unlock_irq(&priv->lock); 453 mutex_unlock(&mcast_mutex); 454 455 return; 456 } 457 458 static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, 459 int create) 460 { 461 struct ipoib_dev_priv *priv = netdev_priv(dev); 462 struct ib_sa_mcmember_rec rec = { 463 .join_state = 1 464 }; 465 ib_sa_comp_mask comp_mask; 466 int ret = 0; 467 468 ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", 469 IPOIB_GID_ARG(mcast->mcmember.mgid)); 470 471 rec.mgid = mcast->mcmember.mgid; 472 rec.port_gid = priv->local_gid; 473 rec.pkey = cpu_to_be16(priv->pkey); 474 475 comp_mask = 476 IB_SA_MCMEMBER_REC_MGID | 477 IB_SA_MCMEMBER_REC_PORT_GID | 478 IB_SA_MCMEMBER_REC_PKEY | 479 IB_SA_MCMEMBER_REC_JOIN_STATE; 480 481 if (create) { 482 comp_mask |= 483 IB_SA_MCMEMBER_REC_QKEY | 484 IB_SA_MCMEMBER_REC_SL | 485 IB_SA_MCMEMBER_REC_FLOW_LABEL | 486 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 487 488 rec.qkey = priv->broadcast->mcmember.qkey; 489 rec.sl = priv->broadcast->mcmember.sl; 490 rec.flow_label = priv->broadcast->mcmember.flow_label; 491 rec.traffic_class = priv->broadcast->mcmember.traffic_class; 492 } 493 494 init_completion(&mcast->done); 495 496 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, 497 mcast->backoff * 1000, GFP_ATOMIC, 498 ipoib_mcast_join_complete, 499 mcast, &mcast->query); 500 501 if (ret < 0) { 502 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); 503 504 mcast->backoff *= 2; 505 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 506 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 507 508 mutex_lock(&mcast_mutex); 509 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 510 queue_delayed_work(ipoib_workqueue, 511 &priv->mcast_task, 512 mcast->backoff * HZ); 513 mutex_unlock(&mcast_mutex); 514 } else 515 mcast->query_id = ret; 516 } 517 518 void ipoib_mcast_join_task(void *dev_ptr) 519 { 520 struct net_device *dev = dev_ptr; 521 struct ipoib_dev_priv *priv = netdev_priv(dev); 522 523 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 524 return; 525 526 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 527 ipoib_warn(priv, "ib_gid_entry_get() failed\n"); 528 else 529 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 530 531 { 532 struct ib_port_attr attr; 533 534 if (!ib_query_port(priv->ca, priv->port, &attr)) { 535 priv->local_lid = attr.lid; 536 priv->local_rate = attr.active_speed * 537 ib_width_enum_to_int(attr.active_width); 538 } else 539 ipoib_warn(priv, "ib_query_port failed\n"); 540 } 541 542 if (!priv->broadcast) { 543 struct ipoib_mcast *broadcast; 544 545 broadcast = ipoib_mcast_alloc(dev, 1); 546 if (!broadcast) { 547 ipoib_warn(priv, "failed to allocate broadcast group\n"); 548 mutex_lock(&mcast_mutex); 549 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 550 queue_delayed_work(ipoib_workqueue, 551 &priv->mcast_task, HZ); 552 mutex_unlock(&mcast_mutex); 553 return; 554 } 555 556 spin_lock_irq(&priv->lock); 557 memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 558 sizeof (union ib_gid)); 559 priv->broadcast = broadcast; 560 561 __ipoib_mcast_add(dev, priv->broadcast); 562 spin_unlock_irq(&priv->lock); 563 } 564 565 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 566 ipoib_mcast_join(dev, priv->broadcast, 0); 567 return; 568 } 569 570 while (1) { 571 struct ipoib_mcast *mcast = NULL; 572 573 spin_lock_irq(&priv->lock); 574 list_for_each_entry(mcast, &priv->multicast_list, list) { 575 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) 576 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) 577 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 578 /* Found the next unjoined group */ 579 break; 580 } 581 } 582 spin_unlock_irq(&priv->lock); 583 584 if (&mcast->list == &priv->multicast_list) { 585 /* All done */ 586 break; 587 } 588 589 ipoib_mcast_join(dev, mcast, 1); 590 return; 591 } 592 593 priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - 594 IPOIB_ENCAP_LEN; 595 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 596 597 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 598 599 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 600 netif_carrier_on(dev); 601 } 602 603 int ipoib_mcast_start_thread(struct net_device *dev) 604 { 605 struct ipoib_dev_priv *priv = netdev_priv(dev); 606 607 ipoib_dbg_mcast(priv, "starting multicast thread\n"); 608 609 mutex_lock(&mcast_mutex); 610 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) 611 queue_work(ipoib_workqueue, &priv->mcast_task); 612 mutex_unlock(&mcast_mutex); 613 614 spin_lock_irq(&priv->lock); 615 set_bit(IPOIB_MCAST_STARTED, &priv->flags); 616 spin_unlock_irq(&priv->lock); 617 618 return 0; 619 } 620 621 int ipoib_mcast_stop_thread(struct net_device *dev, int flush) 622 { 623 struct ipoib_dev_priv *priv = netdev_priv(dev); 624 struct ipoib_mcast *mcast; 625 626 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 627 628 spin_lock_irq(&priv->lock); 629 clear_bit(IPOIB_MCAST_STARTED, &priv->flags); 630 spin_unlock_irq(&priv->lock); 631 632 mutex_lock(&mcast_mutex); 633 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 634 cancel_delayed_work(&priv->mcast_task); 635 mutex_unlock(&mcast_mutex); 636 637 if (flush) 638 flush_workqueue(ipoib_workqueue); 639 640 spin_lock_irq(&priv->lock); 641 if (priv->broadcast && priv->broadcast->query) { 642 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); 643 priv->broadcast->query = NULL; 644 spin_unlock_irq(&priv->lock); 645 ipoib_dbg_mcast(priv, "waiting for bcast\n"); 646 wait_for_completion(&priv->broadcast->done); 647 } else 648 spin_unlock_irq(&priv->lock); 649 650 list_for_each_entry(mcast, &priv->multicast_list, list) { 651 spin_lock_irq(&priv->lock); 652 if (mcast->query) { 653 ib_sa_cancel_query(mcast->query_id, mcast->query); 654 mcast->query = NULL; 655 spin_unlock_irq(&priv->lock); 656 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", 657 IPOIB_GID_ARG(mcast->mcmember.mgid)); 658 wait_for_completion(&mcast->done); 659 } else 660 spin_unlock_irq(&priv->lock); 661 } 662 663 return 0; 664 } 665 666 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) 667 { 668 struct ipoib_dev_priv *priv = netdev_priv(dev); 669 struct ib_sa_mcmember_rec rec = { 670 .join_state = 1 671 }; 672 int ret = 0; 673 674 if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) 675 return 0; 676 677 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", 678 IPOIB_GID_ARG(mcast->mcmember.mgid)); 679 680 rec.mgid = mcast->mcmember.mgid; 681 rec.port_gid = priv->local_gid; 682 rec.pkey = cpu_to_be16(priv->pkey); 683 684 /* Remove ourselves from the multicast group */ 685 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 686 &mcast->mcmember.mgid); 687 if (ret) 688 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 689 690 /* 691 * Just make one shot at leaving and don't wait for a reply; 692 * if we fail, too bad. 693 */ 694 ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, 695 IB_SA_MCMEMBER_REC_MGID | 696 IB_SA_MCMEMBER_REC_PORT_GID | 697 IB_SA_MCMEMBER_REC_PKEY | 698 IB_SA_MCMEMBER_REC_JOIN_STATE, 699 0, GFP_ATOMIC, NULL, 700 mcast, &mcast->query); 701 if (ret < 0) 702 ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " 703 "for leave (result = %d)\n", ret); 704 705 return 0; 706 } 707 708 void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, 709 struct sk_buff *skb) 710 { 711 struct ipoib_dev_priv *priv = netdev_priv(dev); 712 struct ipoib_mcast *mcast; 713 714 /* 715 * We can only be called from ipoib_start_xmit, so we're 716 * inside tx_lock -- no need to save/restore flags. 717 */ 718 spin_lock(&priv->lock); 719 720 if (!test_bit(IPOIB_MCAST_STARTED, &priv->flags) || 721 !priv->broadcast || 722 !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 723 ++priv->stats.tx_dropped; 724 dev_kfree_skb_any(skb); 725 goto unlock; 726 } 727 728 mcast = __ipoib_mcast_find(dev, mgid); 729 if (!mcast) { 730 /* Let's create a new send only group now */ 731 ipoib_dbg_mcast(priv, "setting up send only multicast group for " 732 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); 733 734 mcast = ipoib_mcast_alloc(dev, 0); 735 if (!mcast) { 736 ipoib_warn(priv, "unable to allocate memory for " 737 "multicast structure\n"); 738 ++priv->stats.tx_dropped; 739 dev_kfree_skb_any(skb); 740 goto out; 741 } 742 743 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 744 mcast->mcmember.mgid = *mgid; 745 __ipoib_mcast_add(dev, mcast); 746 list_add_tail(&mcast->list, &priv->multicast_list); 747 } 748 749 if (!mcast->ah) { 750 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) 751 skb_queue_tail(&mcast->pkt_queue, skb); 752 else { 753 ++priv->stats.tx_dropped; 754 dev_kfree_skb_any(skb); 755 } 756 757 if (mcast->query) 758 ipoib_dbg_mcast(priv, "no address vector, " 759 "but multicast join already started\n"); 760 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 761 ipoib_mcast_sendonly_join(mcast); 762 763 /* 764 * If lookup completes between here and out:, don't 765 * want to send packet twice. 766 */ 767 mcast = NULL; 768 } 769 770 out: 771 if (mcast && mcast->ah) { 772 if (skb->dst && 773 skb->dst->neighbour && 774 !*to_ipoib_neigh(skb->dst->neighbour)) { 775 struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 776 777 if (neigh) { 778 kref_get(&mcast->ah->ref); 779 neigh->ah = mcast->ah; 780 neigh->neighbour = skb->dst->neighbour; 781 *to_ipoib_neigh(skb->dst->neighbour) = neigh; 782 list_add_tail(&neigh->list, &mcast->neigh_list); 783 } 784 } 785 786 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); 787 } 788 789 unlock: 790 spin_unlock(&priv->lock); 791 } 792 793 void ipoib_mcast_dev_flush(struct net_device *dev) 794 { 795 struct ipoib_dev_priv *priv = netdev_priv(dev); 796 LIST_HEAD(remove_list); 797 struct ipoib_mcast *mcast, *tmcast; 798 unsigned long flags; 799 800 ipoib_dbg_mcast(priv, "flushing multicast list\n"); 801 802 spin_lock_irqsave(&priv->lock, flags); 803 804 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 805 list_del(&mcast->list); 806 rb_erase(&mcast->rb_node, &priv->multicast_tree); 807 list_add_tail(&mcast->list, &remove_list); 808 } 809 810 if (priv->broadcast) { 811 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); 812 list_add_tail(&priv->broadcast->list, &remove_list); 813 priv->broadcast = NULL; 814 } 815 816 spin_unlock_irqrestore(&priv->lock, flags); 817 818 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 819 ipoib_mcast_leave(dev, mcast); 820 ipoib_mcast_free(mcast); 821 } 822 } 823 824 void ipoib_mcast_restart_task(void *dev_ptr) 825 { 826 struct net_device *dev = dev_ptr; 827 struct ipoib_dev_priv *priv = netdev_priv(dev); 828 struct dev_mc_list *mclist; 829 struct ipoib_mcast *mcast, *tmcast; 830 LIST_HEAD(remove_list); 831 unsigned long flags; 832 833 ipoib_dbg_mcast(priv, "restarting multicast task\n"); 834 835 ipoib_mcast_stop_thread(dev, 0); 836 837 spin_lock_irqsave(&dev->xmit_lock, flags); 838 spin_lock(&priv->lock); 839 840 /* 841 * Unfortunately, the networking core only gives us a list of all of 842 * the multicast hardware addresses. We need to figure out which ones 843 * are new and which ones have been removed 844 */ 845 846 /* Clear out the found flag */ 847 list_for_each_entry(mcast, &priv->multicast_list, list) 848 clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 849 850 /* Mark all of the entries that are found or don't exist */ 851 for (mclist = dev->mc_list; mclist; mclist = mclist->next) { 852 union ib_gid mgid; 853 854 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); 855 856 /* Add in the P_Key */ 857 mgid.raw[4] = (priv->pkey >> 8) & 0xff; 858 mgid.raw[5] = priv->pkey & 0xff; 859 860 mcast = __ipoib_mcast_find(dev, &mgid); 861 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 862 struct ipoib_mcast *nmcast; 863 864 /* Not found or send-only group, let's add a new entry */ 865 ipoib_dbg_mcast(priv, "adding multicast entry for mgid " 866 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); 867 868 nmcast = ipoib_mcast_alloc(dev, 0); 869 if (!nmcast) { 870 ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); 871 continue; 872 } 873 874 set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); 875 876 nmcast->mcmember.mgid = mgid; 877 878 if (mcast) { 879 /* Destroy the send only entry */ 880 list_del(&mcast->list); 881 list_add_tail(&mcast->list, &remove_list); 882 883 rb_replace_node(&mcast->rb_node, 884 &nmcast->rb_node, 885 &priv->multicast_tree); 886 } else 887 __ipoib_mcast_add(dev, nmcast); 888 889 list_add_tail(&nmcast->list, &priv->multicast_list); 890 } 891 892 if (mcast) 893 set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 894 } 895 896 /* Remove all of the entries don't exist anymore */ 897 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 898 if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && 899 !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 900 ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", 901 IPOIB_GID_ARG(mcast->mcmember.mgid)); 902 903 rb_erase(&mcast->rb_node, &priv->multicast_tree); 904 905 /* Move to the remove list */ 906 list_del(&mcast->list); 907 list_add_tail(&mcast->list, &remove_list); 908 } 909 } 910 911 spin_unlock(&priv->lock); 912 spin_unlock_irqrestore(&dev->xmit_lock, flags); 913 914 /* We have to cancel outside of the spinlock */ 915 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 916 ipoib_mcast_leave(mcast->dev, mcast); 917 ipoib_mcast_free(mcast); 918 } 919 920 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 921 ipoib_mcast_start_thread(dev); 922 } 923 924 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 925 926 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) 927 { 928 struct ipoib_mcast_iter *iter; 929 930 iter = kmalloc(sizeof *iter, GFP_KERNEL); 931 if (!iter) 932 return NULL; 933 934 iter->dev = dev; 935 memset(iter->mgid.raw, 0, 16); 936 937 if (ipoib_mcast_iter_next(iter)) { 938 kfree(iter); 939 return NULL; 940 } 941 942 return iter; 943 } 944 945 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) 946 { 947 struct ipoib_dev_priv *priv = netdev_priv(iter->dev); 948 struct rb_node *n; 949 struct ipoib_mcast *mcast; 950 int ret = 1; 951 952 spin_lock_irq(&priv->lock); 953 954 n = rb_first(&priv->multicast_tree); 955 956 while (n) { 957 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 958 959 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, 960 sizeof (union ib_gid)) < 0) { 961 iter->mgid = mcast->mcmember.mgid; 962 iter->created = mcast->created; 963 iter->queuelen = skb_queue_len(&mcast->pkt_queue); 964 iter->complete = !!mcast->ah; 965 iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); 966 967 ret = 0; 968 969 break; 970 } 971 972 n = rb_next(n); 973 } 974 975 spin_unlock_irq(&priv->lock); 976 977 return ret; 978 } 979 980 void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, 981 union ib_gid *mgid, 982 unsigned long *created, 983 unsigned int *queuelen, 984 unsigned int *complete, 985 unsigned int *send_only) 986 { 987 *mgid = iter->mgid; 988 *created = iter->created; 989 *queuelen = iter->queuelen; 990 *complete = iter->complete; 991 *send_only = iter->send_only; 992 } 993 994 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 995