1 /* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 #include "ipoib.h" 36 37 #include <linux/delay.h> 38 #include <linux/completion.h> 39 40 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 41 static int mcast_debug_level = 1; 42 43 module_param(mcast_debug_level, int, 0644); 44 MODULE_PARM_DESC(mcast_debug_level, 45 "Enable multicast debug tracing if > 0"); 46 #endif 47 48 static DEFINE_MUTEX(mcast_mutex); 49 50 struct ipoib_mcast_iter { 51 struct ipoib_dev_priv *priv; 52 union ib_gid mgid; 53 unsigned long created; 54 unsigned int queuelen; 55 unsigned int complete; 56 unsigned int send_only; 57 }; 58 59 static void ipoib_mcast_free(struct ipoib_mcast *mcast) 60 { 61 struct ifnet *dev = mcast->priv->dev; 62 int tx_dropped = 0; 63 64 ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n", 65 mcast->mcmember.mgid.raw, ":"); 66 67 if (mcast->ah) 68 ipoib_put_ah(mcast->ah); 69 70 tx_dropped = mcast->pkt_queue.ifq_len; 71 _IF_DRAIN(&mcast->pkt_queue); /* XXX Locking. */ 72 73 dev->if_oerrors += tx_dropped; 74 75 kfree(mcast); 76 } 77 78 static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv, 79 int can_sleep) 80 { 81 struct ipoib_mcast *mcast; 82 83 mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); 84 if (!mcast) 85 return NULL; 86 87 mcast->priv = priv; 88 mcast->created = jiffies; 89 mcast->backoff = 1; 90 91 INIT_LIST_HEAD(&mcast->list); 92 bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue)); 93 94 return mcast; 95 } 96 97 static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv, 98 void *mgid) 99 { 100 struct rb_node *n = priv->multicast_tree.rb_node; 101 102 while (n) { 103 struct ipoib_mcast *mcast; 104 int ret; 105 106 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 107 108 ret = memcmp(mgid, mcast->mcmember.mgid.raw, 109 sizeof (union ib_gid)); 110 if (ret < 0) 111 n = n->rb_left; 112 else if (ret > 0) 113 n = n->rb_right; 114 else 115 return mcast; 116 } 117 118 return NULL; 119 } 120 121 static int __ipoib_mcast_add(struct ipoib_dev_priv *priv, 122 struct ipoib_mcast *mcast) 123 { 124 struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; 125 126 while (*n) { 127 struct ipoib_mcast *tmcast; 128 int ret; 129 130 pn = *n; 131 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); 132 133 ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, 134 sizeof (union ib_gid)); 135 if (ret < 0) 136 n = &pn->rb_left; 137 else if (ret > 0) 138 n = &pn->rb_right; 139 else 140 return -EEXIST; 141 } 142 143 rb_link_node(&mcast->rb_node, pn, n); 144 rb_insert_color(&mcast->rb_node, &priv->multicast_tree); 145 146 return 0; 147 } 148 149 static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, 150 struct ib_sa_mcmember_rec *mcmember) 151 { 152 struct ipoib_dev_priv *priv = mcast->priv; 153 struct ifnet *dev = priv->dev; 154 struct ipoib_ah *ah; 155 int ret; 156 int set_qkey = 0; 157 158 mcast->mcmember = *mcmember; 159 160 /* Set the cached Q_Key before we attach if it's the broadcast group */ 161 if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4, 162 sizeof (union ib_gid))) { 163 spin_lock_irq(&priv->lock); 164 if (!priv->broadcast) { 165 spin_unlock_irq(&priv->lock); 166 return -EAGAIN; 167 } 168 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 169 spin_unlock_irq(&priv->lock); 170 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 171 set_qkey = 1; 172 } 173 174 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 175 if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 176 ipoib_warn(priv, "multicast group %16D already attached\n", 177 mcast->mcmember.mgid.raw, ":"); 178 179 return 0; 180 } 181 182 ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid), 183 &mcast->mcmember.mgid, set_qkey); 184 if (ret < 0) { 185 ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n", 186 mcast->mcmember.mgid.raw, ":"); 187 188 clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); 189 return ret; 190 } 191 } 192 193 { 194 struct ib_ah_attr av = { 195 .dlid = be16_to_cpu(mcast->mcmember.mlid), 196 .port_num = priv->port, 197 .sl = mcast->mcmember.sl, 198 .ah_flags = IB_AH_GRH, 199 .static_rate = mcast->mcmember.rate, 200 .grh = { 201 .flow_label = be32_to_cpu(mcast->mcmember.flow_label), 202 .hop_limit = mcast->mcmember.hop_limit, 203 .sgid_index = 0, 204 .traffic_class = mcast->mcmember.traffic_class 205 } 206 }; 207 av.grh.dgid = mcast->mcmember.mgid; 208 209 ah = ipoib_create_ah(priv, priv->pd, &av); 210 if (!ah) { 211 ipoib_warn(priv, "ib_address_create failed\n"); 212 } else { 213 spin_lock_irq(&priv->lock); 214 mcast->ah = ah; 215 spin_unlock_irq(&priv->lock); 216 217 ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n", 218 mcast->mcmember.mgid.raw, ":", 219 mcast->ah->ah, 220 be16_to_cpu(mcast->mcmember.mlid), 221 mcast->mcmember.sl); 222 } 223 } 224 225 /* actually send any queued packets */ 226 while (mcast->pkt_queue.ifq_len) { 227 struct mbuf *mb; 228 _IF_DEQUEUE(&mcast->pkt_queue, mb); 229 mb->m_pkthdr.rcvif = dev; 230 231 if (dev->if_transmit(dev, mb)) 232 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); 233 } 234 235 return 0; 236 } 237 238 static int 239 ipoib_mcast_sendonly_join_complete(int status, 240 struct ib_sa_multicast *multicast) 241 { 242 struct ipoib_mcast *mcast = multicast->context; 243 struct ipoib_dev_priv *priv = mcast->priv; 244 245 /* We trap for port events ourselves. */ 246 if (status == -ENETRESET) 247 return 0; 248 249 if (!status) 250 status = ipoib_mcast_join_finish(mcast, &multicast->rec); 251 252 if (status) { 253 if (mcast->logcount++ < 20) 254 ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n", 255 mcast->mcmember.mgid.raw, ":", status); 256 257 /* Flush out any queued packets */ 258 priv->dev->if_oerrors += mcast->pkt_queue.ifq_len; 259 _IF_DRAIN(&mcast->pkt_queue); 260 261 /* Clear the busy flag so we try again */ 262 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, 263 &mcast->flags); 264 } 265 return status; 266 } 267 268 static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) 269 { 270 struct ipoib_dev_priv *priv = mcast->priv; 271 struct ib_sa_mcmember_rec rec = { 272 #if 0 /* Some SMs don't support send-only yet */ 273 .join_state = 4 274 #else 275 .join_state = 1 276 #endif 277 }; 278 int ret = 0; 279 280 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { 281 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); 282 return -ENODEV; 283 } 284 285 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { 286 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); 287 return -EBUSY; 288 } 289 290 rec.mgid = mcast->mcmember.mgid; 291 rec.port_gid = priv->local_gid; 292 rec.pkey = cpu_to_be16(priv->pkey); 293 294 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, 295 priv->port, &rec, 296 IB_SA_MCMEMBER_REC_MGID | 297 IB_SA_MCMEMBER_REC_PORT_GID | 298 IB_SA_MCMEMBER_REC_PKEY | 299 IB_SA_MCMEMBER_REC_JOIN_STATE, 300 GFP_ATOMIC, 301 ipoib_mcast_sendonly_join_complete, 302 mcast); 303 if (IS_ERR(mcast->mc)) { 304 ret = PTR_ERR(mcast->mc); 305 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 306 ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", 307 ret); 308 } else { 309 ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n", 310 mcast->mcmember.mgid.raw, ":"); 311 } 312 313 return ret; 314 } 315 316 void ipoib_mcast_carrier_on_task(struct work_struct *work) 317 { 318 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 319 carrier_on_task); 320 struct ib_port_attr attr; 321 322 /* 323 * Take rtnl_lock to avoid racing with ipoib_stop() and 324 * turning the carrier back on while a device is being 325 * removed. 326 */ 327 if (ib_query_port(priv->ca, priv->port, &attr) || 328 attr.state != IB_PORT_ACTIVE) { 329 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); 330 return; 331 } 332 if_link_state_change(priv->dev, LINK_STATE_UP); 333 } 334 335 static int ipoib_mcast_join_complete(int status, 336 struct ib_sa_multicast *multicast) 337 { 338 struct ipoib_mcast *mcast = multicast->context; 339 struct ipoib_dev_priv *priv = mcast->priv; 340 341 ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n", 342 mcast->mcmember.mgid.raw, ":", status); 343 344 /* We trap for port events ourselves. */ 345 if (status == -ENETRESET) 346 return 0; 347 348 if (!status) 349 status = ipoib_mcast_join_finish(mcast, &multicast->rec); 350 351 if (!status) { 352 mcast->backoff = 1; 353 mutex_lock(&mcast_mutex); 354 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 355 queue_delayed_work(ipoib_workqueue, 356 &priv->mcast_task, 0); 357 mutex_unlock(&mcast_mutex); 358 359 /* 360 * Defer carrier on work to ipoib_workqueue to avoid a 361 * deadlock on rtnl_lock here. 362 */ 363 if (mcast == priv->broadcast) 364 queue_work(ipoib_workqueue, &priv->carrier_on_task); 365 366 return 0; 367 } 368 369 if (mcast->logcount++ < 20) { 370 if (status == -ETIMEDOUT || status == -EAGAIN) { 371 ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n", 372 mcast->mcmember.mgid.raw, ":", status); 373 } else { 374 ipoib_warn(priv, "multicast join failed for %16D, status %d\n", 375 mcast->mcmember.mgid.raw, ":", status); 376 } 377 } 378 379 mcast->backoff *= 2; 380 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 381 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 382 383 /* Clear the busy flag so we try again */ 384 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 385 386 mutex_lock(&mcast_mutex); 387 spin_lock_irq(&priv->lock); 388 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 389 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 390 mcast->backoff * HZ); 391 spin_unlock_irq(&priv->lock); 392 mutex_unlock(&mcast_mutex); 393 394 return status; 395 } 396 397 static void ipoib_mcast_join(struct ipoib_dev_priv *priv, 398 struct ipoib_mcast *mcast, int create) 399 { 400 struct ib_sa_mcmember_rec rec = { 401 .join_state = 1 402 }; 403 ib_sa_comp_mask comp_mask; 404 int ret = 0; 405 406 ipoib_dbg_mcast(priv, "joining MGID %16D\n", 407 mcast->mcmember.mgid.raw, ":"); 408 409 rec.mgid = mcast->mcmember.mgid; 410 rec.port_gid = priv->local_gid; 411 rec.pkey = cpu_to_be16(priv->pkey); 412 413 comp_mask = 414 IB_SA_MCMEMBER_REC_MGID | 415 IB_SA_MCMEMBER_REC_PORT_GID | 416 IB_SA_MCMEMBER_REC_PKEY | 417 IB_SA_MCMEMBER_REC_JOIN_STATE; 418 419 if (create) { 420 comp_mask |= 421 IB_SA_MCMEMBER_REC_QKEY | 422 IB_SA_MCMEMBER_REC_MTU_SELECTOR | 423 IB_SA_MCMEMBER_REC_MTU | 424 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS | 425 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 426 IB_SA_MCMEMBER_REC_RATE | 427 IB_SA_MCMEMBER_REC_SL | 428 IB_SA_MCMEMBER_REC_FLOW_LABEL | 429 IB_SA_MCMEMBER_REC_HOP_LIMIT; 430 431 rec.qkey = priv->broadcast->mcmember.qkey; 432 rec.mtu_selector = IB_SA_EQ; 433 rec.mtu = priv->broadcast->mcmember.mtu; 434 rec.traffic_class = priv->broadcast->mcmember.traffic_class; 435 rec.rate_selector = IB_SA_EQ; 436 rec.rate = priv->broadcast->mcmember.rate; 437 rec.sl = priv->broadcast->mcmember.sl; 438 rec.flow_label = priv->broadcast->mcmember.flow_label; 439 rec.hop_limit = priv->broadcast->mcmember.hop_limit; 440 } 441 442 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 443 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, 444 &rec, comp_mask, GFP_KERNEL, 445 ipoib_mcast_join_complete, mcast); 446 if (IS_ERR(mcast->mc)) { 447 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 448 ret = PTR_ERR(mcast->mc); 449 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); 450 451 mcast->backoff *= 2; 452 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 453 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 454 455 mutex_lock(&mcast_mutex); 456 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 457 queue_delayed_work(ipoib_workqueue, 458 &priv->mcast_task, 459 mcast->backoff * HZ); 460 mutex_unlock(&mcast_mutex); 461 } 462 } 463 464 void ipoib_mcast_join_task(struct work_struct *work) 465 { 466 struct ipoib_dev_priv *priv = 467 container_of(work, struct ipoib_dev_priv, mcast_task.work); 468 struct ifnet *dev = priv->dev; 469 470 ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags); 471 472 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 473 return; 474 475 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 476 ipoib_warn(priv, "ib_query_gid() failed\n"); 477 else 478 memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid)); 479 480 { 481 struct ib_port_attr attr; 482 483 if (!ib_query_port(priv->ca, priv->port, &attr)) 484 priv->local_lid = attr.lid; 485 else 486 ipoib_warn(priv, "ib_query_port failed\n"); 487 } 488 489 if (!priv->broadcast) { 490 struct ipoib_mcast *broadcast; 491 492 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 493 return; 494 495 broadcast = ipoib_mcast_alloc(priv, 1); 496 if (!broadcast) { 497 ipoib_warn(priv, "failed to allocate broadcast group\n"); 498 mutex_lock(&mcast_mutex); 499 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 500 queue_delayed_work(ipoib_workqueue, 501 &priv->mcast_task, HZ); 502 mutex_unlock(&mcast_mutex); 503 return; 504 } 505 506 spin_lock_irq(&priv->lock); 507 memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4, 508 sizeof (union ib_gid)); 509 priv->broadcast = broadcast; 510 511 __ipoib_mcast_add(priv, priv->broadcast); 512 spin_unlock_irq(&priv->lock); 513 } 514 515 if (priv->broadcast && 516 !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 517 if (priv->broadcast && 518 !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) 519 ipoib_mcast_join(priv, priv->broadcast, 0); 520 return; 521 } 522 523 while (1) { 524 struct ipoib_mcast *mcast = NULL; 525 526 spin_lock_irq(&priv->lock); 527 list_for_each_entry(mcast, &priv->multicast_list, list) { 528 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) 529 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) 530 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 531 /* Found the next unjoined group */ 532 break; 533 } 534 } 535 spin_unlock_irq(&priv->lock); 536 537 if (&mcast->list == &priv->multicast_list) { 538 /* All done */ 539 break; 540 } 541 542 ipoib_mcast_join(priv, mcast, 1); 543 return; 544 } 545 546 spin_lock_irq(&priv->lock); 547 if (priv->broadcast) 548 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); 549 else 550 priv->mcast_mtu = priv->admin_mtu; 551 spin_unlock_irq(&priv->lock); 552 553 if (!ipoib_cm_admin_enabled(priv)) 554 ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu)); 555 556 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 557 558 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 559 } 560 561 int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv) 562 { 563 ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n", 564 priv->flags); 565 566 mutex_lock(&mcast_mutex); 567 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) 568 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); 569 mutex_unlock(&mcast_mutex); 570 571 return 0; 572 } 573 574 int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush) 575 { 576 577 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 578 579 mutex_lock(&mcast_mutex); 580 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 581 cancel_delayed_work(&priv->mcast_task); 582 mutex_unlock(&mcast_mutex); 583 584 if (flush) 585 flush_workqueue(ipoib_workqueue); 586 587 return 0; 588 } 589 590 static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast) 591 { 592 int ret = 0; 593 594 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 595 ib_sa_free_multicast(mcast->mc); 596 597 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 598 ipoib_dbg_mcast(priv, "leaving MGID %16D\n", 599 mcast->mcmember.mgid.raw, ":"); 600 601 /* Remove ourselves from the multicast group */ 602 ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, 603 be16_to_cpu(mcast->mcmember.mlid)); 604 if (ret) 605 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); 606 } 607 608 return 0; 609 } 610 611 void 612 ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb) 613 { 614 struct ifnet *dev = priv->dev; 615 struct ipoib_mcast *mcast; 616 617 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || 618 !priv->broadcast || 619 !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 620 ++dev->if_oerrors; 621 m_freem(mb); 622 return; 623 } 624 625 mcast = __ipoib_mcast_find(priv, mgid); 626 if (!mcast) { 627 /* Let's create a new send only group now */ 628 ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n", 629 mgid, ":"); 630 631 mcast = ipoib_mcast_alloc(priv, 0); 632 if (!mcast) { 633 ipoib_warn(priv, "unable to allocate memory for " 634 "multicast structure\n"); 635 ++dev->if_oerrors; 636 m_freem(mb); 637 goto out; 638 } 639 640 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 641 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); 642 __ipoib_mcast_add(priv, mcast); 643 list_add_tail(&mcast->list, &priv->multicast_list); 644 } 645 646 if (!mcast->ah) { 647 if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) { 648 _IF_ENQUEUE(&mcast->pkt_queue, mb); 649 } else { 650 ++dev->if_oerrors; 651 m_freem(mb); 652 } 653 654 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 655 ipoib_dbg_mcast(priv, "no address vector, " 656 "but multicast join already started\n"); 657 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 658 ipoib_mcast_sendonly_join(mcast); 659 660 /* 661 * If lookup completes between here and out:, don't 662 * want to send packet twice. 663 */ 664 mcast = NULL; 665 } 666 667 out: 668 if (mcast && mcast->ah) 669 ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN); 670 } 671 672 void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv) 673 { 674 LIST_HEAD(remove_list); 675 struct ipoib_mcast *mcast, *tmcast; 676 unsigned long flags; 677 678 ipoib_dbg_mcast(priv, "flushing multicast list\n"); 679 680 spin_lock_irqsave(&priv->lock, flags); 681 682 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 683 list_del(&mcast->list); 684 rb_erase(&mcast->rb_node, &priv->multicast_tree); 685 list_add_tail(&mcast->list, &remove_list); 686 } 687 688 if (priv->broadcast) { 689 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); 690 list_add_tail(&priv->broadcast->list, &remove_list); 691 priv->broadcast = NULL; 692 } 693 694 spin_unlock_irqrestore(&priv->lock, flags); 695 696 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 697 ipoib_mcast_leave(priv, mcast); 698 ipoib_mcast_free(mcast); 699 } 700 } 701 702 static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen, 703 const u8 *broadcast) 704 { 705 if (addrlen != INFINIBAND_ALEN) 706 return 0; 707 /* reserved QPN, prefix, scope */ 708 if (memcmp(addr, broadcast, 6)) 709 return 0; 710 /* signature lower, pkey */ 711 if (memcmp(addr + 7, broadcast + 7, 3)) 712 return 0; 713 return 1; 714 } 715 716 void ipoib_mcast_restart_task(struct work_struct *work) 717 { 718 struct ipoib_dev_priv *priv = 719 container_of(work, struct ipoib_dev_priv, restart_task); 720 ipoib_mcast_restart(priv); 721 } 722 723 void ipoib_mcast_restart(struct ipoib_dev_priv *priv) 724 { 725 struct ifnet *dev = priv->dev; 726 struct ifmultiaddr *ifma;; 727 struct ipoib_mcast *mcast, *tmcast; 728 LIST_HEAD(remove_list); 729 struct ib_sa_mcmember_rec rec; 730 int addrlen; 731 732 ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n", 733 priv->flags); 734 735 ipoib_mcast_stop_thread(priv, 0); 736 737 if_maddr_rlock(dev); 738 spin_lock(&priv->lock); 739 740 /* 741 * Unfortunately, the networking core only gives us a list of all of 742 * the multicast hardware addresses. We need to figure out which ones 743 * are new and which ones have been removed 744 */ 745 746 /* Clear out the found flag */ 747 list_for_each_entry(mcast, &priv->multicast_list, list) 748 clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 749 750 /* Mark all of the entries that are found or don't exist */ 751 752 753 TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { 754 union ib_gid mgid; 755 uint8_t *addr; 756 757 if (ifma->ifma_addr->sa_family != AF_LINK) 758 continue; 759 addr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); 760 addrlen = ((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen; 761 if (!ipoib_mcast_addr_is_valid(addr, addrlen, 762 dev->if_broadcastaddr)) 763 continue; 764 765 memcpy(mgid.raw, addr + 4, sizeof mgid); 766 767 mcast = __ipoib_mcast_find(priv, &mgid); 768 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 769 struct ipoib_mcast *nmcast; 770 771 /* ignore group which is directly joined by userspace */ 772 if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) && 773 !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) { 774 ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n", 775 mgid.raw, ":"); 776 continue; 777 } 778 779 /* Not found or send-only group, let's add a new entry */ 780 ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n", 781 mgid.raw, ":"); 782 783 nmcast = ipoib_mcast_alloc(priv, 0); 784 if (!nmcast) { 785 ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); 786 continue; 787 } 788 789 set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); 790 791 nmcast->mcmember.mgid = mgid; 792 793 if (mcast) { 794 /* Destroy the send only entry */ 795 list_move_tail(&mcast->list, &remove_list); 796 797 rb_replace_node(&mcast->rb_node, 798 &nmcast->rb_node, 799 &priv->multicast_tree); 800 } else 801 __ipoib_mcast_add(priv, nmcast); 802 803 list_add_tail(&nmcast->list, &priv->multicast_list); 804 } 805 806 if (mcast) 807 set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 808 } 809 810 /* Remove all of the entries don't exist anymore */ 811 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 812 if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && 813 !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 814 ipoib_dbg_mcast(priv, "deleting multicast group %16D\n", 815 mcast->mcmember.mgid.raw, ":"); 816 817 rb_erase(&mcast->rb_node, &priv->multicast_tree); 818 819 /* Move to the remove list */ 820 list_move_tail(&mcast->list, &remove_list); 821 } 822 } 823 824 spin_unlock(&priv->lock); 825 if_maddr_runlock(dev); 826 827 /* We have to cancel outside of the spinlock */ 828 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 829 ipoib_mcast_leave(mcast->priv, mcast); 830 ipoib_mcast_free(mcast); 831 } 832 833 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 834 ipoib_mcast_start_thread(priv); 835 } 836 837 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 838 839 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv) 840 { 841 struct ipoib_mcast_iter *iter; 842 843 iter = kmalloc(sizeof *iter, GFP_KERNEL); 844 if (!iter) 845 return NULL; 846 847 iter->priv = priv; 848 memset(iter->mgid.raw, 0, 16); 849 850 if (ipoib_mcast_iter_next(iter)) { 851 kfree(iter); 852 return NULL; 853 } 854 855 return iter; 856 } 857 858 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) 859 { 860 struct ipoib_dev_priv *priv = iter->priv; 861 struct rb_node *n; 862 struct ipoib_mcast *mcast; 863 int ret = 1; 864 865 spin_lock_irq(&priv->lock); 866 867 n = rb_first(&priv->multicast_tree); 868 869 while (n) { 870 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 871 872 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, 873 sizeof (union ib_gid)) < 0) { 874 iter->mgid = mcast->mcmember.mgid; 875 iter->created = mcast->created; 876 iter->queuelen = mcast->pkt_queue.ifq_len; 877 iter->complete = !!mcast->ah; 878 iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); 879 880 ret = 0; 881 882 break; 883 } 884 885 n = rb_next(n); 886 } 887 888 spin_unlock_irq(&priv->lock); 889 890 return ret; 891 } 892 893 void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, 894 union ib_gid *mgid, 895 unsigned long *created, 896 unsigned int *queuelen, 897 unsigned int *complete, 898 unsigned int *send_only) 899 { 900 *mgid = iter->mgid; 901 *created = iter->created; 902 *queuelen = iter->queuelen; 903 *complete = iter->complete; 904 *send_only = iter->send_only; 905 } 906 907 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 908