/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * IPoIB multicast support: joining/leaving IB multicast groups via the
 * SA (subnet administrator), tracking known groups in an rb-tree keyed
 * by MGID, and queueing outbound mbufs on a group until its join
 * completes and an address handle is available.
 */

#include "ipoib.h"

#include <linux/delay.h>
#include <linux/completion.h>

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int mcast_debug_level = 1;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

/* Serializes scheduling/cancelling of the delayed multicast join task. */
static DEFINE_MUTEX(mcast_mutex);

/* Cursor for walking the multicast rb-tree (debug interface only). */
struct ipoib_mcast_iter {
	struct ipoib_dev_priv *priv;
	union ib_gid       mgid;	/* last MGID visited; resume point */
	unsigned long      created;
	unsigned int       queuelen;	/* packets queued awaiting join */
	unsigned int       complete;	/* non-zero once an AH exists */
	unsigned int       send_only;
};

/*
 * Destroy a multicast group entry: release its address handle (if the
 * join completed), drain any mbufs still queued waiting for the join,
 * accounting them as output errors, and free the structure.  The caller
 * must already have unlinked the entry from the list and rb-tree.
 */
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
	struct ifnet *dev = mcast->priv->dev;
	int tx_dropped = 0;

	ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
			mcast->mcmember.mgid.raw, ":");

	if (mcast->ah)
		ipoib_put_ah(mcast->ah);

	tx_dropped = mcast->pkt_queue.ifq_len;
	_IF_DRAIN(&mcast->pkt_queue);	/* XXX Locking. */

	if_inc_counter(dev, IFCOUNTER_OERRORS, tx_dropped);

	kfree(mcast);
}

/*
 * Allocate and zero-initialize a multicast group entry.  The MGID and
 * flags are filled in by the caller.  Returns NULL on allocation
 * failure; can_sleep selects GFP_KERNEL vs. GFP_ATOMIC.
 */
static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
					     int can_sleep)
{
	struct ipoib_mcast *mcast;

	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
	if (!mcast)
		return NULL;

	mcast->priv = priv;
	mcast->created = jiffies;
	mcast->backoff = 1;	/* seconds; doubled on each failed join */

	INIT_LIST_HEAD(&mcast->list);
	bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));

	return mcast;
}

/*
 * Look up a group by raw MGID in the per-device rb-tree.  Caller is
 * responsible for holding priv->lock (the tree is not otherwise
 * protected).  Returns NULL if the MGID is not present.
 */
static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
					      void *mgid)
{
	struct rb_node *n = priv->multicast_tree.rb_node;

	while (n) {
		struct ipoib_mcast *mcast;
		int ret;

		mcast = rb_entry(n, struct ipoib_mcast, rb_node);

		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = n->rb_left;
		else if (ret > 0)
			n = n->rb_right;
		else
			return mcast;
	}

	return NULL;
}

/*
 * Insert a group into the rb-tree, keyed by MGID.  Returns -EEXIST
 * without inserting if an entry with the same MGID is already present.
 * Same locking rule as __ipoib_mcast_find().
 */
static int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
			     struct ipoib_mcast *mcast)
{
	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;

	while (*n) {
		struct ipoib_mcast *tmcast;
		int ret;

		pn = *n;
		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);

		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = &pn->rb_left;
		else if (ret > 0)
			n = &pn->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&mcast->rb_node, pn, n);
	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);

	return 0;
}

/*
 * Complete a successful SA join: record the member record, attach the
 * QP to the group (unless send-only), build an address handle from the
 * returned path parameters, and flush any mbufs that were queued on the
 * group while the join was in flight.  Returns 0 on success, -EAGAIN if
 * the broadcast group disappeared underneath us, or a negative errno
 * from the QP attach.
 */
static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
				   struct ib_sa_mcmember_rec *mcmember)
{
	struct ipoib_dev_priv *priv = mcast->priv;
	struct ifnet *dev = priv->dev;
	struct ipoib_ah *ah;
	int ret;
	int set_qkey = 0;

	mcast->mcmember = *mcmember;

	/* Set the cached Q_Key before we attach if it's the broadcast group */
	/* (the MGID lives at offset 4 of the 20-byte IPoIB link address) */
	if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
		    sizeof (union ib_gid))) {
		spin_lock_irq(&priv->lock);
		if (!priv->broadcast) {
			spin_unlock_irq(&priv->lock);
			return -EAGAIN;
		}
		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
		spin_unlock_irq(&priv->lock);
		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
		set_qkey = 1;
	}

	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
		/* ATTACHED doubles as a "QP already attached" guard. */
		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			ipoib_warn(priv, "multicast group %16D already attached\n",
				   mcast->mcmember.mgid.raw, ":");

			return 0;
		}

		ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
					 &mcast->mcmember.mgid, set_qkey);
		if (ret < 0) {
			ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
				   mcast->mcmember.mgid.raw, ":");

			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
			return ret;
		}
	}

	{
		/* Build the UD address vector from the SA member record. */
		struct ib_ah_attr av = {
			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
			.port_num      = priv->port,
			.sl	       = mcast->mcmember.sl,
			.ah_flags      = IB_AH_GRH,
			.static_rate   = mcast->mcmember.rate,
			.grh	       = {
				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
				.hop_limit     = mcast->mcmember.hop_limit,
				.sgid_index    = 0,
				.traffic_class = mcast->mcmember.traffic_class
			}
		};
		av.grh.dgid = mcast->mcmember.mgid;

		ah = ipoib_create_ah(priv, priv->pd, &av);
		if (!ah) {
			ipoib_warn(priv, "ib_address_create failed\n");
		} else {
			spin_lock_irq(&priv->lock);
			mcast->ah = ah;
			spin_unlock_irq(&priv->lock);

			ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
					mcast->mcmember.mgid.raw, ":",
					mcast->ah->ah,
					be16_to_cpu(mcast->mcmember.mlid),
					mcast->mcmember.sl);
		}
	}

	/* actually send any queued packets */
	while (mcast->pkt_queue.ifq_len) {
		struct mbuf *mb;
		_IF_DEQUEUE(&mcast->pkt_queue, mb);
		mb->m_pkthdr.rcvif = dev;

		if (dev->if_transmit(dev, mb))
			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
	}

	return 0;
}

/*
 * SA callback for a send-only join.  On failure, drop the queued
 * packets (counted as output errors) and clear BUSY so a later send
 * retries the join.  Returning non-zero tells the SA layer to release
 * the multicast record.
 */
static int
ipoib_mcast_sendonly_join_complete(int status,
				   struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct ipoib_dev_priv *priv = mcast->priv;

	/* We trap for port events ourselves. */
	if (status == -ENETRESET)
		return 0;

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (status) {
		if (mcast->logcount++ < 20)
			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
					mcast->mcmember.mgid.raw, ":", status);

		/* Flush out any queued packets */
		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, mcast->pkt_queue.ifq_len);
		_IF_DRAIN(&mcast->pkt_queue);

		/* Clear the busy flag so we try again */
		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
					    &mcast->flags);
	}
	return status;
}

/*
 * Kick off an asynchronous send-only join for a group we only transmit
 * to.  Sets BUSY for the duration of the SA transaction; completion is
 * handled in ipoib_mcast_sendonly_join_complete().  Returns 0 if the
 * join was started, -ENODEV/-EBUSY/errno otherwise.
 */
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = mcast->priv;
	struct ib_sa_mcmember_rec rec = {
#if 0				/* Some SMs don't support send-only yet */
		.join_state = 4
#else
		.join_state = 1
#endif
	};
	int ret = 0;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
		return -ENODEV;
	}

	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
		return -EBUSY;
	}

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
					 priv->port, &rec,
					 IB_SA_MCMEMBER_REC_MGID	|
					 IB_SA_MCMEMBER_REC_PORT_GID	|
					 IB_SA_MCMEMBER_REC_PKEY	|
					 IB_SA_MCMEMBER_REC_JOIN_STATE,
					 GFP_ATOMIC,
					 ipoib_mcast_sendonly_join_complete,
					 mcast);
	if (IS_ERR(mcast->mc)) {
		ret = PTR_ERR(mcast->mc);
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
			   ret);
	} else {
		ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
				mcast->mcmember.mgid.raw, ":");
	}

	return ret;
}

/*
 * Deferred "carrier on" work: mark the interface link up once the IB
 * port is verified ACTIVE.  Runs on ipoib_workqueue.
 */
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   carrier_on_task);
	struct ib_port_attr attr;

	/*
	 * Take rtnl_lock to avoid racing with ipoib_stop() and
	 * turning the carrier back on while a device is being
	 * removed.
	 *
	 * NOTE(review): comment inherited from the Linux driver; no
	 * rtnl_lock (or FreeBSD equivalent) is actually taken here --
	 * confirm whether the race described still needs handling.
	 */
	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
		return;
	}
	if_link_state_change(priv->dev, LINK_STATE_UP);
}

/*
 * SA callback for a regular (non-send-only) join.  On success, reset
 * the backoff, reschedule the join task to pick up the next unjoined
 * group, and (for the broadcast group) defer carrier-on work.  On
 * failure, double the backoff and reschedule the join task to retry.
 */
static int ipoib_mcast_join_complete(int status,
				     struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct ipoib_dev_priv *priv = mcast->priv;

	ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
			mcast->mcmember.mgid.raw, ":", status);

	/* We trap for port events ourselves. */
	if (status == -ENETRESET)
		return 0;

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (!status) {
		mcast->backoff = 1;
		mutex_lock(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->mcast_task, 0);
		mutex_unlock(&mcast_mutex);

		/*
		 * Defer carrier on work to ipoib_workqueue to avoid a
		 * deadlock on rtnl_lock here.
		 */
		if (mcast == priv->broadcast)
			queue_work(ipoib_workqueue, &priv->carrier_on_task);

		return 0;
	}

	/* Rate-limit the log noise on a persistently failing group. */
	if (mcast->logcount++ < 20) {
		if (status == -ETIMEDOUT || status == -EAGAIN) {
			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
					mcast->mcmember.mgid.raw, ":", status);
		} else {
			ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
				   mcast->mcmember.mgid.raw, ":", status);
		}
	}

	mcast->backoff *= 2;
	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

	/* Clear the busy flag so we try again */
	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);

	mutex_lock(&mcast_mutex);
	spin_lock_irq(&priv->lock);
	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
				   mcast->backoff * HZ);
	spin_unlock_irq(&priv->lock);
	mutex_unlock(&mcast_mutex);

	return status;
}

/*
 * Start an asynchronous full-member join for a group.  With 'create'
 * set, the request carries the full set of parameters copied from the
 * broadcast group so the SM will create the group if it doesn't exist;
 * otherwise only the identifying fields are sent.  Completion lands in
 * ipoib_mcast_join_complete(); on immediate failure the join task is
 * rescheduled with exponential backoff.
 */
static void ipoib_mcast_join(struct ipoib_dev_priv *priv,
			     struct ipoib_mcast *mcast, int create)
{
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	ib_sa_comp_mask comp_mask;
	int ret = 0;

	ipoib_dbg_mcast(priv, "joining MGID %16D\n",
			mcast->mcmember.mgid.raw, ":");

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	comp_mask =
		IB_SA_MCMEMBER_REC_MGID		|
		IB_SA_MCMEMBER_REC_PORT_GID	|
		IB_SA_MCMEMBER_REC_PKEY		|
		IB_SA_MCMEMBER_REC_JOIN_STATE;

	if (create) {
		comp_mask |=
			IB_SA_MCMEMBER_REC_QKEY			|
			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
			IB_SA_MCMEMBER_REC_MTU			|
			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
			IB_SA_MCMEMBER_REC_RATE			|
			IB_SA_MCMEMBER_REC_SL			|
			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
			IB_SA_MCMEMBER_REC_HOP_LIMIT;

		/* Mirror the broadcast group's parameters (requires that
		 * priv->broadcast is already joined -- callers guarantee
		 * this by joining broadcast first). */
		rec.qkey	  = priv->broadcast->mcmember.qkey;
		rec.mtu_selector  = IB_SA_EQ;
		rec.mtu		  = priv->broadcast->mcmember.mtu;
		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
		rec.rate_selector = IB_SA_EQ;
		rec.rate	  = priv->broadcast->mcmember.rate;
		rec.sl		  = priv->broadcast->mcmember.sl;
		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
	}

	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
					 &rec, comp_mask, GFP_KERNEL,
					 ipoib_mcast_join_complete, mcast);
	if (IS_ERR(mcast->mc)) {
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		ret = PTR_ERR(mcast->mc);
		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);

		mcast->backoff *= 2;
		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

		mutex_lock(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->mcast_task,
					   mcast->backoff * HZ);
		mutex_unlock(&mcast_mutex);
	}
}

/*
 * The multicast join work item.  Drives the join state machine: refresh
 * the local GID/LID, create and join the broadcast group first, then
 * join remaining groups one at a time (each completion requeues this
 * task), and finally update the multicast MTU.  Clears IPOIB_MCAST_RUN
 * once every group is joined.
 */
void ipoib_mcast_join_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, mcast_task.work);
	struct ifnet *dev = priv->dev;
	struct ib_port_attr attr;

	ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);

	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
		return;

	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d) suspend task.\n",
			  __func__, attr.state);
		return;
	}

	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
		ipoib_warn(priv, "ib_query_gid() failed\n");
	else
		memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));

	{
		/* NOTE(review): this inner 'attr' shadows the outer one
		 * queried above; the second ib_query_port() is redundant
		 * but harmless. */
		struct ib_port_attr attr;

		if (!ib_query_port(priv->ca, priv->port, &attr))
			priv->local_lid = attr.lid;
		else
			ipoib_warn(priv, "ib_query_port failed\n");
	}

	if (!priv->broadcast) {
		struct ipoib_mcast *broadcast;

		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			return;

		broadcast = ipoib_mcast_alloc(priv, 1);
		if (!broadcast) {
			ipoib_warn(priv, "failed to allocate broadcast group\n");
			mutex_lock(&mcast_mutex);
			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
				queue_delayed_work(ipoib_workqueue,
						   &priv->mcast_task, HZ);
			mutex_unlock(&mcast_mutex);
			return;
		}

		spin_lock_irq(&priv->lock);
		memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
		       sizeof (union ib_gid));
		priv->broadcast = broadcast;

		__ipoib_mcast_add(priv, priv->broadcast);
		spin_unlock_irq(&priv->lock);
	}

	/* Broadcast must be joined before anything else (its parameters
	 * seed every other join -- see ipoib_mcast_join()). */
	if (priv->broadcast &&
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if (priv->broadcast &&
		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
			ipoib_mcast_join(priv, priv->broadcast, 0);
		return;
	}

	while (1) {
		struct ipoib_mcast *mcast = NULL;

		spin_lock_irq(&priv->lock);
		list_for_each_entry(mcast, &priv->multicast_list, list) {
			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
				/* Found the next unjoined group */
				break;
			}
		}
		spin_unlock_irq(&priv->lock);

		if (&mcast->list == &priv->multicast_list) {
			/* All done */
			break;
		}

		/* Join one group; its completion callback requeues us. */
		ipoib_mcast_join(priv, mcast, 1);
		return;
	}

	spin_lock_irq(&priv->lock);
	if (priv->broadcast)
		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
	else
		priv->mcast_mtu = priv->admin_mtu;
	spin_unlock_irq(&priv->lock);

	if (!ipoib_cm_admin_enabled(priv))
		ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu));

	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
}

/*
 * Start the multicast join machinery: set IPOIB_MCAST_RUN and queue the
 * join task immediately (no-op if it was already running).
 */
int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
{
	ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
			priv->flags);

	mutex_lock(&mcast_mutex);
	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
	mutex_unlock(&mcast_mutex);

	return 0;
}

/*
 * Stop the multicast join machinery.  With 'flush' set, also wait for
 * any in-flight work on ipoib_workqueue to finish before returning.
 */
int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
{

	ipoib_dbg_mcast(priv, "stopping multicast thread\n");

	mutex_lock(&mcast_mutex);
	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
	cancel_delayed_work(&priv->mcast_task);
	mutex_unlock(&mcast_mutex);

	if (flush)
		flush_workqueue(ipoib_workqueue);

	return 0;
}

/*
 * Leave a group: cancel/free the SA join if one is outstanding (BUSY)
 * and detach the QP if we attached it (ATTACHED).
 *
 * NOTE(review): the ib_detach_mcast() result is stored in 'ret' and
 * logged, but the function always returns 0 -- confirm callers don't
 * need the real status.
 */
static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
{
	int ret = 0;

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
		ib_sa_free_multicast(mcast->mc);

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
				mcast->mcmember.mgid.raw, ":");

		/* Remove ourselves from the multicast group */
		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
				      be16_to_cpu(mcast->mcmember.mlid));
		if (ret)
			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
	}

	return 0;
}

/*
 * Transmit an mbuf to the group identified by 'mgid'.  If the group is
 * unknown, create a send-only entry and start its join; until a join
 * completes (mcast->ah set), packets are queued on the group (bounded
 * by IPOIB_MAX_MCAST_QUEUE) or dropped.  Consumes 'mb' in all cases.
 */
void
ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
{
	struct ifnet *dev = priv->dev;
	struct ipoib_mcast *mcast;

	/* Nothing can be sent before the broadcast group is attached. */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
	    !priv->broadcast ||
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
		m_freem(mb);
		return;
	}

	mcast = __ipoib_mcast_find(priv, mgid);
	if (!mcast) {
		/* Let's create a new send only group now */
		ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
				mgid, ":");

		mcast = ipoib_mcast_alloc(priv, 0);
		if (!mcast) {
			ipoib_warn(priv, "unable to allocate memory for "
				   "multicast structure\n");
			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
			goto out;
		}

		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
		__ipoib_mcast_add(priv, mcast);
		list_add_tail(&mcast->list, &priv->multicast_list);
	}

	if (!mcast->ah) {
		if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
			_IF_ENQUEUE(&mcast->pkt_queue, mb);
		} else {
			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
		}

		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
			ipoib_dbg_mcast(priv, "no address vector, "
					"but multicast join already started\n");
		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			ipoib_mcast_sendonly_join(mcast);

		/*
		 * If lookup completes between here and out:, don't
		 * want to send packet twice.
		 */
		mcast = NULL;
	}

out:
	if (mcast && mcast->ah)
		ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
}

/*
 * Tear down every multicast group (including broadcast) for a device:
 * unlink everything under the lock, then leave and free each group
 * outside it (ipoib_mcast_leave() can sleep in the SA layer).
 */
void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
{
	LIST_HEAD(remove_list);
	struct ipoib_mcast *mcast, *tmcast;
	unsigned long flags;

	ipoib_dbg_mcast(priv, "flushing multicast list\n");

	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		list_del(&mcast->list);
		rb_erase(&mcast->rb_node, &priv->multicast_tree);
		list_add_tail(&mcast->list, &remove_list);
	}

	if (priv->broadcast) {
		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
		list_add_tail(&priv->broadcast->list, &remove_list);
		priv->broadcast = NULL;
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(priv, mcast);
		ipoib_mcast_free(mcast);
	}
}

/*
 * Validate a link-layer multicast address against the device broadcast
 * address: length must be INFINIBAND_ALEN and the non-group-specific
 * bytes (reserved QPN + GID prefix/scope at offsets 0-5, signature
 * lower + P_Key at offsets 7-9) must match.  Byte 6 is deliberately
 * not compared.  Returns 1 if valid, 0 otherwise.
 */
static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
				     const u8 *broadcast)
{
	if (addrlen != INFINIBAND_ALEN)
		return 0;
	/* reserved QPN, prefix, scope */
	if (memcmp(addr, broadcast, 6))
		return 0;
	/* signature lower, pkey */
	if (memcmp(addr + 7, broadcast + 7, 3))
		return 0;
	return 1;
}

/* Work-item wrapper around ipoib_mcast_restart(). */
void ipoib_mcast_restart_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, restart_task);
	ipoib_mcast_restart(priv);
}

/*
 * Resynchronize the driver's multicast group list with the interface's
 * current multicast address list: add entries for new addresses
 * (upgrading send-only entries to full members), mark surviving ones
 * FOUND, and remove full-member entries whose addresses are gone.
 * Finally restart the join task if the interface is administratively up.
 */
void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev = priv->dev;
	struct ifmultiaddr *ifma;
	struct ipoib_mcast *mcast, *tmcast;
	LIST_HEAD(remove_list);
	struct ib_sa_mcmember_rec rec;
	int addrlen;

	ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
			priv->flags);

	ipoib_mcast_stop_thread(priv, 0);

	if_maddr_rlock(dev);
	spin_lock(&priv->lock);

	/*
	 * Unfortunately, the networking core only gives us a list of all of
	 * the multicast hardware addresses. We need to figure out which ones
	 * are new and which ones have been removed
	 */

	/* Clear out the found flag */
	list_for_each_entry(mcast, &priv->multicast_list, list)
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	/* Mark all of the entries that are found or don't exist */
	TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
		union ib_gid mgid;
		uint8_t *addr;

		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		addr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
		addrlen = ((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen;
		if (!ipoib_mcast_addr_is_valid(addr, addrlen,
					       dev->if_broadcastaddr))
			continue;

		memcpy(mgid.raw, addr + 4, sizeof mgid);

		mcast = __ipoib_mcast_find(priv, &mgid);
		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			struct ipoib_mcast *nmcast;

			/* ignore group which is directly joined by userspace */
			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
						mgid.raw, ":");
				continue;
			}

			/* Not found or send-only group, let's add a new entry */
			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
					mgid.raw, ":");

			nmcast = ipoib_mcast_alloc(priv, 0);
			if (!nmcast) {
				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
				continue;
			}

			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

			nmcast->mcmember.mgid = mgid;

			if (mcast) {
				/* Destroy the send only entry */
				list_move_tail(&mcast->list, &remove_list);

				rb_replace_node(&mcast->rb_node,
						&nmcast->rb_node,
						&priv->multicast_tree);
			} else
				__ipoib_mcast_add(priv, nmcast);

			list_add_tail(&nmcast->list, &priv->multicast_list);
		}

		if (mcast)
			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
	}

	/* Remove all of the entries don't exist anymore */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
					mcast->mcmember.mgid.raw, ":");

			rb_erase(&mcast->rb_node, &priv->multicast_tree);

			/* Move to the remove list */
			list_move_tail(&mcast->list, &remove_list);
		}
	}

	spin_unlock(&priv->lock);
	if_maddr_runlock(dev);

	/* We have to cancel outside of the spinlock */
	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(mcast->priv, mcast);
		ipoib_mcast_free(mcast);
	}

	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		ipoib_mcast_start_thread(priv);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

/*
 * Allocate an iterator positioned before the first group (all-zero
 * MGID) and advance it to the first entry.  Returns NULL if allocation
 * fails or the tree is empty.  Caller frees with kfree().
 */
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
{
	struct ipoib_mcast_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->priv = priv;
	memset(iter->mgid.raw, 0, 16);

	if (ipoib_mcast_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

/*
 * Advance the iterator to the first group whose MGID sorts strictly
 * after iter->mgid, snapshotting that group's fields into the iterator.
 * Returns 0 on success, 1 when there are no more entries.
 */
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
	struct ipoib_dev_priv *priv = iter->priv;
	struct rb_node *n;
	struct ipoib_mcast *mcast;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	/* Linear scan from the smallest MGID; the tree is ordered by
	 * memcmp() of the raw GID, so the first entry greater than the
	 * cursor is the next one to report. */
	n = rb_first(&priv->multicast_tree);

	while (n) {
		mcast = rb_entry(n, struct ipoib_mcast, rb_node);

		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->mgid      = mcast->mcmember.mgid;
			iter->created   = mcast->created;
			iter->queuelen  = mcast->pkt_queue.ifq_len;
			iter->complete  = !!mcast->ah;
			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));

			ret = 0;

			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}

/* Copy out the snapshot captured by the last ipoib_mcast_iter_next(). */
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
			   union ib_gid *mgid,
			   unsigned long *created,
			   unsigned int *queuelen,
			   unsigned int *complete,
			   unsigned int *send_only)
{
	*mgid      = iter->mgid;
	*created   = iter->created;
	*queuelen  = iter->queuelen;
	*complete  = iter->complete;
	*send_only = iter->send_only;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */