1 /*- 2 * Copyright (c) 2020 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "opt_inet.h" 27 #include "opt_inet6.h" 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/types.h> 35 #include <sys/systm.h> 36 #include <sys/eventhandler.h> 37 #include <sys/socket.h> 38 #include <sys/sysctl.h> 39 #include <sys/devctl.h> 40 #include <sys/module.h> 41 42 #include <net/if.h> 43 #include <net/if_var.h> 44 #include <net/route.h> 45 #include <net/ethernet.h> 46 #include <net/infiniband.h> 47 #include <net/bpf.h> 48 #include <net/if_llatbl.h> 49 #include <net/netisr.h> 50 #include <net/if_dl.h> 51 #include <net/if_types.h> 52 #include <net/if_media.h> 53 #include <net/if_lagg.h> 54 55 #include <netinet/in.h> 56 #include <netinet/if_ether.h> 57 #include <netinet/ip6.h> 58 59 #include <netinet6/in6_var.h> 60 #include <netinet6/nd6.h> 61 62 #include <security/mac/mac_framework.h> 63 64 /* if_lagg(4) support */ 65 struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); 66 67 #ifdef INET 68 static inline void 69 infiniband_ipv4_multicast_map(uint32_t addr, 70 const uint8_t *broadcast, uint8_t *buf) 71 { 72 uint8_t scope; 73 74 addr = ntohl(addr); 75 scope = broadcast[5] & 0xF; 76 77 buf[0] = 0; 78 buf[1] = 0xff; 79 buf[2] = 0xff; 80 buf[3] = 0xff; 81 buf[4] = 0xff; 82 buf[5] = 0x10 | scope; 83 buf[6] = 0x40; 84 buf[7] = 0x1b; 85 buf[8] = broadcast[8]; 86 buf[9] = broadcast[9]; 87 buf[10] = 0; 88 buf[11] = 0; 89 buf[12] = 0; 90 buf[13] = 0; 91 buf[14] = 0; 92 buf[15] = 0; 93 buf[16] = (addr >> 24) & 0xff; 94 buf[17] = (addr >> 16) & 0xff; 95 buf[18] = (addr >> 8) & 0xff; 96 buf[19] = addr & 0xff; 97 } 98 #endif 99 100 #ifdef INET6 101 static inline void 102 infiniband_ipv6_multicast_map(const struct in6_addr *addr, 103 const uint8_t *broadcast, uint8_t *buf) 104 { 105 uint8_t scope; 106 107 scope = broadcast[5] & 0xF; 108 109 buf[0] = 0; 110 buf[1] = 0xff; 111 buf[2] = 0xff; 112 buf[3] = 0xff; 113 buf[4] = 0xff; 114 buf[5] = 0x10 | scope; 115 buf[6] = 0x60; 116 buf[7] = 0x1b; 117 buf[8] = broadcast[8]; 118 buf[9] = broadcast[9]; 119 memcpy(&buf[10], &addr->s6_addr[6], 10); 120 } 121 #endif 122 123 /* 124 * This is for clients that have an infiniband_header in the mbuf. 125 */ 126 void 127 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb) 128 { 129 struct infiniband_header *ibh; 130 struct ether_header eh; 131 132 if (mb->m_len < sizeof(*ibh)) 133 return; 134 135 ibh = mtod(mb, struct infiniband_header *); 136 eh.ether_type = ibh->ib_protocol; 137 memset(eh.ether_shost, 0, ETHER_ADDR_LEN); 138 memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN); 139 mb->m_data += sizeof(*ibh); 140 mb->m_len -= sizeof(*ibh); 141 mb->m_pkthdr.len -= sizeof(*ibh); 142 bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); 143 mb->m_data -= sizeof(*ibh); 144 mb->m_len += sizeof(*ibh); 145 mb->m_pkthdr.len += sizeof(*ibh); 146 } 147 148 /* 149 * Infiniband output routine. 150 */ 151 static int 152 infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 153 struct route *ro) 154 { 155 uint8_t edst[INFINIBAND_ADDR_LEN]; 156 #if defined(INET) || defined(INET6) 157 struct llentry *lle = NULL; 158 #endif 159 struct infiniband_header *ibh; 160 int error = 0; 161 uint16_t type; 162 bool is_gw; 163 164 NET_EPOCH_ASSERT(); 165 166 is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0); 167 168 #ifdef MAC 169 error = mac_ifnet_check_transmit(ifp, m); 170 if (error) 171 goto bad; 172 #endif 173 174 M_PROFILE(m); 175 if (ifp->if_flags & IFF_MONITOR) { 176 error = ENETDOWN; 177 goto bad; 178 } 179 if (!((ifp->if_flags & IFF_UP) && 180 (ifp->if_drv_flags & IFF_DRV_RUNNING))) { 181 error = ENETDOWN; 182 goto bad; 183 } 184 185 switch (dst->sa_family) { 186 case AF_LINK: 187 goto output; 188 #ifdef INET 189 case AF_INET: 190 if (lle != NULL && (lle->la_flags & LLE_VALID)) { 191 memcpy(edst, lle->ll_addr, sizeof(edst)); 192 } else if (m->m_flags & M_MCAST) { 193 infiniband_ipv4_multicast_map( 194 ((const struct sockaddr_in *)dst)->sin_addr.s_addr, 195 ifp->if_broadcastaddr, edst); 196 } else { 197 error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); 198 if (error) { 199 if (error == EWOULDBLOCK) 200 error = 0; 201 m = NULL; /* mbuf is consumed by resolver */ 202 goto bad; 203 } 204 } 205 type = htons(ETHERTYPE_IP); 206 break; 207 case AF_ARP: { 208 struct arphdr *ah; 209 210 if (m->m_len < sizeof(*ah)) { 211 error = EINVAL; 212 goto bad; 213 } 214 215 ah = mtod(m, struct arphdr *); 216 217 if (m->m_len < arphdr_len(ah)) { 218 error = EINVAL; 219 goto bad; 220 } 221 ah->ar_hrd = htons(ARPHRD_INFINIBAND); 222 223 switch (ntohs(ah->ar_op)) { 224 case ARPOP_REVREQUEST: 225 case ARPOP_REVREPLY: 226 type = htons(ETHERTYPE_REVARP); 227 break; 228 case ARPOP_REQUEST: 229 case ARPOP_REPLY: 230 default: 231 type = htons(ETHERTYPE_ARP); 232 break; 233 } 234 235 if (m->m_flags & M_BCAST) { 236 memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); 237 } else { 238 if (ah->ar_hln != INFINIBAND_ADDR_LEN) { 239 error = EINVAL; 240 goto bad; 241 } 242 memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN); 243 } 244 break; 245 } 246 #endif 247 #ifdef INET6 248 case AF_INET6: { 249 const struct ip6_hdr *ip6; 250 251 ip6 = mtod(m, const struct ip6_hdr *); 252 if (m->m_len < sizeof(*ip6)) { 253 error = EINVAL; 254 goto bad; 255 } else if (lle != NULL && (lle->la_flags & LLE_VALID)) { 256 memcpy(edst, lle->ll_addr, sizeof(edst)); 257 } else if (m->m_flags & M_MCAST) { 258 infiniband_ipv6_multicast_map( 259 &((const struct sockaddr_in6 *)dst)->sin6_addr, 260 ifp->if_broadcastaddr, edst); 261 } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) { 262 memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); 263 } else { 264 error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); 265 if (error) { 266 if (error == EWOULDBLOCK) 267 error = 0; 268 m = NULL; /* mbuf is consumed by resolver */ 269 goto bad; 270 } 271 } 272 type = htons(ETHERTYPE_IPV6); 273 break; 274 } 275 #endif 276 default: 277 error = EAFNOSUPPORT; 278 goto bad; 279 } 280 281 /* 282 * Add local net header. If no space in first mbuf, 283 * allocate another. 284 */ 285 M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT); 286 if (m == NULL) { 287 error = ENOBUFS; 288 goto bad; 289 } 290 ibh = mtod(m, struct infiniband_header *); 291 292 ibh->ib_protocol = type; 293 memcpy(ibh->ib_hwaddr, edst, sizeof(edst)); 294 295 /* 296 * Queue message on interface, update output statistics if 297 * successful, and start output if interface not yet active. 298 */ 299 output: 300 return (ifp->if_transmit(ifp, m)); 301 bad: 302 if (m != NULL) 303 m_freem(m); 304 return (error); 305 } 306 307 /* 308 * Process a received Infiniband packet. 309 */ 310 static void 311 infiniband_input(struct ifnet *ifp, struct mbuf *m) 312 { 313 struct infiniband_header *ibh; 314 struct epoch_tracker et; 315 int isr; 316 317 CURVNET_SET_QUIET(ifp->if_vnet); 318 319 if ((ifp->if_flags & IFF_UP) == 0) { 320 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 321 m_freem(m); 322 goto done; 323 } 324 325 ibh = mtod(m, struct infiniband_header *); 326 327 /* 328 * Reset layer specific mbuf flags to avoid confusing upper 329 * layers: 330 */ 331 m->m_flags &= ~M_VLANTAG; 332 m_clrprotoflags(m); 333 334 if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) { 335 if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr, 336 ifp->if_addrlen) == 0) 337 m->m_flags |= M_BCAST; 338 else 339 m->m_flags |= M_MCAST; 340 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); 341 } 342 343 /* Let BPF have it before we strip the header. */ 344 INFINIBAND_BPF_MTAP(ifp, m); 345 346 /* Allow monitor mode to claim this frame, after stats are updated. */ 347 if (ifp->if_flags & IFF_MONITOR) { 348 m_freem(m); 349 goto done; 350 } 351 352 /* Direct packet to correct FIB based on interface config. */ 353 M_SETFIB(m, ifp->if_fib); 354 355 /* Handle input from a lagg<N> port */ 356 if (ifp->if_type == IFT_INFINIBANDLAG) { 357 KASSERT(lagg_input_infiniband_p != NULL, 358 ("%s: if_lagg not loaded!", __func__)); 359 m = (*lagg_input_infiniband_p)(ifp, m); 360 if (__predict_false(m == NULL)) 361 goto done; 362 ifp = m->m_pkthdr.rcvif; 363 } 364 365 /* 366 * Dispatch frame to upper layer. 367 */ 368 switch (ibh->ib_protocol) { 369 #ifdef INET 370 case htons(ETHERTYPE_IP): 371 isr = NETISR_IP; 372 break; 373 374 case htons(ETHERTYPE_ARP): 375 if (ifp->if_flags & IFF_NOARP) { 376 /* Discard packet if ARP is disabled on interface */ 377 m_freem(m); 378 goto done; 379 } 380 isr = NETISR_ARP; 381 break; 382 #endif 383 #ifdef INET6 384 case htons(ETHERTYPE_IPV6): 385 isr = NETISR_IPV6; 386 break; 387 #endif 388 default: 389 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 390 m_freem(m); 391 goto done; 392 } 393 394 /* Strip off the Infiniband header. */ 395 m_adj(m, INFINIBAND_HDR_LEN); 396 397 #ifdef MAC 398 /* 399 * Tag the mbuf with an appropriate MAC label before any other 400 * consumers can get to it. 401 */ 402 mac_ifnet_create_mbuf(ifp, m); 403 #endif 404 /* Allow monitor mode to claim this frame, after stats are updated. */ 405 NET_EPOCH_ENTER(et); 406 netisr_dispatch(isr, m); 407 NET_EPOCH_EXIT(et); 408 done: 409 CURVNET_RESTORE(); 410 } 411 412 static int 413 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, 414 struct sockaddr *sa) 415 { 416 struct sockaddr_dl *sdl; 417 #ifdef INET 418 struct sockaddr_in *sin; 419 #endif 420 #ifdef INET6 421 struct sockaddr_in6 *sin6; 422 #endif 423 uint8_t *e_addr; 424 425 switch (sa->sa_family) { 426 case AF_LINK: 427 /* 428 * No mapping needed. Just check that it's a valid MC address. 429 */ 430 sdl = (struct sockaddr_dl *)sa; 431 e_addr = LLADDR(sdl); 432 if (!INFINIBAND_IS_MULTICAST(e_addr)) 433 return (EADDRNOTAVAIL); 434 *llsa = NULL; 435 return 0; 436 437 #ifdef INET 438 case AF_INET: 439 sin = (struct sockaddr_in *)sa; 440 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 441 return (EADDRNOTAVAIL); 442 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); 443 sdl->sdl_alen = INFINIBAND_ADDR_LEN; 444 e_addr = LLADDR(sdl); 445 infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr, 446 e_addr); 447 *llsa = (struct sockaddr *)sdl; 448 return (0); 449 #endif 450 #ifdef INET6 451 case AF_INET6: 452 sin6 = (struct sockaddr_in6 *)sa; 453 /* 454 * An IP6 address of 0 means listen to all of the 455 * multicast address used for IP6. This has no meaning 456 * in infiniband. 457 */ 458 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 459 return (EADDRNOTAVAIL); 460 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 461 return (EADDRNOTAVAIL); 462 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); 463 sdl->sdl_alen = INFINIBAND_ADDR_LEN; 464 e_addr = LLADDR(sdl); 465 infiniband_ipv6_multicast_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); 466 *llsa = (struct sockaddr *)sdl; 467 return (0); 468 #endif 469 default: 470 return (EAFNOSUPPORT); 471 } 472 } 473 474 void 475 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb) 476 { 477 struct sockaddr_dl *sdl; 478 struct ifaddr *ifa; 479 int i; 480 481 ifp->if_addrlen = INFINIBAND_ADDR_LEN; 482 ifp->if_hdrlen = INFINIBAND_HDR_LEN; 483 ifp->if_mtu = INFINIBAND_MTU; 484 if_attach(ifp); 485 ifp->if_output = infiniband_output; 486 ifp->if_input = infiniband_input; 487 ifp->if_resolvemulti = infiniband_resolvemulti; 488 489 if (ifp->if_baudrate == 0) 490 ifp->if_baudrate = IF_Gbps(10); /* default value */ 491 if (llb != NULL) 492 ifp->if_broadcastaddr = llb; 493 494 ifa = ifp->if_addr; 495 KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); 496 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 497 sdl->sdl_type = IFT_INFINIBAND; 498 sdl->sdl_alen = ifp->if_addrlen; 499 500 if (lla != NULL) { 501 memcpy(LLADDR(sdl), lla, ifp->if_addrlen); 502 503 if (ifp->if_hw_addr != NULL) 504 memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen); 505 } else { 506 lla = LLADDR(sdl); 507 } 508 509 /* Attach ethernet compatible network device */ 510 bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); 511 512 /* Announce Infiniband MAC address if non-zero. */ 513 for (i = 0; i < ifp->if_addrlen; i++) 514 if (lla[i] != 0) 515 break; 516 if (i != ifp->if_addrlen) 517 if_printf(ifp, "Infiniband address: %20D\n", lla, ":"); 518 519 /* Add necessary bits are setup; announce it now. */ 520 EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp); 521 522 if (IS_DEFAULT_VNET(curvnet)) 523 devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL); 524 } 525 526 /* 527 * Perform common duties while detaching an Infiniband interface 528 */ 529 void 530 infiniband_ifdetach(struct ifnet *ifp) 531 { 532 bpfdetach(ifp); 533 if_detach(ifp); 534 } 535 536 static int 537 infiniband_modevent(module_t mod, int type, void *data) 538 { 539 switch (type) { 540 case MOD_LOAD: 541 case MOD_UNLOAD: 542 return (0); 543 default: 544 return (EOPNOTSUPP); 545 } 546 } 547 548 static moduledata_t infiniband_mod = { 549 .name = "if_infiniband", 550 .evhand = &infiniband_modevent, 551 }; 552 553 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); 554 MODULE_VERSION(if_infiniband, 1); 555