1 /*- 2 * Copyright (c) 2020 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "opt_inet.h" 27 #include "opt_inet6.h" 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/devctl.h> 35 #include <sys/eventhandler.h> 36 #include <sys/kernel.h> 37 #include <sys/mbuf.h> 38 #include <sys/module.h> 39 #include <sys/socket.h> 40 #include <sys/sysctl.h> 41 42 #include <net/bpf.h> 43 #include <net/ethernet.h> 44 #include <net/infiniband.h> 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/if_media.h> 49 #include <net/if_lagg.h> 50 #include <net/if_llatbl.h> 51 #include <net/if_types.h> 52 #include <net/netisr.h> 53 #include <net/route.h> 54 #include <netinet/if_ether.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet6/in6_var.h> 58 #include <netinet6/nd6.h> 59 60 #include <security/mac/mac_framework.h> 61 62 /* if_lagg(4) support */ 63 struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); 64 65 #ifdef INET 66 static inline void 67 infiniband_ipv4_multicast_map(uint32_t addr, 68 const uint8_t *broadcast, uint8_t *buf) 69 { 70 uint8_t scope; 71 72 addr = ntohl(addr); 73 scope = broadcast[5] & 0xF; 74 75 buf[0] = 0; 76 buf[1] = 0xff; 77 buf[2] = 0xff; 78 buf[3] = 0xff; 79 buf[4] = 0xff; 80 buf[5] = 0x10 | scope; 81 buf[6] = 0x40; 82 buf[7] = 0x1b; 83 buf[8] = broadcast[8]; 84 buf[9] = broadcast[9]; 85 buf[10] = 0; 86 buf[11] = 0; 87 buf[12] = 0; 88 buf[13] = 0; 89 buf[14] = 0; 90 buf[15] = 0; 91 buf[16] = (addr >> 24) & 0xff; 92 buf[17] = (addr >> 16) & 0xff; 93 buf[18] = (addr >> 8) & 0xff; 94 buf[19] = addr & 0xff; 95 } 96 #endif 97 98 #ifdef INET6 99 static inline void 100 infiniband_ipv6_multicast_map(const struct in6_addr *addr, 101 const uint8_t *broadcast, uint8_t *buf) 102 { 103 uint8_t scope; 104 105 scope = broadcast[5] & 0xF; 106 107 buf[0] = 0; 108 buf[1] = 0xff; 109 buf[2] = 0xff; 110 buf[3] = 0xff; 111 buf[4] = 0xff; 112 buf[5] = 0x10 | scope; 113 buf[6] = 0x60; 114 buf[7] = 0x1b; 115 buf[8] = broadcast[8]; 116 buf[9] = broadcast[9]; 117 memcpy(&buf[10], &addr->s6_addr[6], 10); 118 } 119 #endif 120 121 /* 122 * This is for clients that have an infiniband_header in the mbuf. 123 */ 124 void 125 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb) 126 { 127 struct infiniband_header *ibh; 128 struct ether_header eh; 129 130 if (mb->m_len < sizeof(*ibh)) 131 return; 132 133 ibh = mtod(mb, struct infiniband_header *); 134 eh.ether_type = ibh->ib_protocol; 135 memset(eh.ether_shost, 0, ETHER_ADDR_LEN); 136 memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN); 137 mb->m_data += sizeof(*ibh); 138 mb->m_len -= sizeof(*ibh); 139 mb->m_pkthdr.len -= sizeof(*ibh); 140 bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb); 141 mb->m_data -= sizeof(*ibh); 142 mb->m_len += sizeof(*ibh); 143 mb->m_pkthdr.len += sizeof(*ibh); 144 } 145 146 /* 147 * Infiniband output routine. 148 */ 149 static int 150 infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 151 struct route *ro) 152 { 153 uint8_t edst[INFINIBAND_ADDR_LEN]; 154 #if defined(INET) || defined(INET6) 155 struct llentry *lle = NULL; 156 #endif 157 struct infiniband_header *ibh; 158 int error = 0; 159 uint16_t type; 160 bool is_gw; 161 162 NET_EPOCH_ASSERT(); 163 164 is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0); 165 166 #ifdef MAC 167 error = mac_ifnet_check_transmit(ifp, m); 168 if (error) 169 goto bad; 170 #endif 171 172 M_PROFILE(m); 173 if (ifp->if_flags & IFF_MONITOR) { 174 error = ENETDOWN; 175 goto bad; 176 } 177 if (!((ifp->if_flags & IFF_UP) && 178 (ifp->if_drv_flags & IFF_DRV_RUNNING))) { 179 error = ENETDOWN; 180 goto bad; 181 } 182 183 switch (dst->sa_family) { 184 case AF_LINK: 185 goto output; 186 #ifdef INET 187 case AF_INET: 188 if (lle != NULL && (lle->la_flags & LLE_VALID)) { 189 memcpy(edst, lle->ll_addr, sizeof(edst)); 190 } else if (m->m_flags & M_MCAST) { 191 infiniband_ipv4_multicast_map( 192 ((const struct sockaddr_in *)dst)->sin_addr.s_addr, 193 ifp->if_broadcastaddr, edst); 194 } else { 195 error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); 196 if (error) { 197 if (error == EWOULDBLOCK) 198 error = 0; 199 m = NULL; /* mbuf is consumed by resolver */ 200 goto bad; 201 } 202 } 203 type = htons(ETHERTYPE_IP); 204 break; 205 case AF_ARP: { 206 struct arphdr *ah; 207 208 if (m->m_len < sizeof(*ah)) { 209 error = EINVAL; 210 goto bad; 211 } 212 213 ah = mtod(m, struct arphdr *); 214 215 if (m->m_len < arphdr_len(ah)) { 216 error = EINVAL; 217 goto bad; 218 } 219 ah->ar_hrd = htons(ARPHRD_INFINIBAND); 220 221 switch (ntohs(ah->ar_op)) { 222 case ARPOP_REVREQUEST: 223 case ARPOP_REVREPLY: 224 type = htons(ETHERTYPE_REVARP); 225 break; 226 case ARPOP_REQUEST: 227 case ARPOP_REPLY: 228 default: 229 type = htons(ETHERTYPE_ARP); 230 break; 231 } 232 233 if (m->m_flags & M_BCAST) { 234 memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); 235 } else { 236 if (ah->ar_hln != INFINIBAND_ADDR_LEN) { 237 error = EINVAL; 238 goto bad; 239 } 240 memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN); 241 } 242 break; 243 } 244 #endif 245 #ifdef INET6 246 case AF_INET6: { 247 const struct ip6_hdr *ip6; 248 249 ip6 = mtod(m, const struct ip6_hdr *); 250 if (m->m_len < sizeof(*ip6)) { 251 error = EINVAL; 252 goto bad; 253 } else if (lle != NULL && (lle->la_flags & LLE_VALID)) { 254 memcpy(edst, lle->ll_addr, sizeof(edst)); 255 } else if (m->m_flags & M_MCAST) { 256 infiniband_ipv6_multicast_map( 257 &((const struct sockaddr_in6 *)dst)->sin6_addr, 258 ifp->if_broadcastaddr, edst); 259 } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) { 260 memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); 261 } else { 262 error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); 263 if (error) { 264 if (error == EWOULDBLOCK) 265 error = 0; 266 m = NULL; /* mbuf is consumed by resolver */ 267 goto bad; 268 } 269 } 270 type = htons(ETHERTYPE_IPV6); 271 break; 272 } 273 #endif 274 default: 275 error = EAFNOSUPPORT; 276 goto bad; 277 } 278 279 /* 280 * Add local net header. If no space in first mbuf, 281 * allocate another. 282 */ 283 M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT); 284 if (m == NULL) { 285 error = ENOBUFS; 286 goto bad; 287 } 288 ibh = mtod(m, struct infiniband_header *); 289 290 ibh->ib_protocol = type; 291 memcpy(ibh->ib_hwaddr, edst, sizeof(edst)); 292 293 /* 294 * Queue message on interface, update output statistics if 295 * successful, and start output if interface not yet active. 296 */ 297 output: 298 return (ifp->if_transmit(ifp, m)); 299 bad: 300 if (m != NULL) 301 m_freem(m); 302 return (error); 303 } 304 305 /* 306 * Process a received Infiniband packet. 307 */ 308 static void 309 infiniband_input(struct ifnet *ifp, struct mbuf *m) 310 { 311 struct infiniband_header *ibh; 312 struct epoch_tracker et; 313 int isr; 314 315 CURVNET_SET_QUIET(ifp->if_vnet); 316 317 if ((ifp->if_flags & IFF_UP) == 0) { 318 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 319 m_freem(m); 320 goto done; 321 } 322 323 ibh = mtod(m, struct infiniband_header *); 324 325 /* 326 * Reset layer specific mbuf flags to avoid confusing upper 327 * layers: 328 */ 329 m->m_flags &= ~M_VLANTAG; 330 m_clrprotoflags(m); 331 332 if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) { 333 if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr, 334 ifp->if_addrlen) == 0) 335 m->m_flags |= M_BCAST; 336 else 337 m->m_flags |= M_MCAST; 338 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); 339 } 340 341 /* Let BPF have it before we strip the header. */ 342 INFINIBAND_BPF_MTAP(ifp, m); 343 344 /* Allow monitor mode to claim this frame, after stats are updated. */ 345 if (ifp->if_flags & IFF_MONITOR) { 346 m_freem(m); 347 goto done; 348 } 349 350 /* Direct packet to correct FIB based on interface config. */ 351 M_SETFIB(m, ifp->if_fib); 352 353 /* Handle input from a lagg<N> port */ 354 if (ifp->if_type == IFT_INFINIBANDLAG) { 355 KASSERT(lagg_input_infiniband_p != NULL, 356 ("%s: if_lagg not loaded!", __func__)); 357 m = (*lagg_input_infiniband_p)(ifp, m); 358 if (__predict_false(m == NULL)) 359 goto done; 360 ifp = m->m_pkthdr.rcvif; 361 } 362 363 /* 364 * Dispatch frame to upper layer. 365 */ 366 switch (ibh->ib_protocol) { 367 #ifdef INET 368 case htons(ETHERTYPE_IP): 369 isr = NETISR_IP; 370 break; 371 372 case htons(ETHERTYPE_ARP): 373 if (ifp->if_flags & IFF_NOARP) { 374 /* Discard packet if ARP is disabled on interface */ 375 m_freem(m); 376 goto done; 377 } 378 isr = NETISR_ARP; 379 break; 380 #endif 381 #ifdef INET6 382 case htons(ETHERTYPE_IPV6): 383 isr = NETISR_IPV6; 384 break; 385 #endif 386 default: 387 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 388 m_freem(m); 389 goto done; 390 } 391 392 /* Strip off the Infiniband header. */ 393 m_adj(m, INFINIBAND_HDR_LEN); 394 395 #ifdef MAC 396 /* 397 * Tag the mbuf with an appropriate MAC label before any other 398 * consumers can get to it. 399 */ 400 mac_ifnet_create_mbuf(ifp, m); 401 #endif 402 /* Allow monitor mode to claim this frame, after stats are updated. */ 403 NET_EPOCH_ENTER(et); 404 netisr_dispatch(isr, m); 405 NET_EPOCH_EXIT(et); 406 done: 407 CURVNET_RESTORE(); 408 } 409 410 static int 411 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, 412 struct sockaddr *sa) 413 { 414 struct sockaddr_dl *sdl; 415 #ifdef INET 416 struct sockaddr_in *sin; 417 #endif 418 #ifdef INET6 419 struct sockaddr_in6 *sin6; 420 #endif 421 uint8_t *e_addr; 422 423 switch (sa->sa_family) { 424 case AF_LINK: 425 /* 426 * No mapping needed. Just check that it's a valid MC address. 427 */ 428 sdl = (struct sockaddr_dl *)sa; 429 e_addr = LLADDR(sdl); 430 if (!INFINIBAND_IS_MULTICAST(e_addr)) 431 return (EADDRNOTAVAIL); 432 *llsa = NULL; 433 return 0; 434 435 #ifdef INET 436 case AF_INET: 437 sin = (struct sockaddr_in *)sa; 438 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 439 return (EADDRNOTAVAIL); 440 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); 441 sdl->sdl_alen = INFINIBAND_ADDR_LEN; 442 e_addr = LLADDR(sdl); 443 infiniband_ipv4_multicast_map( 444 sin->sin_addr.s_addr, ifp->if_broadcastaddr, e_addr); 445 *llsa = (struct sockaddr *)sdl; 446 return (0); 447 #endif 448 #ifdef INET6 449 case AF_INET6: 450 sin6 = (struct sockaddr_in6 *)sa; 451 /* 452 * An IP6 address of 0 means listen to all of the 453 * multicast address used for IP6. This has no meaning 454 * in infiniband. 455 */ 456 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 457 return (EADDRNOTAVAIL); 458 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 459 return (EADDRNOTAVAIL); 460 sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND); 461 sdl->sdl_alen = INFINIBAND_ADDR_LEN; 462 e_addr = LLADDR(sdl); 463 infiniband_ipv6_multicast_map( 464 &sin6->sin6_addr, ifp->if_broadcastaddr, e_addr); 465 *llsa = (struct sockaddr *)sdl; 466 return (0); 467 #endif 468 default: 469 return (EAFNOSUPPORT); 470 } 471 } 472 473 void 474 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb) 475 { 476 struct sockaddr_dl *sdl; 477 struct ifaddr *ifa; 478 int i; 479 480 ifp->if_addrlen = INFINIBAND_ADDR_LEN; 481 ifp->if_hdrlen = INFINIBAND_HDR_LEN; 482 ifp->if_mtu = INFINIBAND_MTU; 483 if_attach(ifp); 484 ifp->if_output = infiniband_output; 485 ifp->if_input = infiniband_input; 486 ifp->if_resolvemulti = infiniband_resolvemulti; 487 488 if (ifp->if_baudrate == 0) 489 ifp->if_baudrate = IF_Gbps(10); /* default value */ 490 if (llb != NULL) 491 ifp->if_broadcastaddr = llb; 492 493 ifa = ifp->if_addr; 494 KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); 495 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 496 sdl->sdl_type = IFT_INFINIBAND; 497 sdl->sdl_alen = ifp->if_addrlen; 498 499 if (lla != NULL) { 500 memcpy(LLADDR(sdl), lla, ifp->if_addrlen); 501 502 if (ifp->if_hw_addr != NULL) 503 memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen); 504 } else { 505 lla = LLADDR(sdl); 506 } 507 508 /* Attach ethernet compatible network device */ 509 bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); 510 511 /* Announce Infiniband MAC address if non-zero. */ 512 for (i = 0; i < ifp->if_addrlen; i++) 513 if (lla[i] != 0) 514 break; 515 if (i != ifp->if_addrlen) 516 if_printf(ifp, "Infiniband address: %20D\n", lla, ":"); 517 518 /* Add necessary bits are setup; announce it now. */ 519 EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp); 520 521 if (IS_DEFAULT_VNET(curvnet)) 522 devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL); 523 } 524 525 /* 526 * Perform common duties while detaching an Infiniband interface 527 */ 528 void 529 infiniband_ifdetach(struct ifnet *ifp) 530 { 531 bpfdetach(ifp); 532 if_detach(ifp); 533 } 534 535 static int 536 infiniband_modevent(module_t mod, int type, void *data) 537 { 538 switch (type) { 539 case MOD_LOAD: 540 case MOD_UNLOAD: 541 return (0); 542 default: 543 return (EOPNOTSUPP); 544 } 545 } 546 547 static moduledata_t infiniband_mod = { 548 .name = "if_infiniband", 549 .evhand = &infiniband_modevent, 550 }; 551 552 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); 553 MODULE_VERSION(if_infiniband, 1); 554