xref: /freebsd/sys/net/if_infiniband.c (revision 25fb30bd9abc492359ad1f66901a06cb8cd08370)
1 /*-
2  * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 #include "opt_inet.h"
27 #include "opt_inet6.h"
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/eventhandler.h>
37 #include <sys/socket.h>
38 #include <sys/sysctl.h>
39 #include <sys/devctl.h>
40 #include <sys/module.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/route.h>
45 #include <net/ethernet.h>
46 #include <net/infiniband.h>
47 #include <net/bpf.h>
48 #include <net/if_llatbl.h>
49 #include <net/netisr.h>
50 #include <net/if_dl.h>
51 #include <net/if_types.h>
52 #include <net/if_media.h>
53 #include <net/if_lagg.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/if_ether.h>
57 #include <netinet/ip6.h>
58 
59 #include <netinet6/in6_var.h>
60 #include <netinet6/nd6.h>
61 
62 #include <security/mac/mac_framework.h>
63 
/*
 * if_lagg(4) support: input hook called by infiniband_input() for
 * interfaces whose type is IFT_INFINIBANDLAG.  It is handed the
 * receiving interface and the packet and returns the mbuf to keep
 * processing, or NULL when nothing is left for the caller to process.
 * The pointer must be non-NULL whenever such an interface receives a
 * packet (this is asserted at the call site).
 */
struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *ifp, struct mbuf *m);
66 
67 #ifdef INET
/*
 * Map an IPv4 multicast group address to the 20-byte IPoIB link-layer
 * multicast address.
 *
 * addr:      IPv4 group address in network byte order.
 * broadcast: link-layer broadcast address of the interface; the low
 *            nibble of byte 5 supplies the scope and bytes 8-9 are
 *            copied through unchanged (the P_Key in RFC 4391 terms).
 * buf:       output buffer, at least 20 bytes long.
 *
 * RFC 4391 section 4 defines the IPv4 group ID as the low 28 bits of
 * the group address, with all remaining group ID bits zero.  The
 * constant 1110 class-D prefix must therefore be masked off byte 16;
 * copying the full 32 bits yields a different MGID than the one other
 * RFC 4391 implementations join for the same IPv4 group.
 */
static inline void
infiniband_ipv4_multicast_map(uint32_t addr,
    const uint8_t *broadcast, uint8_t *buf)
{
	uint8_t scope;

	addr = ntohl(addr);
	scope = broadcast[5] & 0xF;

	buf[0] = 0;
	buf[1] = 0xff;
	buf[2] = 0xff;
	buf[3] = 0xff;
	buf[4] = 0xff;
	buf[5] = 0x10 | scope;
	buf[6] = 0x40;
	buf[7] = 0x1b;
	buf[8] = broadcast[8];
	buf[9] = broadcast[9];
	buf[10] = 0;
	buf[11] = 0;
	buf[12] = 0;
	buf[13] = 0;
	buf[14] = 0;
	buf[15] = 0;
	/* Only 28 bits of the IPv4 address belong to the group ID. */
	buf[16] = (addr >> 24) & 0x0f;
	buf[17] = (addr >> 16) & 0xff;
	buf[18] = (addr >> 8) & 0xff;
	buf[19] = addr & 0xff;
}
98 #endif
99 
100 #ifdef INET6
/*
 * Map an IPv6 multicast group address to the 20-byte IPoIB link-layer
 * multicast address.
 *
 * addr:      IPv6 group address; its last 10 bytes (s6_addr[6..15])
 *            become bytes 10-19 of the result.
 * broadcast: link-layer broadcast address of the interface; the low
 *            nibble of byte 5 supplies the scope and bytes 8-9 are
 *            copied through unchanged.
 * buf:       output buffer, at least 20 bytes long.
 */
static inline void
infiniband_ipv6_multicast_map(const struct in6_addr *addr,
    const uint8_t *broadcast, uint8_t *buf)
{
	/* Constant leading bytes; byte 5 gets the scope nibble OR-ed in. */
	static const uint8_t lead[8] = {
		0x00, 0xff, 0xff, 0xff, 0xff, 0x10, 0x60, 0x1b
	};
	const uint8_t scope_bits = broadcast[5] & 0xF;
	int pos;

	for (pos = 0; pos < 8; pos++)
		buf[pos] = lead[pos];
	buf[5] = 0x10 | scope_bits;

	buf[8] = broadcast[8];
	buf[9] = broadcast[9];

	/* Low 80 bits of the IPv6 group address. */
	memcpy(&buf[10], &addr->s6_addr[6], 10);
}
121 #endif
122 
123 /*
124  * This is for clients that have an infiniband_header in the mbuf.
125  */
126 void
127 infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
128 {
129 	struct infiniband_header *ibh;
130 	struct ether_header eh;
131 
132 	if (mb->m_len < sizeof(*ibh))
133 		return;
134 
135 	ibh = mtod(mb, struct infiniband_header *);
136 	eh.ether_type = ibh->ib_protocol;
137 	memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
138 	memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
139 	mb->m_data += sizeof(*ibh);
140 	mb->m_len -= sizeof(*ibh);
141 	mb->m_pkthdr.len -= sizeof(*ibh);
142 	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
143 	mb->m_data -= sizeof(*ibh);
144 	mb->m_len += sizeof(*ibh);
145 	mb->m_pkthdr.len += sizeof(*ibh);
146 }
147 
148 /*
149  * Infiniband output routine.
150  */
151 static int
152 infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
153     struct route *ro)
154 {
155 	uint8_t edst[INFINIBAND_ADDR_LEN];
156 #if defined(INET) || defined(INET6)
157 	struct llentry *lle = NULL;
158 #endif
159 	struct infiniband_header *ibh;
160 	int error = 0;
161 	uint16_t type;
162 	bool is_gw;
163 
164 	NET_EPOCH_ASSERT();
165 
166 	is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
167 
168 #ifdef MAC
169 	error = mac_ifnet_check_transmit(ifp, m);
170 	if (error)
171 		goto bad;
172 #endif
173 
174 	M_PROFILE(m);
175 	if (ifp->if_flags & IFF_MONITOR) {
176 		error = ENETDOWN;
177 		goto bad;
178 	}
179 	if (!((ifp->if_flags & IFF_UP) &&
180 	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
181 		error = ENETDOWN;
182 		goto bad;
183 	}
184 
185 	switch (dst->sa_family) {
186 	case AF_LINK:
187 		goto output;
188 #ifdef INET
189 	case AF_INET:
190 		if (lle != NULL && (lle->la_flags & LLE_VALID)) {
191 			memcpy(edst, lle->ll_addr, sizeof(edst));
192 		} else if (m->m_flags & M_MCAST) {
193 			infiniband_ipv4_multicast_map(
194 			    ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
195 			    ifp->if_broadcastaddr, edst);
196 		} else {
197 			error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
198 			if (error) {
199 				if (error == EWOULDBLOCK)
200 					error = 0;
201 				m = NULL;	/* mbuf is consumed by resolver */
202 				goto bad;
203 			}
204 		}
205 		type = htons(ETHERTYPE_IP);
206 		break;
207 	case AF_ARP: {
208 		struct arphdr *ah;
209 
210 		if (m->m_len < sizeof(*ah)) {
211 			error = EINVAL;
212 			goto bad;
213 		}
214 
215 		ah = mtod(m, struct arphdr *);
216 
217 		if (m->m_len < arphdr_len(ah)) {
218 			error = EINVAL;
219 			goto bad;
220 		}
221 		ah->ar_hrd = htons(ARPHRD_INFINIBAND);
222 
223 		switch (ntohs(ah->ar_op)) {
224 		case ARPOP_REVREQUEST:
225 		case ARPOP_REVREPLY:
226 			type = htons(ETHERTYPE_REVARP);
227 			break;
228 		case ARPOP_REQUEST:
229 		case ARPOP_REPLY:
230 		default:
231 			type = htons(ETHERTYPE_ARP);
232 			break;
233 		}
234 
235 		if (m->m_flags & M_BCAST) {
236 			memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
237 		} else {
238 			if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
239 				error = EINVAL;
240 				goto bad;
241 			}
242 			memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
243 		}
244 		break;
245 	}
246 #endif
247 #ifdef INET6
248 	case AF_INET6: {
249 		const struct ip6_hdr *ip6;
250 
251 		ip6 = mtod(m, const struct ip6_hdr *);
252 		if (m->m_len < sizeof(*ip6)) {
253 			error = EINVAL;
254 			goto bad;
255 		} else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
256 			memcpy(edst, lle->ll_addr, sizeof(edst));
257 		} else if (m->m_flags & M_MCAST) {
258 			infiniband_ipv6_multicast_map(
259 			    &((const struct sockaddr_in6 *)dst)->sin6_addr,
260 			    ifp->if_broadcastaddr, edst);
261 		} else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
262 			memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
263 		} else {
264 			error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
265 			if (error) {
266 				if (error == EWOULDBLOCK)
267 					error = 0;
268 				m = NULL;	/* mbuf is consumed by resolver */
269 				goto bad;
270 			}
271 		}
272 		type = htons(ETHERTYPE_IPV6);
273 		break;
274 	}
275 #endif
276 	default:
277 		error = EAFNOSUPPORT;
278 		goto bad;
279 	}
280 
281 	/*
282 	 * Add local net header.  If no space in first mbuf,
283 	 * allocate another.
284 	 */
285 	M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
286 	if (m == NULL) {
287 		error = ENOBUFS;
288 		goto bad;
289 	}
290 	ibh = mtod(m, struct infiniband_header *);
291 
292 	ibh->ib_protocol = type;
293 	memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
294 
295 	/*
296 	 * Queue message on interface, update output statistics if
297 	 * successful, and start output if interface not yet active.
298 	 */
299 output:
300 	return (ifp->if_transmit(ifp, m));
301 bad:
302 	if (m != NULL)
303 		m_freem(m);
304 	return (error);
305 }
306 
307 /*
308  * Process a received Infiniband packet.
309  */
310 static void
311 infiniband_input(struct ifnet *ifp, struct mbuf *m)
312 {
313 	struct infiniband_header *ibh;
314 	struct epoch_tracker et;
315 	int isr;
316 
317 	CURVNET_SET_QUIET(ifp->if_vnet);
318 
319 	if ((ifp->if_flags & IFF_UP) == 0) {
320 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
321 		m_freem(m);
322 		goto done;
323 	}
324 
325 	ibh = mtod(m, struct infiniband_header *);
326 
327 	/*
328 	 * Reset layer specific mbuf flags to avoid confusing upper
329 	 * layers:
330 	 */
331 	m->m_flags &= ~M_VLANTAG;
332 	m_clrprotoflags(m);
333 
334 	if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
335 		if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
336 		    ifp->if_addrlen) == 0)
337 			m->m_flags |= M_BCAST;
338 		else
339 			m->m_flags |= M_MCAST;
340 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
341 	}
342 
343 	/* Let BPF have it before we strip the header. */
344 	INFINIBAND_BPF_MTAP(ifp, m);
345 
346 	/* Allow monitor mode to claim this frame, after stats are updated. */
347 	if (ifp->if_flags & IFF_MONITOR) {
348 		m_freem(m);
349 		goto done;
350 	}
351 
352 	/* Direct packet to correct FIB based on interface config. */
353 	M_SETFIB(m, ifp->if_fib);
354 
355 	/* Handle input from a lagg<N> port */
356 	if (ifp->if_type == IFT_INFINIBANDLAG) {
357 		KASSERT(lagg_input_infiniband_p != NULL,
358 		    ("%s: if_lagg not loaded!", __func__));
359 		m = (*lagg_input_infiniband_p)(ifp, m);
360 		if (__predict_false(m == NULL))
361 			goto done;
362 		ifp = m->m_pkthdr.rcvif;
363 	}
364 
365 	/*
366 	 * Dispatch frame to upper layer.
367 	 */
368 	switch (ibh->ib_protocol) {
369 #ifdef INET
370 	case htons(ETHERTYPE_IP):
371 		isr = NETISR_IP;
372 		break;
373 
374 	case htons(ETHERTYPE_ARP):
375 		if (ifp->if_flags & IFF_NOARP) {
376 			/* Discard packet if ARP is disabled on interface */
377 			m_freem(m);
378 			goto done;
379 		}
380 		isr = NETISR_ARP;
381 		break;
382 #endif
383 #ifdef INET6
384 	case htons(ETHERTYPE_IPV6):
385 		isr = NETISR_IPV6;
386 		break;
387 #endif
388 	default:
389 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
390 		m_freem(m);
391 		goto done;
392 	}
393 
394 	/* Strip off the Infiniband header. */
395 	m_adj(m, INFINIBAND_HDR_LEN);
396 
397 #ifdef MAC
398 	/*
399 	 * Tag the mbuf with an appropriate MAC label before any other
400 	 * consumers can get to it.
401 	 */
402 	mac_ifnet_create_mbuf(ifp, m);
403 #endif
404 	/* Allow monitor mode to claim this frame, after stats are updated. */
405 	NET_EPOCH_ENTER(et);
406 	netisr_dispatch(isr, m);
407 	NET_EPOCH_EXIT(et);
408 done:
409 	CURVNET_RESTORE();
410 }
411 
412 static int
413 infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
414     struct sockaddr *sa)
415 {
416 	struct sockaddr_dl *sdl;
417 #ifdef INET
418 	struct sockaddr_in *sin;
419 #endif
420 #ifdef INET6
421 	struct sockaddr_in6 *sin6;
422 #endif
423 	uint8_t *e_addr;
424 
425 	switch (sa->sa_family) {
426 	case AF_LINK:
427 		/*
428 		 * No mapping needed. Just check that it's a valid MC address.
429 		 */
430 		sdl = (struct sockaddr_dl *)sa;
431 		e_addr = LLADDR(sdl);
432 		if (!INFINIBAND_IS_MULTICAST(e_addr))
433 			return (EADDRNOTAVAIL);
434 		*llsa = NULL;
435 		return 0;
436 
437 #ifdef INET
438 	case AF_INET:
439 		sin = (struct sockaddr_in *)sa;
440 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
441 			return (EADDRNOTAVAIL);
442 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
443 		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
444 		e_addr = LLADDR(sdl);
445 		infiniband_ipv4_multicast_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
446 		    e_addr);
447 		*llsa = (struct sockaddr *)sdl;
448 		return (0);
449 #endif
450 #ifdef INET6
451 	case AF_INET6:
452 		sin6 = (struct sockaddr_in6 *)sa;
453 		/*
454 		 * An IP6 address of 0 means listen to all of the
455 		 * multicast address used for IP6. This has no meaning
456 		 * in infiniband.
457 		 */
458 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
459 			return (EADDRNOTAVAIL);
460 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
461 			return (EADDRNOTAVAIL);
462 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
463 		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
464 		e_addr = LLADDR(sdl);
465 		infiniband_ipv6_multicast_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
466 		*llsa = (struct sockaddr *)sdl;
467 		return (0);
468 #endif
469 	default:
470 		return (EAFNOSUPPORT);
471 	}
472 }
473 
474 void
475 infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
476 {
477 	struct sockaddr_dl *sdl;
478 	struct ifaddr *ifa;
479 	int i;
480 
481 	ifp->if_addrlen = INFINIBAND_ADDR_LEN;
482 	ifp->if_hdrlen = INFINIBAND_HDR_LEN;
483 	ifp->if_mtu = INFINIBAND_MTU;
484 	if_attach(ifp);
485 	ifp->if_output = infiniband_output;
486 	ifp->if_input = infiniband_input;
487 	ifp->if_resolvemulti = infiniband_resolvemulti;
488 
489 	if (ifp->if_baudrate == 0)
490 		ifp->if_baudrate = IF_Gbps(10);	/* default value */
491 	if (llb != NULL)
492 		ifp->if_broadcastaddr = llb;
493 
494 	ifa = ifp->if_addr;
495 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
496 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
497 	sdl->sdl_type = IFT_INFINIBAND;
498 	sdl->sdl_alen = ifp->if_addrlen;
499 
500 	if (lla != NULL) {
501 		memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
502 
503 		if (ifp->if_hw_addr != NULL)
504 			memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
505 	} else {
506 		lla = LLADDR(sdl);
507 	}
508 
509 	/* Attach ethernet compatible network device */
510 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
511 
512 	/* Announce Infiniband MAC address if non-zero. */
513 	for (i = 0; i < ifp->if_addrlen; i++)
514 		if (lla[i] != 0)
515 			break;
516 	if (i != ifp->if_addrlen)
517 		if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
518 
519 	/* Add necessary bits are setup; announce it now. */
520 	EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
521 
522 	if (IS_DEFAULT_VNET(curvnet))
523 		devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
524 }
525 
/*
 * Common teardown for an Infiniband interface: detach the BPF tap
 * first, then detach the interface itself.
 */
void
infiniband_ifdetach(struct ifnet *ifp)
{

	bpfdetach(ifp);
	if_detach(ifp);
}
535 
536 static int
537 infiniband_modevent(module_t mod, int type, void *data)
538 {
539 	switch (type) {
540 	case MOD_LOAD:
541 	case MOD_UNLOAD:
542 		return (0);
543 	default:
544 		return (EOPNOTSUPP);
545 	}
546 }
547 
548 static moduledata_t infiniband_mod = {
549 	.name = "if_infiniband",
550 	.evhand = &infiniband_modevent,
551 };
552 
553 DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
554 MODULE_VERSION(if_infiniband, 1);
555