xref: /freebsd/sys/dev/ntb/if_ntb/if_ntb.c (revision 4f8f43b06ed07e96a250855488cc531799d5b78f)
1 /*-
2  * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
3  * Copyright (C) 2013 Intel Corporation
4  * Copyright (C) 2015 EMC Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
32  *
 * This module contains a driver for a simulated Ethernet device, using
 * the underlying NTB Transport device.
35  *
36  * NOTE: Much of the code in this module is shared with Linux. Any patches may
37  * be picked up and redistributed in Linux with a dual GPL/BSD license.
38  */
39 
40 #include <sys/cdefs.h>
41 #include <sys/param.h>
42 #include <sys/kernel.h>
43 #include <sys/systm.h>
44 #include <sys/buf_ring.h>
45 #include <sys/bus.h>
46 #include <sys/ktr.h>
47 #include <sys/limits.h>
48 #include <sys/module.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 #include <sys/taskqueue.h>
53 
54 #include <net/if.h>
55 #include <net/if_media.h>
56 #include <net/if_types.h>
57 #include <net/if_media.h>
58 #include <net/if_var.h>
59 #include <net/bpf.h>
60 #include <net/ethernet.h>
61 
62 #include <machine/bus.h>
63 
64 #include "../ntb_transport.h"
65 
/* KTR trace class used by the CTRx() trace points in this file. */
#define KTR_NTB KTR_SPARE3
/* The single media type this interface registers and reports. */
#define NTB_MEDIATYPE		 (IFM_ETHER | IFM_AUTO | IFM_FDX)

/* Hardware-assist bits toggled together with IFCAP_TXCSUM in ntb_ioctl(). */
#define	NTB_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
/* Hardware-assist bits toggled together with IFCAP_TXCSUM_IPV6. */
#define	NTB_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
/*
 * Checksum flags stamped on received IPv4/IPv6 mbufs by the RX handler when
 * the matching RX checksum capability is enabled.
 */
#define	NTB_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
				    CSUM_PSEUDO_HDR | \
				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
				    CSUM_SCTP_VALID)
75 
/* Root of the hw.if_ntb sysctl tree. */
static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "if_ntb");

/*
 * Upper bound on the number of queues each interface creates; attach uses
 * the smaller of this value and the transport's queue count.
 */
static unsigned g_if_ntb_num_queues = UINT_MAX;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
    &g_if_ntb_num_queues, 0, "Number of queues per interface");
82 
/* Per-queue state; attach creates one instance per transport queue in use. */
struct ntb_net_queue {
	struct ntb_net_ctx	*sc;		/* owning interface context */
	if_t			 ifp;		/* interface this queue serves */
	struct ntb_transport_qp *qp;		/* underlying transport queue */
	struct buf_ring		*br;		/* outbound mbufs awaiting TX */
	struct task		 tx_task;	/* deferred TX: ntb_handle_tx() */
	struct taskqueue	*tx_tq;		/* taskqueue running tx_task */
	struct mtx		 tx_lock;	/* held while draining br */
	struct callout		 queue_full;	/* retry timer after EAGAIN */
};
93 
/* Per-device softc for one NTB network interface. */
struct ntb_net_ctx {
	if_t			 ifp;		/* network interface handle */
	struct ifmedia		 media;		/* media list (NTB_MEDIATYPE) */
	u_char			 eaddr[ETHER_ADDR_LEN];	/* generated MAC */
	int			 num_queues;	/* valid entries in queues[] */
	struct ntb_net_queue	*queues;	/* array allocated in attach */
	int			 mtu;		/* min transport max size */
};
102 
/* Forward declarations: newbus device methods. */
static int ntb_net_probe(device_t dev);
static int ntb_net_attach(device_t dev);
static int ntb_net_detach(device_t dev);
/* Forward declarations: ifnet and ifmedia entry points. */
static void ntb_net_init(void *arg);
static int ntb_ifmedia_upd(struct ifnet *);
static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
static int ntb_transmit(if_t ifp, struct mbuf *m);
/* Forward declarations: callbacks registered with the NTB transport. */
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
/* Forward declarations: deferred TX machinery and helpers. */
static void ntb_handle_tx(void *arg, int pending);
static void ntb_qp_full(void *arg);
static void ntb_qflush(if_t ifp);
static void create_random_local_eui48(u_char *eaddr);
120 
121 static int
122 ntb_net_probe(device_t dev)
123 {
124 
125 	device_set_desc(dev, "NTB Network Interface");
126 	return (0);
127 }
128 
129 static int
130 ntb_net_attach(device_t dev)
131 {
132 	struct ntb_net_ctx *sc = device_get_softc(dev);
133 	struct ntb_net_queue *q;
134 	if_t ifp;
135 	struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
136 	    ntb_net_tx_handler, ntb_net_event_handler };
137 	int i;
138 
139 	ifp = sc->ifp = if_gethandle(IFT_ETHER);
140 	if (ifp == NULL) {
141 		printf("ntb: Cannot allocate ifnet structure\n");
142 		return (ENOMEM);
143 	}
144 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
145 	if_setdev(ifp, dev);
146 
147 	sc->num_queues = min(g_if_ntb_num_queues,
148 	    ntb_transport_queue_count(dev));
149 	sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
150 	    M_DEVBUF, M_WAITOK | M_ZERO);
151 	sc->mtu = INT_MAX;
152 	for (i = 0; i < sc->num_queues; i++) {
153 		q = &sc->queues[i];
154 		q->sc = sc;
155 		q->ifp = ifp;
156 		q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
157 		if (q->qp == NULL)
158 			break;
159 		sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
160 		mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
161 		q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
162 		TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
163 		q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
164 		    taskqueue_thread_enqueue, &q->tx_tq);
165 		taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
166 		    device_get_nameunit(dev), i);
167 		callout_init(&q->queue_full, 1);
168 	}
169 	sc->num_queues = i;
170 	device_printf(dev, "%d queue(s)\n", sc->num_queues);
171 
172 	if_setinitfn(ifp, ntb_net_init);
173 	if_setsoftc(ifp, sc);
174 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
175 	if_setioctlfn(ifp, ntb_ioctl);
176 	if_settransmitfn(ifp, ntb_transmit);
177 	if_setqflushfn(ifp, ntb_qflush);
178 	create_random_local_eui48(sc->eaddr);
179 	ether_ifattach(ifp, sc->eaddr);
180 	if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
181 	    IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
182 	if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
183 	if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
184 
185 	ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
186 	    ntb_ifmedia_sts);
187 	ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
188 	ifmedia_set(&sc->media, NTB_MEDIATYPE);
189 
190 	for (i = 0; i < sc->num_queues; i++)
191 		ntb_transport_link_up(sc->queues[i].qp);
192 	return (0);
193 }
194 
195 static int
196 ntb_net_detach(device_t dev)
197 {
198 	struct ntb_net_ctx *sc = device_get_softc(dev);
199 	struct ntb_net_queue *q;
200 	int i;
201 
202 	for (i = 0; i < sc->num_queues; i++)
203 		ntb_transport_link_down(sc->queues[i].qp);
204 	ether_ifdetach(sc->ifp);
205 	if_free(sc->ifp);
206 	ifmedia_removeall(&sc->media);
207 	for (i = 0; i < sc->num_queues; i++) {
208 		q = &sc->queues[i];
209 		ntb_transport_free_queue(q->qp);
210 		buf_ring_free(q->br, M_DEVBUF);
211 		callout_drain(&q->queue_full);
212 		taskqueue_drain_all(q->tx_tq);
213 		mtx_destroy(&q->tx_lock);
214 	}
215 	free(sc->queues, M_DEVBUF);
216 	return (0);
217 }
218 
219 /* Network device interface */
220 
221 static void
222 ntb_net_init(void *arg)
223 {
224 	struct ntb_net_ctx *sc = arg;
225 	if_t ifp = sc->ifp;
226 
227 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
228 	if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp));
229 	if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
230 	    LINK_STATE_UP : LINK_STATE_DOWN);
231 }
232 
233 static int
234 ntb_ioctl(if_t ifp, u_long command, caddr_t data)
235 {
236 	struct ntb_net_ctx *sc = if_getsoftc(ifp);
237 	struct ifreq *ifr = (struct ifreq *)data;
238 	int error = 0;
239 
240 	switch (command) {
241 	case SIOCSIFFLAGS:
242 	case SIOCADDMULTI:
243 	case SIOCDELMULTI:
244 		break;
245 
246 	case SIOCSIFMTU:
247 	    {
248 		if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
249 			error = EINVAL;
250 			break;
251 		}
252 
253 		if_setmtu(ifp, ifr->ifr_mtu);
254 		break;
255 	    }
256 
257 	case SIOCSIFMEDIA:
258 	case SIOCGIFMEDIA:
259 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
260 		break;
261 
262 	case SIOCSIFCAP:
263 		if (ifr->ifr_reqcap & IFCAP_RXCSUM)
264 			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
265 		else
266 			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
267 		if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
268 			if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
269 			if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
270 		} else {
271 			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
272 			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
273 		}
274 		if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
275 			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
276 		else
277 			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
278 		if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
279 			if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
280 			if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
281 		} else {
282 			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
283 			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
284 		}
285 		break;
286 
287 	default:
288 		error = ether_ioctl(ifp, command, data);
289 		break;
290 	}
291 
292 	return (error);
293 }
294 
295 static int
296 ntb_ifmedia_upd(struct ifnet *ifp)
297 {
298 	struct ntb_net_ctx *sc = if_getsoftc(ifp);
299 	struct ifmedia *ifm = &sc->media;
300 
301 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
302 		return (EINVAL);
303 
304 	return (0);
305 }
306 
307 static void
308 ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
309 {
310 	struct ntb_net_ctx *sc = if_getsoftc(ifp);
311 
312 	ifmr->ifm_status = IFM_AVALID;
313 	ifmr->ifm_active = NTB_MEDIATYPE;
314 	if (ntb_transport_link_query(sc->queues[0].qp))
315 		ifmr->ifm_status |= IFM_ACTIVE;
316 }
317 
318 static void
319 ntb_transmit_locked(struct ntb_net_queue *q)
320 {
321 	if_t ifp = q->ifp;
322 	struct mbuf *m;
323 	int rc, len;
324 	short mflags;
325 
326 	CTR0(KTR_NTB, "TX: ntb_transmit_locked");
327 	while ((m = drbr_peek(ifp, q->br)) != NULL) {
328 		CTR1(KTR_NTB, "TX: start mbuf %p", m);
329 		ether_bpf_mtap_if(ifp, m);
330 		len = m->m_pkthdr.len;
331 		mflags = m->m_flags;
332 		rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
333 		if (rc != 0) {
334 			CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
335 			if (rc == EAGAIN) {
336 				drbr_putback(ifp, q->br, m);
337 				callout_reset_sbt(&q->queue_full,
338 				    SBT_1MS / 4, SBT_1MS / 4,
339 				    ntb_qp_full, q, 0);
340 			} else {
341 				m_freem(m);
342 				drbr_advance(ifp, q->br);
343 				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
344 			}
345 			break;
346 		}
347 		drbr_advance(ifp, q->br);
348 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
349 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
350 		if (mflags & M_MCAST)
351 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
352 	}
353 }
354 
355 static int
356 ntb_transmit(if_t ifp, struct mbuf *m)
357 {
358 	struct ntb_net_ctx *sc = if_getsoftc(ifp);
359 	struct ntb_net_queue *q;
360 	int error, i;
361 
362 	CTR0(KTR_NTB, "TX: ntb_transmit");
363 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
364 		i = m->m_pkthdr.flowid % sc->num_queues;
365 	else
366 		i = curcpu % sc->num_queues;
367 	q = &sc->queues[i];
368 
369 	error = drbr_enqueue(ifp, q->br, m);
370 	if (error)
371 		return (error);
372 
373 	if (mtx_trylock(&q->tx_lock)) {
374 		ntb_transmit_locked(q);
375 		mtx_unlock(&q->tx_lock);
376 	} else
377 		taskqueue_enqueue(q->tx_tq, &q->tx_task);
378 	return (0);
379 }
380 
381 static void
382 ntb_handle_tx(void *arg, int pending)
383 {
384 	struct ntb_net_queue *q = arg;
385 
386 	mtx_lock(&q->tx_lock);
387 	ntb_transmit_locked(q);
388 	mtx_unlock(&q->tx_lock);
389 }
390 
391 static void
392 ntb_qp_full(void *arg)
393 {
394 	struct ntb_net_queue *q = arg;
395 
396 	CTR0(KTR_NTB, "TX: qp_full callout");
397 	if (ntb_transport_tx_free_entry(q->qp) > 0)
398 		taskqueue_enqueue(q->tx_tq, &q->tx_task);
399 	else
400 		callout_schedule_sbt(&q->queue_full,
401 		    SBT_1MS / 4, SBT_1MS / 4, 0);
402 }
403 
404 static void
405 ntb_qflush(if_t ifp)
406 {
407 	struct ntb_net_ctx *sc = if_getsoftc(ifp);
408 	struct ntb_net_queue *q;
409 	struct mbuf *m;
410 	int i;
411 
412 	for (i = 0; i < sc->num_queues; i++) {
413 		q = &sc->queues[i];
414 		mtx_lock(&q->tx_lock);
415 		while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
416 			m_freem(m);
417 		mtx_unlock(&q->tx_lock);
418 	}
419 	if_qflush(ifp);
420 }
421 
422 /* Network Device Callbacks */
423 static void
424 ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
425     int len)
426 {
427 
428 	m_freem(data);
429 	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
430 }
431 
432 static void
433 ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
434     int len)
435 {
436 	struct ntb_net_queue *q = qp_data;
437 	struct ntb_net_ctx *sc = q->sc;
438 	struct mbuf *m = data;
439 	if_t ifp = q->ifp;
440 	uint16_t proto;
441 
442 	CTR1(KTR_NTB, "RX: rx handler (%d)", len);
443 	if (len < 0) {
444 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
445 		return;
446 	}
447 
448 	m->m_pkthdr.rcvif = ifp;
449 	if (sc->num_queues > 1) {
450 		m->m_pkthdr.flowid = q - sc->queues;
451 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
452 	}
453 	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
454 		m_copydata(m, 12, 2, (void *)&proto);
455 		switch (ntohs(proto)) {
456 		case ETHERTYPE_IP:
457 			if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
458 				m->m_pkthdr.csum_data = 0xffff;
459 				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
460 			}
461 			break;
462 		case ETHERTYPE_IPV6:
463 			if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
464 				m->m_pkthdr.csum_data = 0xffff;
465 				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
466 			}
467 			break;
468 		}
469 	}
470 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
471 	if_input(ifp, m);
472 }
473 
474 static void
475 ntb_net_event_handler(void *data, enum ntb_link_event status)
476 {
477 	struct ntb_net_queue *q = data;
478 
479 	if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp));
480 	if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP :
481 	    LINK_STATE_DOWN);
482 }
483 
484 /* Helper functions */
485 /* TODO: This too should really be part of the kernel */
486 #define EUI48_MULTICAST			1 << 0
487 #define EUI48_LOCALLY_ADMINISTERED	1 << 1
488 static void
489 create_random_local_eui48(u_char *eaddr)
490 {
491 	static uint8_t counter = 0;
492 
493 	eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
494 	arc4rand(&eaddr[1], 4, 0);
495 	eaddr[5] = counter++;
496 }
497 
/* Newbus method table wiring the probe/attach/detach routines above. */
static device_method_t ntb_net_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_net_probe),
	DEVMETHOD(device_attach,    ntb_net_attach),
	DEVMETHOD(device_detach,    ntb_net_detach),
	DEVMETHOD_END
};

/* Driver class "ntb"; the softc is a struct ntb_net_ctx. */
static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
    sizeof(struct ntb_net_ctx));
/* Register under the ntb_transport bus and declare the module dependency. */
DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, NULL, NULL);
MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
MODULE_VERSION(if_ntb, 1);
511