1 /*-
2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
3 * Copyright (C) 2013 Intel Corporation
4 * Copyright (C) 2015 EMC Corporation
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for a simulated Ethernet device, using the
 * underlying NTB Transport device.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
39
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/systm.h>
43 #include <sys/buf_ring.h>
44 #include <sys/bus.h>
45 #include <sys/ktr.h>
46 #include <sys/limits.h>
47 #include <sys/module.h>
48 #include <sys/socket.h>
49 #include <sys/sockio.h>
50 #include <sys/sysctl.h>
51 #include <sys/taskqueue.h>
52
53 #include <net/if.h>
54 #include <net/if_media.h>
55 #include <net/if_types.h>
56 #include <net/if_media.h>
57 #include <net/if_var.h>
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60
61 #include <machine/bus.h>
62
63 #include "../ntb_transport.h"
64
/* KTR trace class used by every CTRn() call in this driver. */
#define	KTR_NTB		KTR_SPARE3

/* The single media type this interface registers and reports as active. */
#define	NTB_MEDIATYPE	(IFM_ETHER | IFM_AUTO | IFM_FDX)

/* Hardware-assist flags toggled together with IFCAP_TXCSUM. */
#define	NTB_CSUM_FEATURES						\
	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)

/* Hardware-assist flags toggled together with IFCAP_TXCSUM_IPV6. */
#define	NTB_CSUM_FEATURES6						\
	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)

/*
 * Checksum flags stamped on received IPv4/IPv6 packets when the matching
 * receive-checksum capability is enabled.
 */
#define	NTB_CSUM_SET							\
	(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR |	\
	    CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_SCTP_VALID)
74
75 static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
76 "if_ntb");
77
78 static unsigned g_if_ntb_num_queues = UINT_MAX;
79 SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
80 &g_if_ntb_num_queues, 0, "Number of queues per interface");
81
82 struct ntb_net_queue {
83 struct ntb_net_ctx *sc;
84 if_t ifp;
85 struct ntb_transport_qp *qp;
86 struct buf_ring *br;
87 struct task tx_task;
88 struct taskqueue *tx_tq;
89 struct mtx tx_lock;
90 struct callout queue_full;
91 };
92
93 struct ntb_net_ctx {
94 if_t ifp;
95 struct ifmedia media;
96 u_char eaddr[ETHER_ADDR_LEN];
97 int num_queues;
98 struct ntb_net_queue *queues;
99 int mtu;
100 };
101
102 static int ntb_net_probe(device_t dev);
103 static int ntb_net_attach(device_t dev);
104 static int ntb_net_detach(device_t dev);
105 static void ntb_net_init(void *arg);
106 static int ntb_ifmedia_upd(struct ifnet *);
107 static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
108 static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
109 static int ntb_transmit(if_t ifp, struct mbuf *m);
110 static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
111 void *data, int len);
112 static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
113 void *data, int len);
114 static void ntb_net_event_handler(void *data, enum ntb_link_event status);
115 static void ntb_handle_tx(void *arg, int pending);
116 static void ntb_qp_full(void *arg);
117 static void ntb_qflush(if_t ifp);
118 static void create_random_local_eui48(u_char *eaddr);
119
120 static int
ntb_net_probe(device_t dev)121 ntb_net_probe(device_t dev)
122 {
123
124 device_set_desc(dev, "NTB Network Interface");
125 return (0);
126 }
127
128 static int
ntb_net_attach(device_t dev)129 ntb_net_attach(device_t dev)
130 {
131 struct ntb_net_ctx *sc = device_get_softc(dev);
132 struct ntb_net_queue *q;
133 if_t ifp;
134 struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
135 ntb_net_tx_handler, ntb_net_event_handler };
136 int i;
137
138 ifp = sc->ifp = if_gethandle(IFT_ETHER);
139 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
140 if_setdev(ifp, dev);
141
142 sc->num_queues = min(g_if_ntb_num_queues,
143 ntb_transport_queue_count(dev));
144 sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
145 M_DEVBUF, M_WAITOK | M_ZERO);
146 sc->mtu = INT_MAX;
147 for (i = 0; i < sc->num_queues; i++) {
148 q = &sc->queues[i];
149 q->sc = sc;
150 q->ifp = ifp;
151 q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
152 if (q->qp == NULL)
153 break;
154 sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
155 mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
156 q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
157 TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
158 q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
159 taskqueue_thread_enqueue, &q->tx_tq);
160 taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
161 device_get_nameunit(dev), i);
162 callout_init(&q->queue_full, 1);
163 }
164 sc->num_queues = i;
165 device_printf(dev, "%d queue(s)\n", sc->num_queues);
166
167 if_setinitfn(ifp, ntb_net_init);
168 if_setsoftc(ifp, sc);
169 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
170 if_setioctlfn(ifp, ntb_ioctl);
171 if_settransmitfn(ifp, ntb_transmit);
172 if_setqflushfn(ifp, ntb_qflush);
173 create_random_local_eui48(sc->eaddr);
174 ether_ifattach(ifp, sc->eaddr);
175 if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
176 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
177 if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
178 if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
179
180 ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
181 ntb_ifmedia_sts);
182 ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
183 ifmedia_set(&sc->media, NTB_MEDIATYPE);
184
185 for (i = 0; i < sc->num_queues; i++)
186 ntb_transport_link_up(sc->queues[i].qp);
187 return (0);
188 }
189
190 static int
ntb_net_detach(device_t dev)191 ntb_net_detach(device_t dev)
192 {
193 struct ntb_net_ctx *sc = device_get_softc(dev);
194 struct ntb_net_queue *q;
195 int i;
196
197 for (i = 0; i < sc->num_queues; i++)
198 ntb_transport_link_down(sc->queues[i].qp);
199 ether_ifdetach(sc->ifp);
200 if_free(sc->ifp);
201 ifmedia_removeall(&sc->media);
202 for (i = 0; i < sc->num_queues; i++) {
203 q = &sc->queues[i];
204 ntb_transport_free_queue(q->qp);
205 buf_ring_free(q->br, M_DEVBUF);
206 callout_drain(&q->queue_full);
207 taskqueue_drain_all(q->tx_tq);
208 mtx_destroy(&q->tx_lock);
209 }
210 free(sc->queues, M_DEVBUF);
211 return (0);
212 }
213
214 /* Network device interface */
215
216 static void
ntb_net_init(void * arg)217 ntb_net_init(void *arg)
218 {
219 struct ntb_net_ctx *sc = arg;
220 if_t ifp = sc->ifp;
221
222 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
223 if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp));
224 if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
225 LINK_STATE_UP : LINK_STATE_DOWN);
226 }
227
228 static int
ntb_ioctl(if_t ifp,u_long command,caddr_t data)229 ntb_ioctl(if_t ifp, u_long command, caddr_t data)
230 {
231 struct ntb_net_ctx *sc = if_getsoftc(ifp);
232 struct ifreq *ifr = (struct ifreq *)data;
233 int error = 0;
234
235 switch (command) {
236 case SIOCSIFFLAGS:
237 case SIOCADDMULTI:
238 case SIOCDELMULTI:
239 break;
240
241 case SIOCSIFMTU:
242 {
243 if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
244 error = EINVAL;
245 break;
246 }
247
248 if_setmtu(ifp, ifr->ifr_mtu);
249 break;
250 }
251
252 case SIOCSIFMEDIA:
253 case SIOCGIFMEDIA:
254 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
255 break;
256
257 case SIOCSIFCAP:
258 if (ifr->ifr_reqcap & IFCAP_RXCSUM)
259 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
260 else
261 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
262 if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
263 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
264 if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
265 } else {
266 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
267 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
268 }
269 if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
270 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
271 else
272 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
273 if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
274 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
275 if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
276 } else {
277 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
278 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
279 }
280 break;
281
282 default:
283 error = ether_ioctl(ifp, command, data);
284 break;
285 }
286
287 return (error);
288 }
289
290 static int
ntb_ifmedia_upd(struct ifnet * ifp)291 ntb_ifmedia_upd(struct ifnet *ifp)
292 {
293 struct ntb_net_ctx *sc = if_getsoftc(ifp);
294 struct ifmedia *ifm = &sc->media;
295
296 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
297 return (EINVAL);
298
299 return (0);
300 }
301
302 static void
ntb_ifmedia_sts(struct ifnet * ifp,struct ifmediareq * ifmr)303 ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
304 {
305 struct ntb_net_ctx *sc = if_getsoftc(ifp);
306
307 ifmr->ifm_status = IFM_AVALID;
308 ifmr->ifm_active = NTB_MEDIATYPE;
309 if (ntb_transport_link_query(sc->queues[0].qp))
310 ifmr->ifm_status |= IFM_ACTIVE;
311 }
312
313 static void
ntb_transmit_locked(struct ntb_net_queue * q)314 ntb_transmit_locked(struct ntb_net_queue *q)
315 {
316 if_t ifp = q->ifp;
317 struct mbuf *m;
318 int rc, len;
319 short mflags;
320
321 CTR0(KTR_NTB, "TX: ntb_transmit_locked");
322 while ((m = drbr_peek(ifp, q->br)) != NULL) {
323 CTR1(KTR_NTB, "TX: start mbuf %p", m);
324 ether_bpf_mtap_if(ifp, m);
325 len = m->m_pkthdr.len;
326 mflags = m->m_flags;
327 rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
328 if (rc != 0) {
329 CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
330 if (rc == EAGAIN) {
331 drbr_putback(ifp, q->br, m);
332 callout_reset_sbt(&q->queue_full,
333 SBT_1MS / 4, SBT_1MS / 4,
334 ntb_qp_full, q, 0);
335 } else {
336 m_freem(m);
337 drbr_advance(ifp, q->br);
338 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
339 }
340 break;
341 }
342 drbr_advance(ifp, q->br);
343 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
344 if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
345 if (mflags & M_MCAST)
346 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
347 }
348 }
349
350 static int
ntb_transmit(if_t ifp,struct mbuf * m)351 ntb_transmit(if_t ifp, struct mbuf *m)
352 {
353 struct ntb_net_ctx *sc = if_getsoftc(ifp);
354 struct ntb_net_queue *q;
355 int error, i;
356
357 CTR0(KTR_NTB, "TX: ntb_transmit");
358 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
359 i = m->m_pkthdr.flowid % sc->num_queues;
360 else
361 i = curcpu % sc->num_queues;
362 q = &sc->queues[i];
363
364 error = drbr_enqueue(ifp, q->br, m);
365 if (error)
366 return (error);
367
368 if (mtx_trylock(&q->tx_lock)) {
369 ntb_transmit_locked(q);
370 mtx_unlock(&q->tx_lock);
371 } else
372 taskqueue_enqueue(q->tx_tq, &q->tx_task);
373 return (0);
374 }
375
376 static void
ntb_handle_tx(void * arg,int pending)377 ntb_handle_tx(void *arg, int pending)
378 {
379 struct ntb_net_queue *q = arg;
380
381 mtx_lock(&q->tx_lock);
382 ntb_transmit_locked(q);
383 mtx_unlock(&q->tx_lock);
384 }
385
386 static void
ntb_qp_full(void * arg)387 ntb_qp_full(void *arg)
388 {
389 struct ntb_net_queue *q = arg;
390
391 CTR0(KTR_NTB, "TX: qp_full callout");
392 if (ntb_transport_tx_free_entry(q->qp) > 0)
393 taskqueue_enqueue(q->tx_tq, &q->tx_task);
394 else
395 callout_schedule_sbt(&q->queue_full,
396 SBT_1MS / 4, SBT_1MS / 4, 0);
397 }
398
399 static void
ntb_qflush(if_t ifp)400 ntb_qflush(if_t ifp)
401 {
402 struct ntb_net_ctx *sc = if_getsoftc(ifp);
403 struct ntb_net_queue *q;
404 struct mbuf *m;
405 int i;
406
407 for (i = 0; i < sc->num_queues; i++) {
408 q = &sc->queues[i];
409 mtx_lock(&q->tx_lock);
410 while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
411 m_freem(m);
412 mtx_unlock(&q->tx_lock);
413 }
414 if_qflush(ifp);
415 }
416
417 /* Network Device Callbacks */
418 static void
ntb_net_tx_handler(struct ntb_transport_qp * qp,void * qp_data,void * data,int len)419 ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
420 int len)
421 {
422
423 m_freem(data);
424 CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
425 }
426
427 static void
ntb_net_rx_handler(struct ntb_transport_qp * qp,void * qp_data,void * data,int len)428 ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
429 int len)
430 {
431 struct ntb_net_queue *q = qp_data;
432 struct ntb_net_ctx *sc = q->sc;
433 struct mbuf *m = data;
434 if_t ifp = q->ifp;
435 uint16_t proto;
436
437 CTR1(KTR_NTB, "RX: rx handler (%d)", len);
438 if (len < 0) {
439 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
440 return;
441 }
442
443 m->m_pkthdr.rcvif = ifp;
444 if (sc->num_queues > 1) {
445 m->m_pkthdr.flowid = q - sc->queues;
446 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
447 }
448 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
449 m_copydata(m, 12, 2, (void *)&proto);
450 switch (ntohs(proto)) {
451 case ETHERTYPE_IP:
452 if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
453 m->m_pkthdr.csum_data = 0xffff;
454 m->m_pkthdr.csum_flags = NTB_CSUM_SET;
455 }
456 break;
457 case ETHERTYPE_IPV6:
458 if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
459 m->m_pkthdr.csum_data = 0xffff;
460 m->m_pkthdr.csum_flags = NTB_CSUM_SET;
461 }
462 break;
463 }
464 }
465 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
466 if_input(ifp, m);
467 }
468
469 static void
ntb_net_event_handler(void * data,enum ntb_link_event status)470 ntb_net_event_handler(void *data, enum ntb_link_event status)
471 {
472 struct ntb_net_queue *q = data;
473
474 if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp));
475 if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP :
476 LINK_STATE_DOWN);
477 }
478
/* Helper functions */
/* TODO: This too should really be part of the kernel */
/*
 * Flag bits in the first octet of an EUI-48 address.  The expansions are
 * parenthesized so the macros keep their value inside larger expressions
 * (for example "EUI48_LOCALLY_ADMINISTERED + 1" or a multiplication), where
 * a bare "1 << n" would be regrouped by operator precedence.
 */
#define	EUI48_MULTICAST			(1 << 0)
#define	EUI48_LOCALLY_ADMINISTERED	(1 << 1)
483 static void
create_random_local_eui48(u_char * eaddr)484 create_random_local_eui48(u_char *eaddr)
485 {
486 static uint8_t counter = 0;
487
488 eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
489 arc4rand(&eaddr[1], 4, 0);
490 eaddr[5] = counter++;
491 }
492
493 static device_method_t ntb_net_methods[] = {
494 /* Device interface */
495 DEVMETHOD(device_probe, ntb_net_probe),
496 DEVMETHOD(device_attach, ntb_net_attach),
497 DEVMETHOD(device_detach, ntb_net_detach),
498 DEVMETHOD_END
499 };
500
501 static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
502 sizeof(struct ntb_net_ctx));
503 DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, NULL, NULL);
504 MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
505 MODULE_VERSION(if_ntb, 1);
506