1 /*- 2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org> 3 * Copyright (C) 2013 Intel Corporation 4 * Copyright (C) 2015 EMC Corporation 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * The Non-Transparent Bridge (NTB) is a device that allows you to connect 31 * two or more systems using a PCI-e links, providing remote memory access. 32 * 33 * This module contains a driver for simulated Ethernet device, using 34 * underlying NTB Transport device. 35 * 36 * NOTE: Much of the code in this module is shared with Linux. Any patches may 37 * be picked up and redistributed in Linux with a dual GPL/BSD license. 38 */ 39 40 #include <sys/param.h> 41 #include <sys/kernel.h> 42 #include <sys/systm.h> 43 #include <sys/buf_ring.h> 44 #include <sys/bus.h> 45 #include <sys/ktr.h> 46 #include <sys/limits.h> 47 #include <sys/module.h> 48 #include <sys/socket.h> 49 #include <sys/sockio.h> 50 #include <sys/sysctl.h> 51 #include <sys/taskqueue.h> 52 53 #include <net/if.h> 54 #include <net/if_media.h> 55 #include <net/if_types.h> 56 #include <net/if_media.h> 57 #include <net/if_var.h> 58 #include <net/bpf.h> 59 #include <net/ethernet.h> 60 61 #include <machine/bus.h> 62 63 #include "../ntb_transport.h" 64 65 #define KTR_NTB KTR_SPARE3 66 #define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX) 67 68 #define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) 69 #define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) 70 #define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ 71 CSUM_PSEUDO_HDR | \ 72 CSUM_IP_CHECKED | CSUM_IP_VALID | \ 73 CSUM_SCTP_VALID) 74 75 static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 76 "if_ntb"); 77 78 static unsigned g_if_ntb_num_queues = UINT_MAX; 79 SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN, 80 &g_if_ntb_num_queues, 0, "Number of queues per interface"); 81 82 struct ntb_net_queue { 83 struct ntb_net_ctx *sc; 84 if_t ifp; 85 struct ntb_transport_qp *qp; 86 struct buf_ring *br; 87 struct task tx_task; 88 struct taskqueue *tx_tq; 89 struct mtx tx_lock; 90 struct callout queue_full; 91 }; 92 93 struct ntb_net_ctx { 94 if_t ifp; 95 struct ifmedia media; 96 u_char eaddr[ETHER_ADDR_LEN]; 97 int num_queues; 98 struct ntb_net_queue *queues; 99 int mtu; 100 }; 101 102 static int ntb_net_probe(device_t dev); 103 static int ntb_net_attach(device_t dev); 104 static int ntb_net_detach(device_t dev); 105 static void ntb_net_init(void *arg); 106 static int ntb_ifmedia_upd(struct ifnet *); 107 static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *); 108 static int ntb_ioctl(if_t ifp, u_long command, caddr_t data); 109 static int ntb_transmit(if_t ifp, struct mbuf *m); 110 static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, 111 void *data, int len); 112 static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, 113 void *data, int len); 114 static void ntb_net_event_handler(void *data, enum ntb_link_event status); 115 static void ntb_handle_tx(void *arg, int pending); 116 static void ntb_qp_full(void *arg); 117 static void ntb_qflush(if_t ifp); 118 static void create_random_local_eui48(u_char *eaddr); 119 120 static int 121 ntb_net_probe(device_t dev) 122 { 123 124 device_set_desc(dev, "NTB Network Interface"); 125 return (0); 126 } 127 128 static int 129 ntb_net_attach(device_t dev) 130 { 131 struct ntb_net_ctx *sc = device_get_softc(dev); 132 struct ntb_net_queue *q; 133 if_t ifp; 134 struct ntb_queue_handlers handlers = { ntb_net_rx_handler, 135 ntb_net_tx_handler, ntb_net_event_handler }; 136 int i; 137 138 ifp = sc->ifp = if_gethandle(IFT_ETHER); 139 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 140 if_setdev(ifp, dev); 141 142 sc->num_queues = min(g_if_ntb_num_queues, 143 ntb_transport_queue_count(dev)); 144 sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue), 145 M_DEVBUF, M_WAITOK | M_ZERO); 146 sc->mtu = INT_MAX; 147 for (i = 0; i < sc->num_queues; i++) { 148 q = &sc->queues[i]; 149 q->sc = sc; 150 q->ifp = ifp; 151 q->qp = ntb_transport_create_queue(dev, i, &handlers, q); 152 if (q->qp == NULL) 153 break; 154 sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp)); 155 mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF); 156 q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock); 157 TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q); 158 q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT, 159 taskqueue_thread_enqueue, &q->tx_tq); 160 taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d", 161 device_get_nameunit(dev), i); 162 callout_init(&q->queue_full, 1); 163 } 164 sc->num_queues = i; 165 device_printf(dev, "%d queue(s)\n", sc->num_queues); 166 167 if_setinitfn(ifp, ntb_net_init); 168 if_setsoftc(ifp, sc); 169 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 170 if_setioctlfn(ifp, ntb_ioctl); 171 if_settransmitfn(ifp, ntb_transmit); 172 if_setqflushfn(ifp, ntb_qflush); 173 create_random_local_eui48(sc->eaddr); 174 ether_ifattach(ifp, sc->eaddr); 175 if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | 176 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); 177 if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); 178 if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN); 179 180 ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd, 181 ntb_ifmedia_sts); 182 ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL); 183 ifmedia_set(&sc->media, NTB_MEDIATYPE); 184 185 for (i = 0; i < sc->num_queues; i++) 186 ntb_transport_link_up(sc->queues[i].qp); 187 return (0); 188 } 189 190 static int 191 ntb_net_detach(device_t dev) 192 { 193 struct ntb_net_ctx *sc = device_get_softc(dev); 194 struct ntb_net_queue *q; 195 int i; 196 197 for (i = 0; i < sc->num_queues; i++) 198 ntb_transport_link_down(sc->queues[i].qp); 199 ether_ifdetach(sc->ifp); 200 if_free(sc->ifp); 201 ifmedia_removeall(&sc->media); 202 for (i = 0; i < sc->num_queues; i++) { 203 q = &sc->queues[i]; 204 ntb_transport_free_queue(q->qp); 205 buf_ring_free(q->br, M_DEVBUF); 206 callout_drain(&q->queue_full); 207 taskqueue_drain_all(q->tx_tq); 208 mtx_destroy(&q->tx_lock); 209 } 210 free(sc->queues, M_DEVBUF); 211 return (0); 212 } 213 214 /* Network device interface */ 215 216 static void 217 ntb_net_init(void *arg) 218 { 219 struct ntb_net_ctx *sc = arg; 220 if_t ifp = sc->ifp; 221 222 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 223 if_setbaudrate(ifp, ntb_transport_link_speed(sc->queues[0].qp)); 224 if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ? 225 LINK_STATE_UP : LINK_STATE_DOWN); 226 } 227 228 static int 229 ntb_ioctl(if_t ifp, u_long command, caddr_t data) 230 { 231 struct ntb_net_ctx *sc = if_getsoftc(ifp); 232 struct ifreq *ifr = (struct ifreq *)data; 233 int error = 0; 234 235 switch (command) { 236 case SIOCSIFFLAGS: 237 case SIOCADDMULTI: 238 case SIOCDELMULTI: 239 break; 240 241 case SIOCSIFMTU: 242 { 243 if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) { 244 error = EINVAL; 245 break; 246 } 247 248 if_setmtu(ifp, ifr->ifr_mtu); 249 break; 250 } 251 252 case SIOCSIFMEDIA: 253 case SIOCGIFMEDIA: 254 error = ifmedia_ioctl(ifp, ifr, &sc->media, command); 255 break; 256 257 case SIOCSIFCAP: 258 if (ifr->ifr_reqcap & IFCAP_RXCSUM) 259 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); 260 else 261 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); 262 if (ifr->ifr_reqcap & IFCAP_TXCSUM) { 263 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); 264 if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0); 265 } else { 266 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); 267 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES); 268 } 269 if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6) 270 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); 271 else 272 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); 273 if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) { 274 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); 275 if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0); 276 } else { 277 if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6); 278 if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6); 279 } 280 break; 281 282 default: 283 error = ether_ioctl(ifp, command, data); 284 break; 285 } 286 287 return (error); 288 } 289 290 static int 291 ntb_ifmedia_upd(struct ifnet *ifp) 292 { 293 struct ntb_net_ctx *sc = if_getsoftc(ifp); 294 struct ifmedia *ifm = &sc->media; 295 296 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 297 return (EINVAL); 298 299 return (0); 300 } 301 302 static void 303 ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 304 { 305 struct ntb_net_ctx *sc = if_getsoftc(ifp); 306 307 ifmr->ifm_status = IFM_AVALID; 308 ifmr->ifm_active = NTB_MEDIATYPE; 309 if (ntb_transport_link_query(sc->queues[0].qp)) 310 ifmr->ifm_status |= IFM_ACTIVE; 311 } 312 313 static void 314 ntb_transmit_locked(struct ntb_net_queue *q) 315 { 316 if_t ifp = q->ifp; 317 struct mbuf *m; 318 int rc, len; 319 short mflags; 320 321 CTR0(KTR_NTB, "TX: ntb_transmit_locked"); 322 while ((m = drbr_peek(ifp, q->br)) != NULL) { 323 CTR1(KTR_NTB, "TX: start mbuf %p", m); 324 ether_bpf_mtap_if(ifp, m); 325 len = m->m_pkthdr.len; 326 mflags = m->m_flags; 327 rc = ntb_transport_tx_enqueue(q->qp, m, m, len); 328 if (rc != 0) { 329 CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc); 330 if (rc == EAGAIN) { 331 drbr_putback(ifp, q->br, m); 332 callout_reset_sbt(&q->queue_full, 333 SBT_1MS / 4, SBT_1MS / 4, 334 ntb_qp_full, q, 0); 335 } else { 336 m_freem(m); 337 drbr_advance(ifp, q->br); 338 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 339 } 340 break; 341 } 342 drbr_advance(ifp, q->br); 343 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 344 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 345 if (mflags & M_MCAST) 346 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 347 } 348 } 349 350 static int 351 ntb_transmit(if_t ifp, struct mbuf *m) 352 { 353 struct ntb_net_ctx *sc = if_getsoftc(ifp); 354 struct ntb_net_queue *q; 355 int error, i; 356 357 CTR0(KTR_NTB, "TX: ntb_transmit"); 358 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 359 i = m->m_pkthdr.flowid % sc->num_queues; 360 else 361 i = curcpu % sc->num_queues; 362 q = &sc->queues[i]; 363 364 error = drbr_enqueue(ifp, q->br, m); 365 if (error) 366 return (error); 367 368 if (mtx_trylock(&q->tx_lock)) { 369 ntb_transmit_locked(q); 370 mtx_unlock(&q->tx_lock); 371 } else 372 taskqueue_enqueue(q->tx_tq, &q->tx_task); 373 return (0); 374 } 375 376 static void 377 ntb_handle_tx(void *arg, int pending) 378 { 379 struct ntb_net_queue *q = arg; 380 381 mtx_lock(&q->tx_lock); 382 ntb_transmit_locked(q); 383 mtx_unlock(&q->tx_lock); 384 } 385 386 static void 387 ntb_qp_full(void *arg) 388 { 389 struct ntb_net_queue *q = arg; 390 391 CTR0(KTR_NTB, "TX: qp_full callout"); 392 if (ntb_transport_tx_free_entry(q->qp) > 0) 393 taskqueue_enqueue(q->tx_tq, &q->tx_task); 394 else 395 callout_schedule_sbt(&q->queue_full, 396 SBT_1MS / 4, SBT_1MS / 4, 0); 397 } 398 399 static void 400 ntb_qflush(if_t ifp) 401 { 402 struct ntb_net_ctx *sc = if_getsoftc(ifp); 403 struct ntb_net_queue *q; 404 struct mbuf *m; 405 int i; 406 407 for (i = 0; i < sc->num_queues; i++) { 408 q = &sc->queues[i]; 409 mtx_lock(&q->tx_lock); 410 while ((m = buf_ring_dequeue_sc(q->br)) != NULL) 411 m_freem(m); 412 mtx_unlock(&q->tx_lock); 413 } 414 if_qflush(ifp); 415 } 416 417 /* Network Device Callbacks */ 418 static void 419 ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, 420 int len) 421 { 422 423 m_freem(data); 424 CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); 425 } 426 427 static void 428 ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, 429 int len) 430 { 431 struct ntb_net_queue *q = qp_data; 432 struct ntb_net_ctx *sc = q->sc; 433 struct mbuf *m = data; 434 if_t ifp = q->ifp; 435 uint16_t proto; 436 437 CTR1(KTR_NTB, "RX: rx handler (%d)", len); 438 if (len < 0) { 439 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 440 return; 441 } 442 443 m->m_pkthdr.rcvif = ifp; 444 if (sc->num_queues > 1) { 445 m->m_pkthdr.flowid = q - sc->queues; 446 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 447 } 448 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 449 m_copydata(m, 12, 2, (void *)&proto); 450 switch (ntohs(proto)) { 451 case ETHERTYPE_IP: 452 if (if_getcapenable(ifp) & IFCAP_RXCSUM) { 453 m->m_pkthdr.csum_data = 0xffff; 454 m->m_pkthdr.csum_flags = NTB_CSUM_SET; 455 } 456 break; 457 case ETHERTYPE_IPV6: 458 if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) { 459 m->m_pkthdr.csum_data = 0xffff; 460 m->m_pkthdr.csum_flags = NTB_CSUM_SET; 461 } 462 break; 463 } 464 } 465 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 466 if_input(ifp, m); 467 } 468 469 static void 470 ntb_net_event_handler(void *data, enum ntb_link_event status) 471 { 472 struct ntb_net_queue *q = data; 473 474 if_setbaudrate(q->ifp, ntb_transport_link_speed(q->qp)); 475 if_link_state_change(q->ifp, (status == NTB_LINK_UP) ? LINK_STATE_UP : 476 LINK_STATE_DOWN); 477 } 478 479 /* Helper functions */ 480 /* TODO: This too should really be part of the kernel */ 481 #define EUI48_MULTICAST 1 << 0 482 #define EUI48_LOCALLY_ADMINISTERED 1 << 1 483 static void 484 create_random_local_eui48(u_char *eaddr) 485 { 486 static uint8_t counter = 0; 487 488 eaddr[0] = EUI48_LOCALLY_ADMINISTERED; 489 arc4rand(&eaddr[1], 4, 0); 490 eaddr[5] = counter++; 491 } 492 493 static device_method_t ntb_net_methods[] = { 494 /* Device interface */ 495 DEVMETHOD(device_probe, ntb_net_probe), 496 DEVMETHOD(device_attach, ntb_net_attach), 497 DEVMETHOD(device_detach, ntb_net_detach), 498 DEVMETHOD_END 499 }; 500 501 static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, 502 sizeof(struct ntb_net_ctx)); 503 DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, NULL, NULL); 504 MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); 505 MODULE_VERSION(if_ntb, 1); 506