/*-
 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The Non-Transparent Bridge (NTB) is a device that allows you to connect
 * two or more systems using PCI-e links, providing remote memory access.
 *
 * This module contains a driver for a simulated Ethernet device, using the
 * underlying NTB Transport device.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/buf_ring.h>
#include <sys/bus.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/ethernet.h>

#include <machine/bus.h>

#include "../ntb_transport.h"

#define	KTR_NTB			KTR_SPARE3
#define	NTB_MEDIATYPE		(IFM_ETHER | IFM_AUTO | IFM_FDX)

#define	NTB_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
#define	NTB_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
#define	NTB_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
				    CSUM_PSEUDO_HDR | \
				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
				    CSUM_SCTP_VALID)

static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");

static unsigned g_if_ntb_num_queues = 1;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
    &g_if_ntb_num_queues, 0, "Number of queues per interface");

struct ntb_net_queue {
	struct ntb_net_ctx	*sc;
	if_t			 ifp;
	struct ntb_transport_qp	*qp;
	struct buf_ring		*br;
	struct task		 tx_task;
	struct taskqueue	*tx_tq;
	struct mtx		 tx_lock;
	struct callout		 queue_full;
};

struct ntb_net_ctx {
	if_t			 ifp;
	struct ifmedia		 media;
	u_char			 eaddr[ETHER_ADDR_LEN];
	int			 num_queues;
	struct ntb_net_queue	*queues;
	int			 mtu;
};

static int ntb_net_probe(device_t dev);
static int ntb_net_attach(device_t dev);
static int ntb_net_detach(device_t dev);
static void ntb_net_init(void *arg);
static int ntb_ifmedia_upd(struct ifnet *);
static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
static int ntb_transmit(if_t ifp, struct mbuf *m);
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
static void ntb_handle_tx(void *arg, int pending);
static void ntb_qp_full(void *arg);
static void ntb_qflush(if_t ifp);
static void create_random_local_eui48(u_char *eaddr);

static int
ntb_net_probe(device_t dev)
{

	device_set_desc(dev, "NTB Network Interface");
	return (0);
}

static int
ntb_net_attach(device_t dev)
{
	struct ntb_net_ctx *sc = device_get_softc(dev);
	struct ntb_net_queue *q;
	if_t ifp;
	struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
	    ntb_net_tx_handler, ntb_net_event_handler };
	int i;

	ifp = sc->ifp = if_gethandle(IFT_ETHER);
	if (ifp == NULL) {
		printf("ntb: Cannot allocate ifnet structure\n");
		return (ENOMEM);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setdev(ifp, dev);

	sc->num_queues = g_if_ntb_num_queues;
	sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	sc->mtu = INT_MAX;
	for (i = 0; i < sc->num_queues; i++) {
		q = &sc->queues[i];
		q->sc = sc;
		q->ifp = ifp;
		q->qp = ntb_transport_create_queue(q,
		    device_get_parent(dev), &handlers);
		if (q->qp == NULL)
			break;
		sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
		mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
		q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
		TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
		q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
		    taskqueue_thread_enqueue, &q->tx_tq);
		taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
		    device_get_nameunit(dev), i);
		callout_init(&q->queue_full, 1);
	}
	/* Keep only the queues that were successfully created. */
	sc->num_queues = i;

	if_setinitfn(ifp, ntb_net_init);
	if_setsoftc(ifp, sc);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, ntb_ioctl);
	if_settransmitfn(ifp, ntb_transmit);
	if_setqflushfn(ifp, ntb_qflush);
	create_random_local_eui48(sc->eaddr);
	ether_ifattach(ifp, sc->eaddr);
	if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
	    IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
	if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
	if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);

	ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
	    ntb_ifmedia_sts);
	ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
	ifmedia_set(&sc->media, NTB_MEDIATYPE);

	for (i = 0; i < sc->num_queues; i++)
		ntb_transport_link_up(sc->queues[i].qp);
	return (0);
}

static int
ntb_net_detach(device_t dev)
{
	struct ntb_net_ctx *sc = device_get_softc(dev);
	struct ntb_net_queue *q;
	int i;

	for (i = 0; i < sc->num_queues; i++)
		ntb_transport_link_down(sc->queues[i].qp);
	ether_ifdetach(sc->ifp);
	if_free(sc->ifp);
	ifmedia_removeall(&sc->media);
	for (i = 0; i < sc->num_queues; i++) {
		q = &sc->queues[i];
		ntb_transport_free_queue(q->qp);
		buf_ring_free(q->br, M_DEVBUF);
		callout_drain(&q->queue_full);
		taskqueue_drain_all(q->tx_tq);
		mtx_destroy(&q->tx_lock);
	}
	free(sc->queues, M_DEVBUF);
	return (0);
}

/* Network device interface */

static void
ntb_net_init(void *arg)
{
	struct ntb_net_ctx *sc = arg;
	if_t ifp = sc->ifp;

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
	if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}

static int
ntb_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (command) {
	case SIOCSIFMTU:
	    {
		if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		break;
	    }

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		if (ifr->ifr_reqcap & IFCAP_RXCSUM)
			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
		else
			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
		if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
			if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
			if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
		} else {
			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
		}
		if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
		else
			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
		if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
			if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
			if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
		} else {
			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
		}
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
ntb_ifmedia_upd(struct ifnet *ifp)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ifmedia *ifm = &sc->media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	return (0);
}

static void
ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = NTB_MEDIATYPE;
	if (ntb_transport_link_query(sc->queues[0].qp))
		ifmr->ifm_status |= IFM_ACTIVE;
}

/*
 * Drain the queue's software ring into the transport queue, stopping and
 * rescheduling via the queue_full callout if the transport reports EAGAIN.
 */
static void
ntb_transmit_locked(struct ntb_net_queue *q)
{
	if_t ifp = q->ifp;
	struct mbuf *m;
	int rc, len;
	short mflags;

	CTR0(KTR_NTB, "TX: ntb_transmit_locked");
	while ((m = drbr_peek(ifp, q->br)) != NULL) {
		CTR1(KTR_NTB, "TX: start mbuf %p", m);
		if_etherbpfmtap(ifp, m);
		len = m->m_pkthdr.len;
		mflags = m->m_flags;
		rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
		if (rc != 0) {
			CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
			if (rc == EAGAIN) {
				drbr_putback(ifp, q->br, m);
				callout_reset_sbt(&q->queue_full,
				    SBT_1MS / 4, SBT_1MS / 4,
				    ntb_qp_full, q, 0);
			} else {
				m_freem(m);
				drbr_advance(ifp, q->br);
				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			}
			break;
		}
		drbr_advance(ifp, q->br);
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
		if (mflags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
	}
}

static int
ntb_transmit(if_t ifp, struct mbuf *m)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ntb_net_queue *q;
	int error, i;

	CTR0(KTR_NTB, "TX: ntb_transmit");
	/* Select a queue by flow ID when present, otherwise by current CPU. */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % sc->num_queues;
	else
		i = curcpu % sc->num_queues;
	q = &sc->queues[i];

	error = drbr_enqueue(ifp, q->br, m);
	if (error)
		return (error);

	if (mtx_trylock(&q->tx_lock)) {
		ntb_transmit_locked(q);
		mtx_unlock(&q->tx_lock);
	} else
		taskqueue_enqueue(q->tx_tq, &q->tx_task);
	return (0);
}

static void
ntb_handle_tx(void *arg, int pending)
{
	struct ntb_net_queue *q = arg;

	mtx_lock(&q->tx_lock);
	ntb_transmit_locked(q);
	mtx_unlock(&q->tx_lock);
}

/* Callout handler: retry transmission once the transport queue has room. */
static void
ntb_qp_full(void *arg)
{
	struct ntb_net_queue *q = arg;

	CTR0(KTR_NTB, "TX: qp_full callout");
	if (ntb_transport_tx_free_entry(q->qp) > 0)
		taskqueue_enqueue(q->tx_tq, &q->tx_task);
	else
		callout_schedule_sbt(&q->queue_full,
		    SBT_1MS / 4, SBT_1MS / 4, 0);
}

static void
ntb_qflush(if_t ifp)
{
	struct ntb_net_ctx *sc = if_getsoftc(ifp);
	struct ntb_net_queue *q;
	struct mbuf *m;
	int i;

	for (i = 0; i < sc->num_queues; i++) {
		q = &sc->queues[i];
		mtx_lock(&q->tx_lock);
		while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
			m_freem(m);
		mtx_unlock(&q->tx_lock);
	}
	if_qflush(ifp);
}

/* Network Device Callbacks */
static void
ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
    int len)
{

	m_freem(data);
	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
}

static void
ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
    int len)
{
	struct ntb_net_queue *q = qp_data;
	struct ntb_net_ctx *sc = q->sc;
	struct mbuf *m = data;
	if_t ifp = q->ifp;
	uint16_t proto;

	CTR1(KTR_NTB, "RX: rx handler (%d)", len);
	if (len < 0) {
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return;
	}

	m->m_pkthdr.rcvif = ifp;
	if (sc->num_queues > 1) {
		m->m_pkthdr.flowid = q - sc->queues;
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	/*
	 * With RX checksum offload enabled, report IP/L4 checksums as
	 * already verified instead of computing them.
	 */
	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
		/* Peek at the Ethernet type field (offset 12). */
		m_copydata(m, 12, 2, (void *)&proto);
		switch (ntohs(proto)) {
		case ETHERTYPE_IP:
			if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
				m->m_pkthdr.csum_data = 0xffff;
				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
			}
			break;
		case ETHERTYPE_IPV6:
			if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
				m->m_pkthdr.csum_data = 0xffff;
				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
			}
			break;
		}
	}
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_input(ifp, m);
}

static void
ntb_net_event_handler(void *data, enum ntb_link_event status)
{
	struct ntb_net_queue *q = data;
	int new_state;

	switch (status) {
	case NTB_LINK_DOWN:
		new_state = LINK_STATE_DOWN;
		break;
	case NTB_LINK_UP:
		new_state = LINK_STATE_UP;
		break;
	default:
		new_state = LINK_STATE_UNKNOWN;
		break;
	}
	if_link_state_change(q->ifp, new_state);
}

/* Helper functions */
/* TODO: This too should really be part of the kernel */
#define	EUI48_MULTICAST			(1 << 0)
#define	EUI48_LOCALLY_ADMINISTERED	(1 << 1)

/*
 * Derive a locally administered unicast MAC address from the tick count
 * and an incrementing counter.
 */
static void
create_random_local_eui48(u_char *eaddr)
{
	static uint8_t counter = 0;
	uint32_t seed = ticks;

	eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
	memcpy(&eaddr[1], &seed, sizeof(uint32_t));
	eaddr[5] = counter++;
}

static device_method_t ntb_net_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ntb_net_probe),
	DEVMETHOD(device_attach,	ntb_net_attach),
	DEVMETHOD(device_detach,	ntb_net_detach),
	DEVMETHOD_END
};

devclass_t ntb_net_devclass;
static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
    sizeof(struct ntb_net_ctx));
DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
    NULL, NULL);
MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
MODULE_VERSION(if_ntb, 1);