/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *	We preallocate a block of TX mbufs (roughly as many as
 *	tx descriptors; the number is not critical) to speed up
 *	operation during transmissions. The refcount on most of
 *	these buffers is artificially bumped up so we can recycle
 *	them more easily. Also, the destructor is intercepted
 *	so that we can use it as an interrupt notification to wake up
 *	processes blocked on a poll().
 *
 *	For each receive ring we allocate one "struct mbq"
 *	(an mbuf tailq plus a spinlock). We intercept packets
 *	on the receive path (through if_input) and put them in
 *	the mbq, from which the netmap receive routines can grab them.
 *
 * TX:
 *	In the generic_netmap_txsync() routine, netmap buffers are copied
 *	(or, in the future, linked) to the preallocated mbufs
 *	and pushed to the transmit queue. Some of these mbufs
 *	(those with NS_REPORT, or otherwise every half ring)
 *	have refcount = 1, the others have refcount = 2.
 *	When the destructor is invoked, we take that as
 *	a notification that all mbufs up to that one in
 *	the specific ring have been completed, and generate
 *	the equivalent of a transmit interrupt.
 *
 * RX:
 *	Each mbuf intercepted by generic_rx_handler() is queued on the
 *	per-ring mbq (the packet is dropped if the queue grows too long).
 *	generic_netmap_rxsync() then dequeues the mbufs, copies their
 *	payload into netmap buffers and frees them. Notifications to
 *	the pollers may be rate-limited by a mitigation timer.
 */
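
/*
 * Illustrative userspace sketch (not part of this module): how an
 * application might end up exercising the emulated path described above.
 * The helper names (nm_open(), nm_inject(), NETMAP_FD()) are assumed to
 * come from <net/netmap_user.h>, "em0" is just a placeholder for a NIC
 * whose driver has no native netmap support, and payload/payload_len
 * stand for the application's data. On NIOCREGIF the core falls back to
 * the generic adapter implemented in this file.
 *
 *	#define NETMAP_WITH_LIBS
 *	#include <net/netmap_user.h>
 *	#include <poll.h>
 *
 *	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
 *	struct pollfd pfd = { .fd = NETMAP_FD(d), .events = POLLOUT };
 *
 *	poll(&pfd, 1, -1);			// wakeup comes from the mbuf destructor
 *	nm_inject(d, payload, payload_len);	// copied into a preallocated mbuf
 *	ioctl(NETMAP_FD(d), NIOCTXSYNC, NULL);	// runs the emulated txsync
 *	nm_close(d);
 */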

#ifdef __FreeBSD__

#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>   /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>        /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>

#define rtnl_lock()	ND("rtnl_lock called")
#define rtnl_unlock()	ND("rtnl_unlock called")
#define MBUF_TXQ(m)	((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * FreeBSD mbuf allocator/deallocator in emulation mode:
 *
 * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
 * so that the destructor, if invoked, will not free the packet.
 * In principle we should install the destructor only on demand, but
 * since there might be a race we install it at allocation time.
 * Because M_NOFREE is set, a destructor must always be present,
 * or the buffers would leak.
 */

/*
 * mbuf wrappers
 */

/* To install an mbuf destructor we also need to change the type to
 * EXT_EXTREF and add the M_NOFREE flag; the destructor should then
 * clear the flag and chain into uma_zfree(zone_pack, mf)
 * (or reinstall the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn)	do {		\
	(m)->m_ext.ext_free = (void *)fn;		\
	(m)->m_ext.ext_type = EXT_EXTREF;		\
} while (0)

static void
netmap_default_mbuf_destructor(struct mbuf *m)
{
	/* restore original mbuf */
	m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
	m->m_ext.ext_arg1 = NULL;
	m->m_ext.ext_type = EXT_PACKET;
	m->m_ext.ext_free = NULL;
	if (GET_MBUF_REFCNT(m) == 0)
		SET_MBUF_REFCNT(m, 1);
	uma_zfree(zone_pack, m);
}

static inline struct mbuf *
netmap_get_mbuf(int len)
{
	struct mbuf *m;
	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m) {
		m->m_flags |= M_NOFREE;	/* XXXNP: Almost certainly incorrect. */
		m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
		m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
		m->m_ext.ext_type = EXT_EXTREF;
		ND(5, "create m %p refcnt %d", m, GET_MBUF_REFCNT(m));
	}
	return m;
}
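
/*
 * Lifecycle sketch of a recycled TX mbuf on FreeBSD, tying together the
 * helpers above with generic_set_tx_event() and generic_mbuf_destructor()
 * defined later in this file (illustration only, the real calls are made
 * by the txsync path):
 *
 *	struct mbuf *m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
 *		// EXT_EXTREF cluster, M_NOFREE, default destructor installed
 *
 *	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
 *		// done by generic_set_tx_event() on the slot chosen for the event
 *	m_freem(m);
 *		// drops our reference; when the driver releases the last one,
 *		// generic_mbuf_destructor() wakes up the pollers and then
 *		// netmap_default_mbuf_destructor() restores the mbuf and returns
 *		// it to zone_pack. generic_netmap_tx_clean() later refills the
 *		// emptied tx_pool slot with a fresh mbuf.
 */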

#else /* linux */

#include "bsd_glue.h"

#include <linux/rtnetlink.h>    /* rtnl_[un]lock() */
#include <linux/ethtool.h>      /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>

//#define REG_RESET

#endif /* linux */


/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


/* ======================== usage stats =========================== */

#ifdef RATE_GENERIC
#define IFRATE(x) x
struct rate_stats {
	unsigned long txpkt;
	unsigned long txsync;
	unsigned long txirq;
	unsigned long rxpkt;
	unsigned long rxirq;
	unsigned long rxsync;
};

struct rate_context {
	unsigned refcount;
	struct timer_list timer;
	struct rate_stats new;
	struct rate_stats old;
};

#define RATE_PRINTK(_NAME_) \
	printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD  2
static void rate_callback(unsigned long arg)
{
	struct rate_context * ctx = (struct rate_context *)arg;
	struct rate_stats cur = ctx->new;
	int r;

	RATE_PRINTK(txpkt);
	RATE_PRINTK(txsync);
	RATE_PRINTK(txirq);
	RATE_PRINTK(rxpkt);
	RATE_PRINTK(rxsync);
	RATE_PRINTK(rxirq);
	printk("\n");

	ctx->old = cur;
	r = mod_timer(&ctx->timer, jiffies +
			msecs_to_jiffies(RATE_PERIOD * 1000));
	if (unlikely(r))
		D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
{
	if (txp) rate_ctx.new.txpkt++;
	if (txs) rate_ctx.new.txsync++;
	if (txi) rate_ctx.new.txirq++;
	if (rxp) rate_ctx.new.rxpkt++;
	if (rxs) rate_ctx.new.rxsync++;
	if (rxi) rate_ctx.new.rxirq++;
}

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */


/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq.
 */
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
	struct netmap_adapter *na = NA(ifp);
	if (unlikely(!nm_netmap_on(na)))
		return;

	netmap_common_irq(ifp, q, work_done);
}

/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	struct mbuf *m;
	int error;
	int i, r;

	if (!na)
		return EINVAL;

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_stop(ifp);
	if (error) {
		return error;
	}
#endif /* REG_RESET */

	if (enable) { /* Enable netmap mode. */
		/* Init the mitigation support on all the rx queues. */
		gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
				M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!gna->mit) {
			D("mitigation allocation failed");
			error = ENOMEM;
			goto out;
		}
		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_init(&gna->mit[r], r, na);

		/* Initialize the rx queue, as generic_rx_handler() can
		 * be called as soon as netmap_catch_rx() returns.
		 */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_init(&na->rx_rings[r].rx_queue);
		}

		/*
		 * Preallocate packet buffers for the tx rings.
		 */
		for (r=0; r<na->num_tx_rings; r++)
			na->tx_rings[r].tx_pool = NULL;
		for (r=0; r<na->num_tx_rings; r++) {
			na->tx_rings[r].tx_pool =
				malloc(na->num_tx_desc * sizeof(struct mbuf *),
					M_DEVBUF, M_NOWAIT | M_ZERO);
			if (!na->tx_rings[r].tx_pool) {
				D("tx_pool allocation failed");
				error = ENOMEM;
				goto free_tx_pools;
			}
			for (i=0; i<na->num_tx_desc; i++)
				na->tx_rings[r].tx_pool[i] = NULL;
			for (i=0; i<na->num_tx_desc; i++) {
				m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
				if (!m) {
					D("tx_pool[%d] allocation failed", i);
					error = ENOMEM;
					goto free_tx_pools;
				}
				na->tx_rings[r].tx_pool[i] = m;
			}
		}
		rtnl_lock();
		/* Prepare to intercept incoming traffic. */
		error = netmap_catch_rx(gna, 1);
		if (error) {
			D("netdev_rx_handler_register() failed (%d)", error);
			goto register_handler;
		}
		na->na_flags |= NAF_NETMAP_ON;

		/* Make netmap control the packet steering. */
		netmap_catch_tx(gna, 1);

		rtnl_unlock();

#ifdef RATE_GENERIC
		if (rate_ctx.refcount == 0) {
			D("setup_timer()");
			memset(&rate_ctx, 0, sizeof(rate_ctx));
			setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
			if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
				D("Error: mod_timer()");
			}
		}
		rate_ctx.refcount++;
#endif /* RATE */

	} else if (na->tx_rings[0].tx_pool) {
		/* Disable netmap mode. We enter here only if the previous
		 * generic_netmap_register(na, 1) was successful.
		 * If it was not, na->tx_rings[0].tx_pool was set to NULL by the
		 * error handling code below. */
		rtnl_lock();

		na->na_flags &= ~NAF_NETMAP_ON;

		/* Release packet steering control. */
		netmap_catch_tx(gna, 0);

		/* Do not intercept packets on the rx path. */
		netmap_catch_rx(gna, 0);

		rtnl_unlock();

		/* Free the mbufs going to the netmap rings */
		for (r=0; r<na->num_rx_rings; r++) {
			mbq_safe_purge(&na->rx_rings[r].rx_queue);
			mbq_safe_destroy(&na->rx_rings[r].rx_queue);
		}

		for (r=0; r<na->num_rx_rings; r++)
			netmap_mitigation_cleanup(&gna->mit[r]);
		free(gna->mit, M_DEVBUF);

		for (r=0; r<na->num_tx_rings; r++) {
			for (i=0; i<na->num_tx_desc; i++) {
				m_freem(na->tx_rings[r].tx_pool[i]);
			}
			free(na->tx_rings[r].tx_pool, M_DEVBUF);
		}

#ifdef RATE_GENERIC
		if (--rate_ctx.refcount == 0) {
			D("del_timer()");
			del_timer(&rate_ctx.timer);
		}
#endif
	}

#ifdef REG_RESET
	error = ifp->netdev_ops->ndo_open(ifp);
	if (error) {
		goto free_tx_pools;
	}
#endif

	return 0;

register_handler:
	rtnl_unlock();
free_tx_pools:
	for (r=0; r<na->num_tx_rings; r++) {
		if (na->tx_rings[r].tx_pool == NULL)
			continue;
		for (i=0; i<na->num_tx_desc; i++)
			if (na->tx_rings[r].tx_pool[i])
				m_freem(na->tx_rings[r].tx_pool[i]);
		free(na->tx_rings[r].tx_pool, M_DEVBUF);
		na->tx_rings[r].tx_pool = NULL;
	}
	for (r=0; r<na->num_rx_rings; r++) {
		netmap_mitigation_cleanup(&gna->mit[r]);
		mbq_safe_destroy(&na->rx_rings[r].rx_queue);
	}
	free(gna->mit, M_DEVBUF);
out:

	return error;
}

/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
	netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
	if (netmap_verbose)
		RD(5, "Tx irq (%p) queue %d index %d", m, MBUF_TXQ(m),
			(int)(uintptr_t)m->m_ext.ext_arg1);
	netmap_default_mbuf_destructor(m);
#endif /* __FreeBSD__ */
	IFRATE(rate_ctx.new.txirq++);
}

extern int netmap_adaptive_io;

/* Record completed transmissions and update hwtail.
 *
 * The oldest tx buffer not yet completed is at nr_hwtail + 1;
 * nr_hwcur is the first unsent buffer.
 */
static u_int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
	u_int const lim = kring->nkr_num_slots - 1;
	u_int nm_i = nm_next(kring->nr_hwtail, lim);
	u_int hwcur = kring->nr_hwcur;
	u_int n = 0;
	struct mbuf **tx_pool = kring->tx_pool;

	while (nm_i != hwcur) { /* buffers not completed */
		struct mbuf *m = tx_pool[nm_i];

		if (unlikely(m == NULL)) {
			/* this is done, try to replenish the entry */
			tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(kring->na));
			if (unlikely(m == NULL)) {
				D("mbuf allocation failed, XXX error");
				// XXX how do we proceed ? break ?
				return -ENOMEM;
			}
		} else if (GET_MBUF_REFCNT(m) != 1) {
			break; /* This mbuf is still busy: its refcnt is 2. */
		}
		n++;
		nm_i = nm_next(nm_i, lim);
#if 0 /* rate adaptation */
		if (netmap_adaptive_io > 1) {
			if (n >= netmap_adaptive_io)
				break;
		} else if (netmap_adaptive_io) {
			/* if hwcur - nm_i < lim/8 do an early break
			 * so we prevent the sender from stalling. See CVT.
			 */
			if (hwcur >= nm_i) {
				if (hwcur - nm_i < lim/2)
					break;
			} else {
				if (hwcur + lim + 1 - nm_i < lim/2)
					break;
			}
		}
#endif
	}
	kring->nr_hwtail = nm_prev(nm_i, lim);
	ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);

	return n;
}


/*
 * We have pending packets in the driver between nr_hwtail + 1 and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
	u_int n = kring->nkr_num_slots;
	u_int ntc = nm_next(kring->nr_hwtail, n-1);
	u_int e;

	if (hwcur >= ntc) {
		e = (hwcur + ntc) / 2;
	} else { /* wrap around */
		e = (hwcur + n + ntc) / 2;
		if (e >= n) {
			e -= n;
		}
	}

	if (unlikely(e >= n)) {
		D("This cannot happen");
		e = 0;
	}

	return e;
}
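
/*
 * Worked example for generic_tx_event_middle(), just restating the
 * arithmetic above: with nkr_num_slots = 256, nr_hwtail = 9 (so ntc = 10)
 * and hwcur = 200, the event slot is (200 + 10) / 2 = 105. In the
 * wrap-around case, say ntc = 240 and hwcur = 20, the slot is
 * (20 + 256 + 240) / 2 = 258, reduced modulo the ring size to 2,
 * i.e. still roughly halfway along the 36 pending slots.
 */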

/*
 * We have pending packets in the driver between nr_hwtail+1 and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
	struct mbuf *m;
	u_int e;

	if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
		return; /* all buffers are free */
	}
	e = generic_tx_event_middle(kring, hwcur);

	m = kring->tx_pool[e];
	ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 );
	if (m == NULL) {
		/* This can happen if there is already an event on the netmap
		 * slot 'e': there is nothing to do. */
		return;
	}
	kring->tx_pool[e] = NULL;
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

	// XXX wmb() ?
	/* Decrement the refcount and free it if we have the last one. */
	m_freem(m);
	smp_mb();
}


/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit()).
 * On Linux this is not done directly, but through dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */ // j
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	u_int ring_nr = kring->ring_id;

	IFRATE(rate_ctx.new.txsync++);

	// TODO: handle the case of mbuf allocation failure

	rmb();

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			void *addr = NMB(na, slot);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;

			NM_CHECK_ADDR_LEN(na, addr, len);

			/* Take an mbuf from the tx pool and copy in the user packet. */
			m = kring->tx_pool[nm_i];
			if (unlikely(!m)) {
				RD(5, "This should never happen");
				kring->tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
				if (unlikely(m == NULL)) {
					D("mbuf allocation failed");
					break;
				}
			}
			/* XXX we should ask for notifications when NS_REPORT is set,
			 * or roughly every half ring. We can optimize this
			 * by lazily requesting notifications only when a
			 * transmission fails. Probably the best way is to
			 * break on failures and set notifications when
			 * ring->cur == ring->tail || nm_i != cur
			 */
			tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
			if (unlikely(tx_ret)) {
				ND(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
						tx_ret, nm_i, head, kring->nr_hwtail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call generic_netmap_tx_clean(kring) to do the
				 * double check and see if we can free more buffers.
				 * If there is space continue, else break;
				 * NOTE: the double check is necessary if the problem
				 * occurs in the txsync call after selrecord().
				 * Also, we need some way to tell the caller that not
				 * all buffers were queued onto the device (this was
				 * not a problem with the native netmap driver where space
				 * is preallocated). The bridge has a similar problem
				 * and we solve it there by dropping the excess packets.
				 */
				generic_set_tx_event(kring, nm_i);
				if (generic_netmap_tx_clean(kring)) { /* space now available */
					continue;
				} else {
					break;
				}
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			IFRATE(rate_ctx.new.txpkt ++);
		}

		/* Update hwcur to the next slot to transmit. */
		kring->nr_hwcur = nm_i; /* not head, we could break early */
	}

	/*
	 * Second, reclaim completed buffers
	 */
	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		/* No more available slots? Set a notification event
		 * on a netmap slot that will be cleaned in the future.
		 * No doublecheck is performed, since txsync() will be
		 * called twice by netmap_poll().
		 */
		generic_set_tx_event(kring, nm_i);
	}
	ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);

	generic_netmap_tx_clean(kring);

	return 0;
}
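
/*
 * The device-specific helper generic_xmit_frame() used above is provided
 * by the per-OS glue (netmap_freebsd.c / netmap_linux.c) and is not shown
 * here. The sketch below only illustrates the contract assumed by
 * generic_netmap_txsync(): copy the netmap buffer into the recycled mbuf,
 * tag it with the ring number (read back via MBUF_TXQ()), hand it to the
 * driver and return non-zero when the driver has no room. The
 * FreeBSD-flavoured body is an assumption, not the actual implementation.
 *
 *	static int
 *	example_xmit_frame(struct ifnet *ifp, struct mbuf *m,
 *		void *addr, u_int len, u_int ring_nr)
 *	{
 *		m_copyback(m, 0, len, addr);		// fill the recycled mbuf
 *		m->m_len = m->m_pkthdr.len = len;
 *		m->m_pkthdr.flowid = ring_nr;		// MBUF_TXQ()/MBUF_RXQ()
 *		m->m_pkthdr.rcvif = ifp;		// assumed source of MBUF_IFP()
 *		return (ifp->if_transmit(ifp, m));	// non-zero means no room
 *	}
 */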

/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the network stack.
 * Stolen packets are put in a queue from which the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
	u_int work_done;
	u_int rr = MBUF_RXQ(m); // receive ring number

	if (rr >= na->num_rx_rings) {
		rr = rr % na->num_rx_rings; // XXX expensive...
	}

	/* limit the size of the queue */
	if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
		m_freem(m);
	} else {
		mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
	}

	if (netmap_generic_mit < 32768) {
		/* no rx mitigation, pass notification up */
		netmap_generic_irq(na->ifp, rr, &work_done);
		IFRATE(rate_ctx.new.rxirq++);
	} else {
		/* same as send combining: filter the notification if there is a
		 * pending timer, otherwise pass it up and start a timer.
		 */
		if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
			/* Record that there is some pending work. */
			gna->mit[rr].mit_pending = 1;
		} else {
			netmap_generic_irq(na->ifp, rr, &work_done);
			IFRATE(rate_ctx.new.rxirq++);
			netmap_mitigation_start(&gna->mit[rr]);
		}
	}
}
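
/*
 * Receive-side counterpart of the userspace sketch near the top of the
 * file (illustrative only; nm_nextpkt() and struct nm_pkthdr are assumed
 * from <net/netmap_user.h>, consume() is a placeholder):
 *
 *	struct nm_pkthdr h;
 *	unsigned char *buf;
 *
 *	pfd.events = POLLIN;
 *	poll(&pfd, 1, -1);		// woken up by netmap_generic_irq()
 *	while ((buf = nm_nextpkt(d, &h)) != NULL)
 *		consume(buf, h.len);	// payload was m_copydata()'d by rxsync
 */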

/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_ring *ring = kring->ring;
	struct netmap_adapter *na = kring->na;
	u_int nm_i;	/* index into the netmap ring */ //j,
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	if (head > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		/* extract buffers from the rx queue, stop at most one
		 * slot before nr_hwcur (stop_i)
		 */
		uint16_t slot_flags = kring->nkr_slot_flags;
		u_int stop_i = nm_prev(kring->nr_hwcur, lim);

		nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
		for (n = 0; nm_i != stop_i; n++) {
			int len;
			void *addr = NMB(na, &ring->slot[nm_i]);
			struct mbuf *m;

			/* we only check the address here on generic rx rings */
			if (addr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
				return netmap_ring_reinit(kring);
			}
			/*
			 * Call the locked version of the function.
			 * XXX Ideally we could grab a batch of mbufs at once
			 * and save some locking overhead.
			 */
			m = mbq_safe_dequeue(&kring->rx_queue);
			if (!m)	/* no more data */
				break;
			len = MBUF_LEN(m);
			m_copydata(m, 0, len, addr);
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			m_freem(m);
			nm_i = nm_next(nm_i, lim);
		}
		if (n) {
			kring->nr_hwtail = nm_i;
			IFRATE(rate_ctx.new.rxpkt += n);
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	// XXX should we invert the order ?
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		/* Userspace has released some packets. */
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];

			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
		}
		kring->nr_hwcur = head;
	}
	IFRATE(rate_ctx.new.rxsync++);

	return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
	struct ifnet *ifp = netmap_generic_getifp(gna);
	struct netmap_adapter *prev_na = gna->prev;

	if (prev_na != NULL) {
		D("Released generic NA %p", gna);
		if_rele(ifp);
		netmap_adapter_put(prev_na);
		if (na->ifp == NULL) {
			/*
			 * The driver has been removed without releasing
			 * the reference so we need to do it here.
			 */
			netmap_adapter_put(prev_na);
		}
	}
	WNA(ifp) = prev_na;
	D("Restored native NA %p", prev_na);
	na->ifp = NULL;
}

/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
	struct netmap_adapter *na;
	struct netmap_generic_adapter *gna;
	int retval;
	u_int num_tx_desc, num_rx_desc;

	num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

	generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
	ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
	if (num_tx_desc == 0 || num_rx_desc == 0) {
		D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
		return EINVAL;
	}

	gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gna == NULL) {
		D("no memory on attach, give up");
		return ENOMEM;
	}
	na = (struct netmap_adapter *)gna;
	strncpy(na->name, ifp->if_xname, sizeof(na->name));
	na->ifp = ifp;
	na->num_tx_desc = num_tx_desc;
	na->num_rx_desc = num_rx_desc;
	na->nm_register = &generic_netmap_register;
	na->nm_txsync = &generic_netmap_txsync;
	na->nm_rxsync = &generic_netmap_rxsync;
	na->nm_dtor = &generic_netmap_dtor;
	/* when using generic, NAF_NETMAP_ON is set so we force
	 * NAF_SKIP_INTR to use the regular interrupt handler
	 */
	na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

	ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
			ifp->num_tx_queues, ifp->real_num_tx_queues,
			ifp->tx_queue_len);
	ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
			ifp->num_rx_queues, ifp->real_num_rx_queues);

	generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

	retval = netmap_attach_common(na);
	if (retval) {
		free(gna, M_DEVBUF);
	}

	return retval;
}
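
/*
 * Illustrative calling sketch: generic_netmap_attach() is expected to be
 * invoked by the core module when a NIOCREGIF request finds no native
 * adapter on the interface and the generic fallback is allowed. The
 * snippet below only shows the error contract; variable names are
 * placeholders, not the actual caller code.
 *
 *	if (NA(ifp) == NULL) {			// no adapter yet
 *		error = generic_netmap_attach(ifp);
 *		if (error)
 *			return error;		// EINVAL: no hw slots, ENOMEM: no memory
 *	}
 *	na = NA(ifp);				// now a struct netmap_generic_adapter
 *
 * The default ring size comes from the netmap_generic_ringsize knob
 * (a sysctl/module parameter exported elsewhere in netmap), possibly
 * overridden by generic_find_num_desc() with the actual NIC values.
 */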