xref: /freebsd/share/man/man4/netmap.4 (revision ce3adf4362fcca6a43e500b2531f0038adbfbd21)
1.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" This document is derived in part from the enet man page (enet.4)
26.\" distributed with 4.3BSD Unix.
27.\"
28.\" $FreeBSD$
29.\" $Id: netmap.4 11563 2012-08-02 08:59:12Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
30.\"
31.Dd February 27, 2012
32.Dt NETMAP 4
33.Os
34.Sh NAME
35.Nm netmap
36.Nd a framework for fast packet I/O
37.Sh SYNOPSIS
38.Cd device netmap
39.Sh DESCRIPTION
40.Nm
41is a framework for fast and safe access to network devices
42(reaching 14.88 Mpps at less than 1 GHz).
43.Nm
44uses memory mapped buffers and metadata
45(buffer indexes and lengths) to communicate with the kernel,
46which is in charge of validating information through
47.Pa ioctl()
48and
49.Pa select()/poll().
50.Nm
51can exploit the parallelism in multiqueue devices and
52multicore systems.
53.Pp
54.Nm
55requires explicit support in device drivers.
56For a list of supported devices, see the end of this manual page.
57.Sh OPERATION
58.Nm
59clients must first call
60.Pa open("/dev/netmap") ,
61and then issue an
62.Pa ioctl(...,NIOCREGIF,...)
63to bind the file descriptor to a network device.
64.Pp
65When a device is put in
66.Nm
67mode, its data path is disconnected from the host stack.
68The processes owning the file descriptor
69can exchange packets with the device, or with the host stack,
70through an mmapped memory region that contains pre-allocated
71buffers and metadata.
72.Pp
73Non blocking I/O is done with special
74.Pa ioctl()'s ,
75whereas the file descriptor can be passed to
76.Pa select()/poll()
77to be notified about incoming packets or available transmit buffers.
78.Ss Data structures
79All data structures for all devices in
80.Nm
81mode are in a memory
82region shared by the kernel and all processes
83who open
84.Pa /dev/netmap
85(NOTE: visibility may be restricted in future implementations).
86All references between the shared data structures
87are relative (offsets or indexes). Some macros help converting
88them into actual pointers.
89.Pp
90The data structures in shared memory are the following:
91.Bl -tag -width XXX
92.It Dv struct netmap_if (one per interface)
93indicates the number of rings supported by an interface, their
94sizes, and the offsets of the
95.Pa netmap_rings
96associated to the interface.
97The offset of a
98.Pa struct netmap_if
99in the shared memory region is indicated by the
100.Pa nr_offset
101field in the structure returned by the
102.Pa NIOCREGIF
103(see below).
104.Bd -literal
105struct netmap_if {
106    char ni_name[IFNAMSIZ]; /* name of the interface. */
107    const u_int ni_num_queues; /* number of hw ring pairs */
108    const ssize_t   ring_ofs[]; /* offset of tx and rx rings */
109};
110.Ed
111.It Dv struct netmap_ring (one per ring)
112contains the index of the current read or write slot (cur),
113the number of slots available for reception or transmission (avail),
114and an array of
115.Pa slots
116describing the buffers.
117There is one ring pair for each of the N hardware ring pairs
118supported by the card (numbered 0..N-1), plus
119one ring pair (numbered N) for packets from/to the host stack.
120.Bd -literal
121struct netmap_ring {
122    const ssize_t buf_ofs;
123    const uint32_t num_slots; /* number of slots in the ring. */
124    uint32_t avail;           /* number of usable slots */
125    uint32_t cur;             /* 'current' index for the user side */
126    uint32_t reserved;        /* not refilled before current */
127
128    const uint16_t nr_buf_size;
129    uint16_t flags;
130    struct netmap_slot slot[0]; /* array of slots. */
131};
132.Ed
133.It Dv struct netmap_slot (one per packet)
134contains the metadata for a packet: a buffer index (buf_idx),
135a buffer length (len), and some flags.
136.Bd -literal
137struct netmap_slot {
138    uint32_t buf_idx; /* buffer index */
139    uint16_t len;   /* packet length */
140    uint16_t flags; /* buf changed, etc. */
141#define NS_BUF_CHANGED  0x0001  /* must resync, buffer changed */
142#define NS_REPORT       0x0002  /* tell hw to report results
143                                 * e.g. by generating an interrupt
144                                 */
145};
146.Ed
147.It Dv packet buffers
148are fixed size (approximately 2k) buffers allocated by the kernel
149that contain packet data. Buffer addresses are computed through
150macros.
151.El
152.Pp
153Some macros support the access to objects in the shared memory
154region. In particular:
155.Bd -literal
156struct netmap_if *nifp;
157struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
158struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
159int i = txring->slot[txring->cur].buf_idx;
160char *buf = NETMAP_BUF(txring, i);
161.Ed
162.Sh IOCTLS
163.Nm
164supports some ioctl() to synchronize the state of the rings
165between the kernel and the user processes, plus some
166to query and configure the interface.
167The former do not require any argument, whereas the latter
168use a
169.Pa struct nmreq
170defined as follows:
171.Bd -literal
172struct nmreq {
173        char      nr_name[IFNAMSIZ];
174        uint32_t  nr_version;     /* API version */
175#define NETMAP_API      3         /* current version */
176        uint32_t  nr_offset;      /* nifp offset in the shared region */
177        uint32_t  nr_memsize;     /* size of the shared region */
178        uint32_t  nr_tx_slots;    /* slots in tx rings */
179        uint32_t  nr_rx_slots;    /* slots in rx rings */
180        uint16_t  nr_tx_rings;    /* number of tx rings */
181        uint16_t  nr_rx_rings;    /* number of rx rings */
182        uint16_t  nr_ringid;      /* ring(s) we care about */
183#define NETMAP_HW_RING  0x4000    /* low bits indicate one hw ring */
184#define NETMAP_SW_RING  0x2000    /* we process the sw ring */
185#define NETMAP_NO_TX_POLL 0x1000  /* no gratuitous txsync on poll */
186#define NETMAP_RING_MASK 0xfff    /* the actual ring number */
187        uint16_t        spare1;
188        uint32_t        spare2[4];
189};
190
191.Ed
192A device descriptor obtained through
193.Pa /dev/netmap
194also supports the ioctls supported by network devices.
195.Pp
196The netmap-specific
197.Xr ioctl 2
198command codes below are defined in
199.In net/netmap.h
200and are:
201.Bl -tag -width XXXX
202.It Dv NIOCGINFO
203returns information about the interface named in nr_name.
204On return, nr_memsize indicates the size of the shared netmap
205memory region (this is device-independent),
206nr_tx_slots and nr_rx_slots indicate how many buffers are in a
207transmit and receive ring,
208nr_tx_rings and nr_rx_rings indicate the number of transmit
209and receive rings supported by the hardware.
210.Pp
211If the device does not support netmap, the ioctl returns EINVAL.
212.It Dv NIOCREGIF
213puts the interface named in nr_name into netmap mode, disconnecting
214it from the host stack, and/or defines which rings are controlled
215through this file descriptor.
216On return, it gives the same info as NIOCGINFO, and nr_ringid
217indicates the identity of the rings controlled through the file
218descriptor.
219.Pp
220Possible values for nr_ringid are
221.Bl -tag -width XXXXX
222.It 0
223default, all hardware rings
224.It NETMAP_SW_RING
225the ``host rings'' connecting to the host stack
226.It NETMAP_HW_RING + i
227the i-th hardware ring
228.El
229By default, a
230.Xr poll 2
231or
232.Xr select 2
233call pushes out any pending packets on the transmit ring, even if
234no write events are specified.
235The feature can be disabled by or-ing
236.Dv NETMAP_NO_TX_POLL
237to nr_ringid.
238But normally you should keep this feature unless you are using
239separate file descriptors for the send and receive rings, because
240otherwise packets are pushed out only if NIOCTXSYNC is called,
241or the send queue is full.
242.Pp
243.Pa NIOCREGIF
244can be used multiple times to change the association of a
245file descriptor to a ring pair, always within the same device.
246.It Dv NIOCUNREGIF
247brings an interface back to normal mode.
248.It Dv NIOCTXSYNC
249tells the hardware of new packets to transmit, and updates the
250number of slots available for transmission.
251.It Dv NIOCRXSYNC
252tells the hardware of consumed packets, and asks for newly available
253packets.
254.El
255.Sh SYSTEM CALLS
256.Nm
257uses
258.Xr select 2
259and
260.Xr poll 2
261to wake up processes when significant events occur.
262.Sh EXAMPLES
263The following code implements a traffic generator:
264.Pp
265.Bd -literal -compact
266#include <net/netmap.h>
267#include <net/netmap_user.h>
268struct netmap_if *nifp;
269struct netmap_ring *ring;
270struct nmreq nmr;
271
272fd = open("/dev/netmap", O_RDWR);
273bzero(&nmr, sizeof(nmr));
274strcpy(nmr.nr_name, "ix0");
275nmr.nr_version = NETMAP_API;
276ioctl(fd, NIOCREGIF, &nmr);
277p = mmap(0, nmr.nr_memsize, fd);
278nifp = NETMAP_IF(p, nmr.nr_offset);
279ring = NETMAP_TXRING(nifp, 0);
280fds.fd = fd;
281fds.events = POLLOUT;
282for (;;) {
283    poll(&fds, 1, -1);
284    for ( ; ring->avail > 0 ; ring->avail--) {
285        i = ring->cur;
286        buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
287        ... prepare packet in buf ...
288        ring->slot[i].len = ... packet length ...
289        ring->cur = NETMAP_RING_NEXT(ring, i);
290    }
291}
292.Ed
293.Sh SUPPORTED INTERFACES
294.Nm
295supports the following interfaces:
296.Xr em 4 ,
297.Xr igb 4 ,
298.Xr ixgbe 4 ,
299.Xr lem 4 ,
300.Xr re 4
301.Sh SEE ALSO
302.Xr vale 4
303.Pp
304http://info.iet.unipi.it/~luigi/netmap/
305.Pp
306Luigi Rizzo, Revisiting network I/O APIs: the netmap framework,
307Communications of the ACM, 55 (3), pp.45-51, March 2012
308.Pp
309Luigi Rizzo, netmap: a novel framework for fast packet I/O,
310Usenix ATC'12, June 2012, Boston
311.Sh AUTHORS
312.An -nosplit
313The
314.Nm
315framework has been designed and implemented at the
316Universita` di Pisa in 2011 by
317.An Luigi Rizzo ,
318with help from
319.An Matteo Landi ,
320.An Gaetano Catalli ,
321.An Giuseppe Lettieri .
322.Pp
323.Nm
324has been funded by the European Commission within FP7 Project CHANGE (257422).
325