xref: /freebsd/share/man/man4/netmap.4 (revision 9124ddeb4a551977cf6b2218291e7c666ce25f47)
1.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" This document is derived in part from the enet man page (enet.4)
26.\" distributed with 4.3BSD Unix.
27.\"
28.\" $FreeBSD$
29.\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
30.\"
31.Dd February 27, 2012
32.Dt NETMAP 4
33.Os
34.Sh NAME
35.Nm netmap
36.Nd a framework for fast packet I/O
37.Sh SYNOPSIS
38.Cd device netmap
39.Sh DESCRIPTION
40.Nm
41is a framework for fast and safe access to network devices
42(reaching 14.88 Mpps at less than 1 GHz).
43.Nm
44uses memory mapped buffers and metadata
45(buffer indexes and lengths) to communicate with the kernel,
46which is in charge of validating information through
47.Pa ioctl()
48and
49.Pa select()/poll() .
50.Nm
51can exploit the parallelism in multiqueue devices and
52multicore systems.
53.Pp
53.Pp
56requires explicit support in device drivers.
57For a list of supported devices, see the end of this manual page.
58.Sh OPERATION
59.Nm
60clients must first call
61.Pa open("/dev/netmap") ,
62and then issue an
63.Pa ioctl(...,NIOCREGIF,...)
64to bind the file descriptor to a network device.
65.Pp
66When a device is put in
67.Nm
68mode, its data path is disconnected from the host stack.
69The processes owning the file descriptor
70can exchange packets with the device, or with the host stack,
71through an mmapped memory region that contains pre-allocated
72buffers and metadata.
73.Pp
74Non-blocking I/O is done with special
75.Pa ioctl()'s ,
76whereas the file descriptor can be passed to
77.Pa select()/poll()
78to be notified about incoming packets or available transmit buffers.
79.Ss Data structures
80All data structures for all devices in
81.Nm
82mode are in a memory
83region shared by the kernel and all processes
84who open
85.Pa /dev/netmap
86(NOTE: visibility may be restricted in future implementations).
87All references between the shared data structures
88are relative (offsets or indexes). Some macros help convert
89them into actual pointers.
90.Pp
91The data structures in shared memory are the following:
92.Pp
93.Bl -tag -width XXX
94.It Dv struct netmap_if (one per interface)
95indicates the number of rings supported by an interface, their
96sizes, and the offsets of the
97.Pa netmap_rings
98associated to the interface.
99The offset of a
100.Pa struct netmap_if
101in the shared memory region is indicated by the
102.Pa nr_offset
103field in the structure returned by the
104.Pa NIOCREGIF
105ioctl (see below).
106.Bd -literal
107struct netmap_if {
108    char ni_name[IFNAMSIZ]; /* name of the interface. */
109    const u_int ni_num_queues; /* number of hw ring pairs */
110    const ssize_t   ring_ofs[]; /* offset of tx and rx rings */
111};
112.Ed
113.It Dv struct netmap_ring (one per ring)
114contains the index of the current read or write slot (cur),
115the number of slots available for reception or transmission (avail),
116and an array of
117.Pa slots
118describing the buffers.
119There is one ring pair for each of the N hardware ring pairs
120supported by the card (numbered 0..N-1), plus
121one ring pair (numbered N) for packets from/to the host stack.
122.Bd -literal
123struct netmap_ring {
124    const ssize_t buf_ofs;
125    const uint32_t num_slots; /* number of slots in the ring. */
126    uint32_t avail;           /* number of usable slots */
127    uint32_t cur;             /* 'current' index for the user side */
128    uint32_t reserved;        /* not refilled before current */
129
130    const uint16_t nr_buf_size;
131    uint16_t flags;
132    struct netmap_slot slot[0]; /* array of slots. */
133};
134.Ed
135.It Dv struct netmap_slot (one per packet)
136contains the metadata for a packet: a buffer index (buf_idx),
137a buffer length (len), and some flags.
138.Bd -literal
139struct netmap_slot {
140    uint32_t buf_idx; /* buffer index */
141    uint16_t len;   /* packet length */
142    uint16_t flags; /* buf changed, etc. */
143#define NS_BUF_CHANGED  0x0001  /* must resync, buffer changed */
144#define NS_REPORT       0x0002  /* tell hw to report results
145                                 * e.g. by generating an interrupt
146                                 */
147};
148.Ed
149.It Dv packet buffers
150are fixed size (approximately 2k) buffers allocated by the kernel
151that contain packet data. Buffer addresses are computed through
152macros.
153.El
154.Pp
155Some macros support the access to objects in the shared memory
156region. In particular:
157.Bd -literal
158struct netmap_if *nifp;
159struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
160struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
161int i = txring->slot[txring->cur].buf_idx;
162char *buf = NETMAP_BUF(txring, i);
163.Ed
164.Ss IOCTLS
165.Pp
166.Nm
167supports some ioctl() to synchronize the state of the rings
168between the kernel and the user processes, plus some
169to query and configure the interface.
170The former do not require any argument, whereas the latter
171use a
172.Pa struct nmreq
173defined as follows:
174.Bd -literal
175struct nmreq {
176        char      nr_name[IFNAMSIZ];
177        uint32_t  nr_version;     /* API version */
178#define NETMAP_API      2         /* current version */
179        uint32_t  nr_offset;      /* nifp offset in the shared region */
180        uint32_t  nr_memsize;     /* size of the shared region */
181        uint32_t  nr_tx_slots;    /* slots in tx rings */
182        uint32_t  nr_rx_slots;    /* slots in rx rings */
183        uint16_t  nr_tx_rings;    /* number of tx rings */
184        uint16_t  nr_rx_rings;    /* number of rx rings */
185        uint16_t  nr_ringid;      /* ring(s) we care about */
186#define NETMAP_HW_RING  0x4000    /* low bits indicate one hw ring */
187#define NETMAP_SW_RING  0x2000    /* we process the sw ring */
188#define NETMAP_NO_TX_POLL 0x1000  /* no gratuitous txsync on poll */
189#define NETMAP_RING_MASK 0xfff    /* the actual ring number */
190};
191
192.Ed
193A device descriptor obtained through
194.Pa /dev/netmap
195also supports the ioctls supported by network devices.
196.Pp
197The netmap-specific
198.Xr ioctl 2
199command codes below are defined in
200.In net/netmap.h
201and are:
202.Bl -tag -width XXXX
203.It Dv NIOCGINFO
204returns information about the interface named in nr_name.
205On return, nr_memsize indicates the size of the shared netmap
206memory region (this is device-independent),
207nr_tx_slots and nr_rx_slots indicate how many buffers are in a
208transmit and receive ring,
209nr_tx_rings and nr_rx_rings indicate the number of transmit
210and receive rings supported by the hardware.
211.Pp
212If the device does not support netmap, the ioctl returns EINVAL.
213.It Dv NIOCREGIF
214puts the interface named in nr_name into netmap mode, disconnecting
215it from the host stack, and/or defines which rings are controlled
216through this file descriptor.
217On return, it gives the same info as NIOCGINFO, and nr_ringid
218indicates the identity of the rings controlled through the file
219descriptor.
220.Pp
221Possible values for nr_ringid are
222.Bl -tag -width XXXXX
223.It 0
224default, all hardware rings
225.It NETMAP_SW_RING
226the ``host rings'' connecting to the host stack
227.It NETMAP_HW_RING + i
228the i-th hardware ring
229.El
230By default, a
231.Xr poll 2
232or
233.Xr select 2
234call pushes out any pending packets on the transmit ring, even if
235no write events are specified.
236The feature can be disabled by or-ing
237.Dv NETMAP_NO_TX_POLL
238to nr_ringid.
239But normally you should keep this feature unless you are using
240separate file descriptors for the send and receive rings, because
241otherwise packets are pushed out only if NIOCTXSYNC is called,
242or the send queue is full.
243.Pp
244.Pa NIOCREGIF
245can be used multiple times to change the association of a
246file descriptor to a ring pair, always within the same device.
247.It Dv NIOCUNREGIF
248brings an interface back to normal mode.
249.It Dv NIOCTXSYNC
250tells the hardware of new packets to transmit, and updates the
251number of slots available for transmission.
252.It Dv NIOCRXSYNC
253tells the hardware of consumed packets, and asks for newly available
254packets.
255.El
256.Ss SYSTEM CALLS
257.Nm
258uses
259.Xr select 2
260and
261.Xr poll 2
262to wake up processes when significant events occur.
263.Sh EXAMPLES
264The following code implements a traffic generator:
265.Pp
266.Bd -literal -compact
267#include <net/netmap.h>
268#include <net/netmap_user.h>
269struct netmap_if *nifp;
270struct netmap_ring *ring;
271struct nmreq nmr;
272
273fd = open("/dev/netmap", O_RDWR);
274bzero(&nmr, sizeof(nmr));
275strcpy(nmr.nr_name, "ix0");
276nmr.nr_version = NETMAP_API;
277ioctl(fd, NIOCREGIF, &nmr);
278p = mmap(0, nmr.nr_memsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
279nifp = NETMAP_IF(p, nmr.nr_offset);
280ring = NETMAP_TXRING(nifp, 0);
281fds.fd = fd;
282fds.events = POLLOUT;
283for (;;) {
284    poll(&fds, 1, -1);
285    for ( ; ring->avail > 0; ring->avail--) {
286        i = ring->cur;
287        buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
288        ... prepare packet in buf ...
289        ring->slot[i].len = ... packet length ...
290        ring->cur = NETMAP_RING_NEXT(ring, i);
291    }
292}
293.Ed
294.Sh SUPPORTED INTERFACES
295.Nm
296supports the following interfaces:
297.Xr em 4 ,
298.Xr ixgbe 4 ,
299.Xr re 4 .
300.Sh AUTHORS
301The
302.Nm
303framework has been designed and implemented by
304.An Luigi Rizzo
305and
306.An Matteo Landi
307in 2011 at the Universita` di Pisa.
308