xref: /freebsd/share/man/man4/netmap.4 (revision 3ef51c5fb9163f2aafb1c14729e06a8bf0c4d113)
1.\" Copyright (c) 2011 Matteo Landi, Luigi Rizzo, Universita` di Pisa
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" This document is derived in part from the enet man page (enet.4)
26.\" distributed with 4.3BSD Unix.
27.\"
28.\" $FreeBSD$
29.\" $Id: netmap.4 9662 2011-11-16 13:18:06Z luigi $: stable/8/share/man/man4/bpf.4 181694 2008-08-13 17:45:06Z ed $
30.\"
31.Dd February 27, 2012
32.Dt NETMAP 4
33.Os
34.Sh NAME
35.Nm netmap
36.Nd a framework for fast packet I/O
37.Sh SYNOPSIS
38.Cd device netmap
39.Sh DESCRIPTION
40.Nm
41is a framework for fast and safe access to network devices
42(reaching 14.88 Mpps at less than 1 GHz).
43.Nm
44uses memory mapped buffers and metadata
45(buffer indexes and lengths) to communicate with the kernel,
46which is in charge of validating information through
47.Pa ioctl()
48and
49.Pa select()/poll().
50.Nm
51can exploit the parallelism in multiqueue devices and
52multicore systems.
53.Pp
54.Nm
55requires explicit support in device drivers.
56For a list of supported devices, see the end of this manual page.
57.Sh OPERATION
58.Nm
59clients must first call
60.Pa open("/dev/netmap") ,
61and then issue an
62.Pa ioctl(...,NIOCREGIF,...)
63to bind the file descriptor to a network device.
64.Pp
65When a device is put in
66.Nm
67mode, its data path is disconnected from the host stack.
68The processes owning the file descriptor
69can exchange packets with the device, or with the host stack,
70through an mmapped memory region that contains pre-allocated
71buffers and metadata.
72.Pp
73Non blocking I/O is done with special
74.Pa ioctl()'s ,
75whereas the file descriptor can be passed to
76.Pa select()/poll()
77to be notified about incoming packets or available transmit buffers.
78.Ss Data structures
79All data structures for all devices in
80.Nm
81mode are in a memory
82region shared by the kernel and all processes
83who open
84.Pa /dev/netmap
85(NOTE: visibility may be restricted in future implementations).
86All references between the shared data structures
87are relative (offsets or indexes). Some macros help convert
88them into actual pointers.
89.Pp
90The data structures in shared memory are the following:
91.Bl -tag -width XXX
92.It Dv struct netmap_if (one per interface)
93indicates the number of rings supported by an interface, their
94sizes, and the offsets of the
95.Pa netmap_rings
96associated to the interface.
97The offset of a
98.Pa struct netmap_if
99in the shared memory region is indicated by the
100.Pa nr_offset
101field in the structure returned by the
102.Pa NIOCREGIF
103ioctl (see below).
104.Bd -literal
105struct netmap_if {
106    char ni_name[IFNAMSIZ]; /* name of the interface. */
107    const u_int ni_num_queues; /* number of hw ring pairs */
108    const ssize_t   ring_ofs[]; /* offset of tx and rx rings */
109};
110.Ed
111.It Dv struct netmap_ring (one per ring)
112contains the index of the current read or write slot (cur),
113the number of slots available for reception or transmission (avail),
114and an array of
115.Pa slots
116describing the buffers.
117There is one ring pair for each of the N hardware ring pairs
118supported by the card (numbered 0..N-1), plus
119one ring pair (numbered N) for packets from/to the host stack.
120.Bd -literal
121struct netmap_ring {
122    const ssize_t buf_ofs;
123    const uint32_t num_slots; /* number of slots in the ring. */
124    uint32_t avail;           /* number of usable slots */
125    uint32_t cur;             /* 'current' index for the user side */
126    uint32_t reserved;        /* not refilled before current */
127
128    const uint16_t nr_buf_size;
129    uint16_t flags;
130    struct netmap_slot slot[0]; /* array of slots. */
131};
132.Ed
133.It Dv struct netmap_slot (one per packet)
134contains the metadata for a packet: a buffer index (buf_idx),
135a buffer length (len), and some flags.
136.Bd -literal
137struct netmap_slot {
138    uint32_t buf_idx; /* buffer index */
139    uint16_t len;   /* packet length */
140    uint16_t flags; /* buf changed, etc. */
141#define NS_BUF_CHANGED  0x0001  /* must resync, buffer changed */
142#define NS_REPORT       0x0002  /* tell hw to report results
143                                 * e.g. by generating an interrupt
144                                 */
145};
146.Ed
147.It Dv packet buffers
148are fixed size (approximately 2k) buffers allocated by the kernel
149that contain packet data. Buffer addresses are computed through
150macros.
151.El
152.Pp
153Some macros support the access to objects in the shared memory
154region. In particular:
155.Bd -literal
156struct netmap_if *nifp;
157struct netmap_ring *txring = NETMAP_TXRING(nifp, i);
158struct netmap_ring *rxring = NETMAP_RXRING(nifp, i);
159uint32_t idx = txring->slot[txring->cur].buf_idx;
160char *buf = NETMAP_BUF(txring, idx);
161.Ed
162.Ss IOCTLS
163.Nm
164supports some ioctl() to synchronize the state of the rings
165between the kernel and the user processes, plus some
166to query and configure the interface.
167The former do not require any argument, whereas the latter
168use a
169.Pa struct nmreq
170defined as follows:
171.Bd -literal
172struct nmreq {
173        char      nr_name[IFNAMSIZ];
174        uint32_t  nr_version;     /* API version */
175#define NETMAP_API      2         /* current version */
176        uint32_t  nr_offset;      /* nifp offset in the shared region */
177        uint32_t  nr_memsize;     /* size of the shared region */
178        uint32_t  nr_tx_slots;    /* slots in tx rings */
179        uint32_t  nr_rx_slots;    /* slots in rx rings */
180        uint16_t  nr_tx_rings;    /* number of tx rings */
181        uint16_t  nr_rx_rings;    /* number of rx rings */
182        uint16_t  nr_ringid;      /* ring(s) we care about */
183#define NETMAP_HW_RING  0x4000    /* low bits indicate one hw ring */
184#define NETMAP_SW_RING  0x2000    /* we process the sw ring */
185#define NETMAP_NO_TX_POLL 0x1000  /* no gratuitous txsync on poll */
186#define NETMAP_RING_MASK 0xfff    /* the actual ring number */
187};
188
189.Ed
190A device descriptor obtained through
191.Pa /dev/netmap
192also supports the ioctls supported by network devices.
193.Pp
194The netmap-specific
195.Xr ioctl 2
196command codes below are defined in
197.In net/netmap.h
198and are:
199.Bl -tag -width XXXX
200.It Dv NIOCGINFO
201returns information about the interface named in nr_name.
202On return, nr_memsize indicates the size of the shared netmap
203memory region (this is device-independent),
204nr_tx_slots and nr_rx_slots indicate how many buffers are in a
205transmit and receive ring,
206nr_tx_rings and nr_rx_rings indicate the number of transmit
207and receive rings supported by the hardware.
208.Pp
209If the device does not support netmap, the ioctl returns EINVAL.
210.It Dv NIOCREGIF
211puts the interface named in nr_name into netmap mode, disconnecting
212it from the host stack, and/or defines which rings are controlled
213through this file descriptor.
214On return, it gives the same info as NIOCGINFO, and nr_ringid
215indicates the identity of the rings controlled through the file
216descriptor.
217.Pp
218Possible values for nr_ringid are
219.Bl -tag -width XXXXX
220.It 0
221default, all hardware rings
222.It NETMAP_SW_RING
223the ``host rings'' connecting to the host stack
224.It NETMAP_HW_RING + i
225the i-th hardware ring
226.El
227By default, a
228.Nm poll
229or
230.Nm select
231call pushes out any pending packets on the transmit ring, even if
232no write events are specified.
233The feature can be disabled by or-ing
234.Nm NETMAP_NO_TX_POLL
235to nr_ringid.
236But normally you should keep this feature unless you are using
237separate file descriptors for the send and receive rings, because
238otherwise packets are pushed out only if NIOCTXSYNC is called,
239or the send queue is full.
240.Pp
241.Pa NIOCREGIF
242can be used multiple times to change the association of a
243file descriptor to a ring pair, always within the same device.
244.It Dv NIOCUNREGIF
245brings an interface back to normal mode.
246.It Dv NIOCTXSYNC
247tells the hardware of new packets to transmit, and updates the
248number of slots available for transmission.
249.It Dv NIOCRXSYNC
250tells the hardware of consumed packets, and asks for newly available
251packets.
252.El
253.Ss SYSTEM CALLS
254.Nm
255uses
256.Nm select
257and
258.Nm poll
259to wake up processes when significant events occur.
260.Sh EXAMPLES
261The following code implements a traffic generator:
262.Pp
263.Bd -literal -compact
264#include <net/netmap.h>
265#include <net/netmap_user.h>
266struct netmap_if *nifp;
267struct netmap_ring *ring;
268struct nmreq nmr;
269
270fd = open("/dev/netmap", O_RDWR);
271bzero(&nmr, sizeof(nmr));
272strcpy(nmr.nr_name, "ix0");
273nmr.nr_version = NETMAP_API;
274ioctl(fd, NIOCREGIF, &nmr);
275p = mmap(0, nmr.nr_memsize, fd);
276nifp = NETMAP_IF(p, nmr.nr_offset);
277ring = NETMAP_TXRING(nifp, 0);
278fds.fd = fd;
279fds.events = POLLOUT;
280for (;;) {
281    poll(&fds, 1, -1);
282    for ( ; ring->avail > 0; ring->avail--) {
283        i = ring->cur;
284        buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
285        ... prepare packet in buf ...
286        ring->slot[i].len = ... packet length ...
287        ring->cur = NETMAP_RING_NEXT(ring, i);
288    }
289}
290.Ed
291.Sh SUPPORTED INTERFACES
292.Nm
293supports the following interfaces:
294.Xr em 4 ,
295.Xr ixgbe 4 ,
296.Xr re 4 .
297.Sh AUTHORS
298The
299.Nm
300framework has been designed and implemented by
301.An Luigi Rizzo
302and
303.An Matteo Landi
304in 2011 at the Universita` di Pisa.
305