xref: /freebsd/sys/net/netmap_user.h (revision 529a53abe2287eae08a3af62749273df775254e9)
1 /*
2  * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Functions and macros to manipulate netmap structures and packets
31  * in userspace. See netmap(4) for more information.
32  *
33  * The address of the struct netmap_if, say nifp, is computed from the
34  * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
35  *	ioctl(fd, NIOCREG, &req);
36  *	mem = mmap(0, ... );
37  *	nifp = NETMAP_IF(mem, req.nr_nifp);
38  *		(so simple, we could just do it manually)
39  *
40  * From there:
41  *	struct netmap_ring *NETMAP_TXRING(nifp, index)
42  *	struct netmap_ring *NETMAP_RXRING(nifp, index)
43  *		we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
44  *
45  *	ring->slot[i] gives us the i-th slot (we can access
46  *		directly len, flags, buf_idx)
47  *
48  *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
49  *		the buffer numbered x
50  *
51  * All ring indexes (head, cur, tail) should always move forward.
52  * To compute the next index in a circular ring you can use
53  *	i = nm_ring_next(ring, i);
54  *
 * To ease porting apps from pcap to netmap we supply a few functions
56  * that can be called to open, close, read and write on netmap in a way
57  * similar to libpcap. Note that the read/write function depend on
58  * an ioctl()/select()/poll() being issued to refill rings or push
59  * packets out.
60  *
61  * In order to use these, include #define NETMAP_WITH_LIBS
62  * in the source file that invokes these functions.
63  */
64 
65 #ifndef _NET_NETMAP_USER_H_
66 #define _NET_NETMAP_USER_H_
67 
68 #include <stdint.h>
69 #include <net/if.h>		/* IFNAMSIZ */
70 
71 #ifndef likely
72 #define likely(x)	__builtin_expect(!!(x), 1)
73 #define unlikely(x)	__builtin_expect(!!(x), 0)
74 #endif /* likely and unlikely */
75 
76 #include <net/netmap.h>
77 
/* helper macro: interpret (ptr + offset bytes) as a pointer of the given type */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* netmap_if from the mmap()ed base and the nr_offset returned by NIOCREGIF */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* tx ring 'index': ring_ofs[] entries are byte offsets relative to nifp */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/* rx ring 'index': rx offsets follow the ni_tx_rings + 1 tx entries */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *,	\
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/* address of buffer 'index': buffers are nr_buf_size each, starting at buf_ofs */
#define NETMAP_BUF(ring, index)				\
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

/* inverse of NETMAP_BUF: recover the buffer index from a buffer address */
#define NETMAP_BUF_IDX(ring, buf)			\
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )
96 
97 
98 static inline uint32_t
99 nm_ring_next(struct netmap_ring *r, uint32_t i)
100 {
101 	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
102 }
103 
104 
105 /*
106  * Return 1 if we have pending transmissions in the tx ring.
107  * When everything is complete ring->cur = ring->tail + 1 (modulo ring size)
108  */
109 static inline int
110 nm_tx_pending(struct netmap_ring *r)
111 {
112 	return nm_ring_next(r, r->tail) != r->cur;
113 }
114 
115 
116 static inline uint32_t
117 nm_ring_space(struct netmap_ring *ring)
118 {
119         int ret = ring->tail - ring->cur;
120         if (ret < 0)
121                 ret += ring->num_slots;
122         return ret;
123 }
124 
125 
126 #ifdef NETMAP_WITH_LIBS
127 /*
128  * Support for simple I/O libraries.
129  * Include other system headers required for compiling this.
130  */
131 
132 #ifndef HAVE_NETMAP_WITH_LIBS
133 #define HAVE_NETMAP_WITH_LIBS
134 
135 #include <sys/time.h>
136 #include <sys/mman.h>
137 #include <string.h>	/* memset */
138 #include <sys/ioctl.h>
139 #include <sys/errno.h>	/* EINVAL */
140 #include <fcntl.h>	/* O_RDWR */
141 #include <unistd.h>	/* close() */
142 #include <signal.h>
143 #include <stdlib.h>
144 
struct nm_hdr_t {	/* same as pcap_pkthdr */
	struct timeval	ts;	/* time stamp taken from the ring */
	uint32_t	caplen;	/* bytes captured (== len, no snapping here) */
	uint32_t	len;	/* length of the packet on the wire */
};
150 
struct nm_stat_t { // pcap_stat
	u_int	ps_recv;	/* packets received */
	u_int	ps_drop;	/* packets dropped for lack of buffers */
	u_int	ps_ifdrop;	/* packets dropped by the interface */
#ifdef WIN32
	u_int	bs_capt;	/* packets that reached the application */
#endif /* WIN32 */
};
159 
160 #define NM_ERRBUF_SIZE	512
161 
struct nm_desc_t {
	struct nm_desc_t *self;	/* self-pointer, used as a validity magic */
	int fd;			/* the open /dev/netmap descriptor */
	void *mem;		/* the mmap()ed shared memory region */
	int memsize;		/* size of the region, for munmap() */
	struct netmap_if *nifp;	/* interface descriptor inside the region */
	/* range of rings bound to this descriptor, and the one used last */
	uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
	uint16_t first_rx_ring, last_rx_ring, cur_rx_ring;
	struct nmreq req;	/* also contains the nr_name = ifname */
	struct nm_hdr_t hdr;	/* scratch header passed to nm_dispatch() callbacks */

	struct netmap_ring *tx, *rx;	/* shortcuts to base hw/sw rings */

	/* parameters from pcap_open_live */
	int snaplen;
	int promisc;
	int to_ms;
	char *errbuf;

	/* save flags so we can restore them on close */
	uint32_t if_flags;
        uint32_t if_reqcap;
        uint32_t if_curcap;

	struct nm_stat_t st;	/* pcap-style statistics */
	char msg[NM_ERRBUF_SIZE];
};
189 
190 /*
191  * when the descriptor is open correctly, d->self == d
192  * Eventually we should also use some magic number.
193  */
#define P2NMD(p)		((struct nm_desc_t *)(p))	/* generic ptr -> descriptor */
#define IS_NETMAP_DESC(d)	(P2NMD(d)->self == P2NMD(d))	/* opened correctly? */
#define NETMAP_FD(d)		(P2NMD(d)->fd)	/* fd to use in poll()/ioctl() */
197 
198 
/*
 * A slightly optimized copy routine: multiples of 64 bytes (below 1 KiB)
 * go through an 8x unrolled 64-bit copy, everything else falls back to
 * memcpy(). We assume there is enough room in the source and destination
 * buffers, and that both are suitably aligned for 64-bit loads/stores
 * (netmap buffers should be -- TODO confirm for other callers).
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = (const uint64_t *)_src;
	uint64_t *dst = (uint64_t *)_dst;

	if (__builtin_expect(l <= 0, 0))
		return;	/* nothing to do; also keeps negative l away from memcpy */
	/*
	 * Large copies, and lengths that are not a multiple of 64, are
	 * handed to memcpy(): the unrolled loop below always moves a full
	 * 64-byte chunk per iteration and would otherwise overrun both
	 * buffers for odd sizes.
	 */
	if (__builtin_expect(l >= 1024 || (l % 64) != 0, 0)) {
		memcpy(dst, src, l);
		return;
	}
	for (; __builtin_expect(l > 0, 1); l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
}
228 
229 
/*
 * The callback, invoked on each received packet. Same as libpcap's
 * pcap_handler: (user arg, header with ts/caplen/len, packet data).
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);

/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name
 * flags	can be NETMAP_SW_RING or NETMAP_HW_RING etc.
 * ring_no 	only used if NETMAP_HW_RING is specified, is interpreted
 *		as a string or integer indicating the ring number
 * ring_flags	is stored in all ring flags (e.g. for transparent mode)
 * If successful, nm_open() returns a descriptor with the fd open and
 * the shared memory mapped.
 */

static struct nm_desc_t *nm_open(const char *ifname,
	 const char *ring_no, int flags, int ring_flags);

/*
 * nm_close()	closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc_t *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc_t *, const void *, size_t);
static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);
266 
267 
268 /*
269  * Try to open, return descriptor if successful, NULL otherwise.
270  * An invalid netmap name will return errno = 0;
271  */
272 static struct nm_desc_t *
273 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags)
274 {
275 	struct nm_desc_t *d;
276 	u_int n, namelen;
277 	char *port = NULL;
278 
279 	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
280 		errno = 0; /* name not recognised */
281 		return NULL;
282 	}
283 	if (ifname[0] == 'n')
284 		ifname += 7;
285 	port = strchr(ifname, '-');
286 	if (!port) {
287 		namelen = strlen(ifname);
288 	} else {
289 		namelen = port - ifname;
290 		flags &= ~(NETMAP_SW_RING | NETMAP_HW_RING  | NETMAP_RING_MASK);
291 		if (port[1] == 's')
292 			flags |= NETMAP_SW_RING;
293 		else
294 			ring_name = port;
295 	}
296 	if (namelen >= sizeof(d->req.nr_name))
297 		namelen = sizeof(d->req.nr_name) - 1;
298 
299 	d = (struct nm_desc_t *)calloc(1, sizeof(*d));
300 	if (d == NULL) {
301 		errno = ENOMEM;
302 		return NULL;
303 	}
304 	d->self = d;	/* set this early so nm_close() works */
305 	d->fd = open("/dev/netmap", O_RDWR);
306 	if (d->fd < 0)
307 		goto fail;
308 
309 	if (flags & NETMAP_SW_RING) {
310 		d->req.nr_ringid = NETMAP_SW_RING;
311 	} else {
312 		u_int r;
313 		if (flags & NETMAP_HW_RING) /* interpret ring as int */
314 			r = (uintptr_t)ring_name;
315 		else /* interpret ring as numeric string */
316 			r = ring_name ? atoi(ring_name) : ~0;
317 		r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0;
318 		d->req.nr_ringid = r; /* set the ring */
319 	}
320 	d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK);
321 	d->req.nr_version = NETMAP_API;
322 	memcpy(d->req.nr_name, ifname, namelen);
323 	d->req.nr_name[namelen] = '\0';
324 	if (ioctl(d->fd, NIOCREGIF, &d->req)) {
325 		goto fail;
326 	}
327 
328 	d->memsize = d->req.nr_memsize;
329 	d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
330 			d->fd, 0);
331 	if (d->mem == NULL)
332 		goto fail;
333 	d->nifp = NETMAP_IF(d->mem, d->req.nr_offset);
334 	if (d->req.nr_ringid & NETMAP_SW_RING) {
335 		d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
336 		d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
337 	} else if (d->req.nr_ringid & NETMAP_HW_RING) {
338 		/* XXX check validity */
339 		d->first_tx_ring = d->last_tx_ring =
340 		d->first_rx_ring = d->last_rx_ring =
341 			d->req.nr_ringid & NETMAP_RING_MASK;
342 	} else {
343 		d->first_tx_ring = d->last_rx_ring = 0;
344 		d->last_tx_ring = d->req.nr_tx_rings - 1;
345 		d->last_rx_ring = d->req.nr_rx_rings - 1;
346 	}
347 	d->tx = NETMAP_TXRING(d->nifp, 0);
348 	d->rx = NETMAP_RXRING(d->nifp, 0);
349 	d->cur_tx_ring = d->first_tx_ring;
350 	d->cur_rx_ring = d->first_rx_ring;
351 	for (n = d->first_tx_ring; n <= d->last_tx_ring; n++) {
352 		d->tx[n].flags |= ring_flags;
353 	}
354 	for (n = d->first_rx_ring; n <= d->last_rx_ring; n++) {
355 		d->rx[n].flags |= ring_flags;
356 	}
357 	return d;
358 
359 fail:
360 	nm_close(d);
361 	errno = EINVAL;
362 	return NULL;
363 }
364 
365 
366 static int
367 nm_close(struct nm_desc_t *d)
368 {
369 	/*
370 	 * ugly trick to avoid unused warnings
371 	 */
372 	static void *__xxzt[] __attribute__ ((unused))  =
373 		{ (void *)nm_open, (void *)nm_inject,
374 		  (void *)nm_dispatch, (void *)nm_nextpkt } ;
375 
376 	if (d == NULL || d->self != d)
377 		return EINVAL;
378 	if (d->mem)
379 		munmap(d->mem, d->memsize);
380 	if (d->fd != -1)
381 		close(d->fd);
382 	bzero(d, sizeof(*d));
383 	free(d);
384 	return 0;
385 }
386 
387 
388 /*
389  * Same prototype as pcap_inject(), only need to cast.
390  */
391 static int
392 nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
393 {
394 	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
395 
396 	for (c = 0; c < n ; c++) {
397 		/* compute current ring to use */
398 		struct netmap_ring *ring;
399 		uint32_t i, idx;
400 		uint32_t ri = d->cur_tx_ring + c;
401 
402 		if (ri > d->last_tx_ring)
403 			ri = d->first_tx_ring;
404 		ring = NETMAP_TXRING(d->nifp, ri);
405 		if (nm_ring_empty(ring)) {
406 			continue;
407 		}
408 		i = ring->cur;
409 		idx = ring->slot[i].buf_idx;
410 		ring->slot[i].len = size;
411 		pkt_copy(buf, NETMAP_BUF(ring, idx), size);
412 		d->cur_tx_ring = ri;
413 		ring->head = ring->cur = nm_ring_next(ring, i);
414 		return size;
415 	}
416 	return 0; /* fail */
417 }
418 
419 
420 /*
421  * Same prototype as pcap_dispatch(), only need to cast.
422  */
423 static int
424 nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
425 {
426 	int n = d->last_rx_ring - d->first_rx_ring + 1;
427 	int c, got = 0, ri = d->cur_rx_ring;
428 
429 	if (cnt == 0)
430 		cnt = -1;
431 	/* cnt == -1 means infinite, but rings have a finite amount
432 	 * of buffers and the int is large enough that we never wrap,
433 	 * so we can omit checking for -1
434 	 */
435 	for (c=0; c < n && cnt != got; c++) {
436 		/* compute current ring to use */
437 		struct netmap_ring *ring;
438 
439 		ri = d->cur_rx_ring + c;
440 		if (ri > d->last_rx_ring)
441 			ri = d->first_rx_ring;
442 		ring = NETMAP_RXRING(d->nifp, ri);
443 		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
444 			u_int i = ring->cur;
445 			u_int idx = ring->slot[i].buf_idx;
446 			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
447 
448 			// __builtin_prefetch(buf);
449 			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
450 			d->hdr.ts = ring->ts;
451 			cb(arg, &d->hdr, buf);
452 			ring->head = ring->cur = nm_ring_next(ring, i);
453 		}
454 	}
455 	d->cur_rx_ring = ri;
456 	return got;
457 }
458 
459 static u_char *
460 nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr)
461 {
462 	int ri = d->cur_rx_ring;
463 
464 	do {
465 		/* compute current ring to use */
466 		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
467 		if (!nm_ring_empty(ring)) {
468 			u_int i = ring->cur;
469 			u_int idx = ring->slot[i].buf_idx;
470 			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
471 
472 			// __builtin_prefetch(buf);
473 			hdr->ts = ring->ts;
474 			hdr->len = hdr->caplen = ring->slot[i].len;
475 			ring->cur = nm_ring_next(ring, i);
476 			/* we could postpone advancing head if we want
477 			 * to hold the buffer. This can be supported in
478 			 * the future.
479 			 */
480 			ring->head = ring->cur;
481 			d->cur_rx_ring = ri;
482 			return buf;
483 		}
484 		ri++;
485 		if (ri > d->last_rx_ring)
486 			ri = d->first_rx_ring;
487 	} while (ri != d->cur_rx_ring);
488 	return NULL; /* nothing found */
489 }
490 
491 #endif /* !HAVE_NETMAP_WITH_LIBS */
492 
493 #endif /* NETMAP_WITH_LIBS */
494 
495 #endif /* _NET_NETMAP_USER_H_ */
496