xref: /freebsd/sys/net/netmap_user.h (revision 6d732c66bca5da4d261577aad2c8ea84519b0bea)
1 /*
2  * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Functions and macros to manipulate netmap structures and packets
31  * in userspace. See netmap(4) for more information.
32  *
33  * The address of the struct netmap_if, say nifp, is computed from the
34  * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
35  *	ioctl(fd, NIOCREG, &req);
36  *	mem = mmap(0, ... );
37  *	nifp = NETMAP_IF(mem, req.nr_nifp);
38  *		(so simple, we could just do it manually)
39  *
40  * From there:
41  *	struct netmap_ring *NETMAP_TXRING(nifp, index)
42  *	struct netmap_ring *NETMAP_RXRING(nifp, index)
43  *		we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
44  *
45  *	ring->slot[i] gives us the i-th slot (we can access
46  *		directly len, flags, buf_idx)
47  *
48  *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
49  *		the buffer numbered x
50  *
51  * All ring indexes (head, cur, tail) should always move forward.
52  * To compute the next index in a circular ring you can use
53  *	i = nm_ring_next(ring, i);
54  *
55  * To ease porting apps from pcap to netmap we supply a few functions
56  * that can be called to open, close, read and write on netmap in a way
57  * similar to libpcap. Note that the read/write functions depend on
58  * an ioctl()/select()/poll() being issued to refill rings or push
59  * packets out.
60  *
61  * In order to use these, include #define NETMAP_WITH_LIBS
62  * in the source file that invokes these functions.
63  */
64 
65 #ifndef _NET_NETMAP_USER_H_
66 #define _NET_NETMAP_USER_H_
67 
68 #include <stdint.h>
69 #include <net/if.h>		/* IFNAMSIZ */
70 
71 #ifndef likely
72 #define likely(x)	__builtin_expect(!!(x), 1)
73 #define unlikely(x)	__builtin_expect(!!(x), 0)
74 #endif /* likely and unlikely */
75 
76 #include <net/netmap.h>
77 
/* helper macro: add a byte offset to a pointer and cast the result */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* locate the struct netmap_if at offset _ofs inside the mmap()ed region */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* tx ring number 'index'; ring_ofs[] lists tx ring offsets first */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/* rx ring number 'index': skip the ni_tx_rings + 1 tx entries in
 * ring_ofs[] (the extra one is presumably the host stack tx ring --
 * see netmap(4) to confirm) */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *,	\
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/* address of buffer number 'index'; buffers are nr_buf_size bytes each,
 * starting buf_ofs bytes after the ring itself */
#define NETMAP_BUF(ring, index)				\
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

/* inverse of NETMAP_BUF: recover the buffer index from its address */
#define NETMAP_BUF_IDX(ring, buf)			\
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )
97 
98 static inline uint32_t
99 nm_ring_next(struct netmap_ring *r, uint32_t i)
100 {
101 	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
102 }
103 
104 
105 /*
106  * Return 1 if we have pending transmissions in the tx ring.
107  * When everything is complete ring->cur = ring->tail + 1 (modulo ring size)
108  */
109 static inline int
110 nm_tx_pending(struct netmap_ring *r)
111 {
112 	return nm_ring_next(r, r->tail) != r->cur;
113 }
114 
115 
116 static inline uint32_t
117 nm_ring_space(struct netmap_ring *ring)
118 {
119         int ret = ring->tail - ring->cur;
120         if (ret < 0)
121                 ret += ring->num_slots;
122         return ret;
123 }
124 
125 
126 #ifdef NETMAP_WITH_LIBS
127 /*
128  * Support for simple I/O libraries.
129  * Include other system headers required for compiling this.
130  */
131 
132 #ifndef HAVE_NETMAP_WITH_LIBS
133 #define HAVE_NETMAP_WITH_LIBS
134 
135 #include <sys/time.h>
136 #include <sys/mman.h>
137 #include <string.h>	/* memset */
138 #include <sys/ioctl.h>
139 #include <sys/errno.h>	/* EINVAL */
140 #include <fcntl.h>	/* O_RDWR */
141 #include <unistd.h>	/* close() */
142 #ifdef __FreeBSD__
143 #include <stdlib.h>
144 #else
145 #include <malloc.h>	/* on FreeBSD it is stdlib.h */
146 #endif
147 
/* per-packet metadata handed to rx callbacks */
struct nm_hdr_t {	/* same as pcap_pkthdr */
	struct timeval	ts;	/* timestamp, copied from the rx ring */
	uint32_t	caplen;	/* captured length (set equal to len here) */
	uint32_t	len;	/* packet length, from the slot */
};
153 
/* state for a netmap port opened through nm_open() */
struct nm_desc_t {
	struct nm_desc_t *self;	/* points to this struct when valid open */
	int fd;			/* file descriptor on /dev/netmap */
	void *mem;		/* mmap()ed shared region */
	int memsize;		/* size of the mapping (nr_memsize) */
	struct netmap_if *nifp;	/* interface descriptor inside 'mem' */
	/* range of rings this descriptor uses; cur_ring is the
	 * round-robin resume position for inject/dispatch/nextpkt */
	uint16_t first_ring, last_ring, cur_ring;
	struct nmreq req;	/* request passed to NIOCREGIF */
	struct nm_hdr_t hdr;	/* scratch header reused by nm_dispatch() */
};
164 
/*
 * when the descriptor is open correctly, d->self == d
 * (cheap validity check on an otherwise opaque pointer)
 */
#define P2NMD(p)		((struct nm_desc_t *)(p))
#define IS_NETMAP_DESC(d)	(P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d)		(P2NMD(d)->fd)
172 
/*
 * Slightly optimized copy routine: the length is rounded up to a
 * multiple of 64 bytes, which is often faster than dealing with odd
 * sizes. The caller must guarantee enough room in both buffers for
 * the rounded-up length.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = _src;
	uint64_t *dst = _dst;

	/* above this size plain memcpy() is at least as fast */
	if (unlikely(l >= 1024)) {
		memcpy(dst, src, l);
		return;
	}
	while (likely(l > 0)) {
		int w;

		/* one 64-byte chunk as eight 64-bit words */
		for (w = 0; w < 8; w++)
			*dst++ = *src++;
		l -= 64;
	}
}
201 
202 
/*
 * The callback, invoked on each received packet. Same as libpcap:
 * arguments are the opaque pointer passed to nm_dispatch(), the
 * packet metadata, and a pointer to the packet payload.
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);
208 /*
209  *--- the pcap-like API ---
210  *
211  * nm_open() opens a file descriptor, binds to a port and maps memory.
212  *
213  * ifname	(netmap:foo or vale:foo) is the port name
214  * flags	can be NETMAP_SW_RING or NETMAP_HW_RING etc.
215  * ring_no 	only used if NETMAP_HW_RING is specified, is interpreted
216  *		as a string or integer indicating the ring number
217  * ring_flags	is stored in all ring flags (e.g. for transparent mode)
218  * to open. If successful, it opens the fd and maps the memory.
219  */
220 
221 static struct nm_desc_t *nm_open(const char *ifname,
222 	 const char *ring_no, int flags, int ring_flags);
223 
224 /*
225  * nm_close()	closes and restores the port to its previous state
226  */
227 
228 static int nm_close(struct nm_desc_t *);
229 
230 /*
231  * nm_inject() is the same as pcap_inject()
232  * nm_dispatch() is the same as pcap_dispatch()
233  * nm_nextpkt() is the same as pcap_next()
234  */
235 
236 static int nm_inject(struct nm_desc_t *, const void *, size_t);
237 static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
238 static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);
239 
240 
241 /*
242  * Try to open, return descriptor if successful, NULL otherwise.
243  * An invalid netmap name will return errno = 0;
244  */
245 static struct nm_desc_t *
246 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags)
247 {
248 	struct nm_desc_t *d;
249 	u_int n;
250 
251 	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
252 		errno = 0; /* name not recognised */
253 		return NULL;
254 	}
255 	if (ifname[0] == 'n')
256 		ifname += 7;
257 	d = (struct nm_desc_t *)calloc(1, sizeof(*d));
258 	if (d == NULL) {
259 		errno = ENOMEM;
260 		return NULL;
261 	}
262 	d->self = d;	/* set this early so nm_close() works */
263 	d->fd = open("/dev/netmap", O_RDWR);
264 	if (d->fd < 0)
265 		goto fail;
266 
267 	if (flags & NETMAP_SW_RING) {
268 		d->req.nr_ringid = NETMAP_SW_RING;
269 	} else {
270 		u_int r;
271 		if (flags & NETMAP_HW_RING) /* interpret ring as int */
272 			r = (uintptr_t)ring_name;
273 		else /* interpret ring as numeric string */
274 			r = ring_name ? atoi(ring_name) : ~0;
275 		r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0;
276 		d->req.nr_ringid = r; /* set the ring */
277 	}
278 	d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK);
279 	d->req.nr_version = NETMAP_API;
280 	strncpy(d->req.nr_name, ifname, sizeof(d->req.nr_name));
281 	if (ioctl(d->fd, NIOCREGIF, &d->req))
282 		goto fail;
283 
284 	d->memsize = d->req.nr_memsize;
285 	d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
286 			d->fd, 0);
287 	if (d->mem == NULL)
288 		goto fail;
289 	d->nifp = NETMAP_IF(d->mem, d->req.nr_offset);
290 	if (d->req.nr_ringid & NETMAP_SW_RING) {
291 		d->first_ring = d->last_ring = d->req.nr_rx_rings;
292 	} else if (d->req.nr_ringid & NETMAP_HW_RING) {
293 		d->first_ring = d->last_ring =
294 			d->req.nr_ringid & NETMAP_RING_MASK;
295 	} else {
296 		d->first_ring = 0;
297 		d->last_ring = d->req.nr_rx_rings - 1;
298 	}
299 	d->cur_ring = d->first_ring;
300 	for (n = d->first_ring; n <= d->last_ring; n++) {
301 		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, n);
302 		ring->flags |= ring_flags;
303 	}
304 	return d;
305 
306 fail:
307 	nm_close(d);
308 	errno = EINVAL;
309 	return NULL;
310 }
311 
312 
313 static int
314 nm_close(struct nm_desc_t *d)
315 {
316 	/*
317 	 * ugly trick to avoid unused warnings
318 	 */
319 	static void *__xxzt[] __attribute__ ((unused))  =
320 		{ nm_open, nm_inject, nm_dispatch, nm_nextpkt } ;
321 
322 	if (d == NULL || d->self != d)
323 		return EINVAL;
324 	if (d->mem)
325 		munmap(d->mem, d->memsize);
326 	if (d->fd != -1)
327 		close(d->fd);
328 	bzero(d, sizeof(*d));
329 	free(d);
330 	return 0;
331 }
332 
333 
334 /*
335  * Same prototype as pcap_inject(), only need to cast.
336  */
337 static int
338 nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
339 {
340 	u_int c, n = d->last_ring - d->first_ring + 1;
341 
342 	if (0) fprintf(stderr, "%s rings %d %d %d\n", __FUNCTION__,
343 		d->first_ring, d->cur_ring, d->last_ring);
344 	for (c = 0; c < n ; c++) {
345 		/* compute current ring to use */
346 		struct netmap_ring *ring;
347 		uint32_t i, idx;
348 		uint32_t ri = d->cur_ring + c;
349 
350 		if (ri > d->last_ring)
351 			ri = d->first_ring;
352 		ring = NETMAP_TXRING(d->nifp, ri);
353 		if (nm_ring_empty(ring)) {
354 			if (0) fprintf(stderr, "%s ring %d cur %d tail %d\n",
355 				__FUNCTION__,
356 				ri, ring->cur, ring->tail);
357 			continue;
358 		}
359 		i = ring->cur;
360 		idx = ring->slot[i].buf_idx;
361 		ring->slot[i].len = size;
362 		pkt_copy(buf, NETMAP_BUF(ring, idx), size);
363 		d->cur_ring = ri;
364 		ring->head = ring->cur = nm_ring_next(ring, i);
365 		return size;
366 	}
367 	return 0; /* fail */
368 }
369 
370 
/*
 * Same prototype as pcap_dispatch(), only need to cast.
 *
 * Invoke cb(arg, hdr, payload) on up to 'cnt' received packets
 * (cnt == 0 means no limit), scanning the rx rings round-robin
 * starting at cur_ring. Returns the number of packets delivered.
 * Slots are returned to the kernel as soon as the callback returns,
 * so the payload pointer is only valid inside the callback.
 */
static int
nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
{
	int n = d->last_ring - d->first_ring + 1;
	int c, got = 0, ri = d->cur_ring;

	if (cnt == 0)
		cnt = -1;
	/* cnt == -1 means infinite, but rings have a finite amount
	 * of buffers and the int is large enough that we never wrap,
	 * so we can omit checking for -1
	 */
	for (c=0; c < n && cnt != got; c++) {
		/* compute current ring to use */
		struct netmap_ring *ring;

		ri = d->cur_ring + c;
		if (ri > d->last_ring)
			ri = d->first_ring;
		ring = NETMAP_RXRING(d->nifp, ri);
		/* drain this ring until it is empty or we reach cnt */
		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
			u_int i = ring->cur;
			u_int idx = ring->slot[i].buf_idx;
			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
			// XXX should check valid buf
			// prefetch(buf);
			/* reuse the descriptor's scratch header */
			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
			d->hdr.ts = ring->ts;
			cb(arg, &d->hdr, buf);
			/* release the slot back to the kernel */
			ring->head = ring->cur = nm_ring_next(ring, i);
		}
	}
	d->cur_ring = ri;	/* remember where to resume next call */
	return got;
}
409 
410 static u_char *
411 nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr)
412 {
413 	int ri = d->cur_ring;
414 
415 	do {
416 		/* compute current ring to use */
417 		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
418 		if (!nm_ring_empty(ring)) {
419 			u_int i = ring->cur;
420 			u_int idx = ring->slot[i].buf_idx;
421 			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
422 			// XXX should check valid buf
423 			// prefetch(buf);
424 			hdr->ts = ring->ts;
425 			hdr->len = hdr->caplen = ring->slot[i].len;
426 			ring->cur = nm_ring_next(ring, i);
427 			/* we could postpone advancing head if we want
428 			 * to hold the buffer. This can be supported in
429 			 * the future.
430 			 */
431 			ring->head = ring->cur;
432 			d->cur_ring = ri;
433 			return buf;
434 		}
435 		ri++;
436 		if (ri > d->last_ring)
437 			ri = d->first_ring;
438 	} while (ri != d->cur_ring);
439 	return NULL; /* nothing found */
440 }
441 
442 #endif /* !HAVE_NETMAP_WITH_LIBS */
443 
444 #endif /* NETMAP_WITH_LIBS */
445 
446 #endif /* _NET_NETMAP_USER_H_ */
447