xref: /freebsd/sys/net/netmap_user.h (revision e1a528369708afb723290916ad8ea9c79399e933)
1 /*
2  * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Functions and macros to manipulate netmap structures and packets
31  * in userspace. See netmap(4) for more information.
32  *
33  * The address of the struct netmap_if, say nifp, is computed from the
 * value returned from ioctl(.., NIOCREGIF, ...) and the mmap region:
 *	ioctl(fd, NIOCREGIF, &req);
 *	mem = mmap(0, ... );
 *	nifp = NETMAP_IF(mem, req.nr_offset);
38  *		(so simple, we could just do it manually)
39  *
40  * From there:
41  *	struct netmap_ring *NETMAP_TXRING(nifp, index)
42  *	struct netmap_ring *NETMAP_RXRING(nifp, index)
 *		we can access ring->head, ring->cur, ring->tail, ring->flags
44  *
45  *	ring->slot[i] gives us the i-th slot (we can access
46  *		directly len, flags, buf_idx)
47  *
48  *	char *buf = NETMAP_BUF(ring, x) returns a pointer to
49  *		the buffer numbered x
50  *
51  * All ring indexes (head, cur, tail) should always move forward.
52  * To compute the next index in a circular ring you can use
53  *	i = nm_ring_next(ring, i);
54  *
 * To ease porting apps from pcap to netmap we supply a few functions
56  * that can be called to open, close, read and write on netmap in a way
57  * similar to libpcap. Note that the read/write function depend on
58  * an ioctl()/select()/poll() being issued to refill rings or push
59  * packets out.
60  *
61  * In order to use these, include #define NETMAP_WITH_LIBS
62  * in the source file that invokes these functions.
63  */
64 
65 #ifndef _NET_NETMAP_USER_H_
66 #define _NET_NETMAP_USER_H_
67 
68 #include <stdint.h>
69 #include <net/if.h>		/* IFNAMSIZ */
70 
71 #ifndef likely
72 #define likely(x)	__builtin_expect(!!(x), 1)
73 #define unlikely(x)	__builtin_expect(!!(x), 0)
74 #endif /* likely and unlikely */
75 
76 #include <net/netmap.h>
77 
/* helper macro: add a byte offset to a pointer and cast the result */
#define _NETMAP_OFFSET(type, ptr, offset) \
	((type)(void *)((char *)(ptr) + (offset)))

/* address of the struct netmap_if at offset _ofs inside the mmap()ed region */
#define NETMAP_IF(_base, _ofs)	_NETMAP_OFFSET(struct netmap_if *, _base, _ofs)

/* address of the index-th tx ring, from the ring_ofs[] table in the nifp */
#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
	nifp, (nifp)->ring_ofs[index] )

/*
 * address of the index-th rx ring: the rx offsets are stored in
 * ring_ofs[] after the ni_tx_rings + 1 tx entries
 */
#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *,	\
	nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )

/* address of the buffer with the given index, relative to the ring */
#define NETMAP_BUF(ring, index)				\
	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))

/* inverse of NETMAP_BUF: index of the buffer at address buf */
#define NETMAP_BUF_IDX(ring, buf)			\
	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
		(ring)->nr_buf_size )
96 
97 
98 static inline uint32_t
99 nm_ring_next(struct netmap_ring *r, uint32_t i)
100 {
101 	return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
102 }
103 
104 
105 /*
106  * Return 1 if we have pending transmissions in the tx ring.
107  * When everything is complete ring->cur = ring->tail + 1 (modulo ring size)
108  */
109 static inline int
110 nm_tx_pending(struct netmap_ring *r)
111 {
112 	return nm_ring_next(r, r->tail) != r->cur;
113 }
114 
115 
116 static inline uint32_t
117 nm_ring_space(struct netmap_ring *ring)
118 {
119         int ret = ring->tail - ring->cur;
120         if (ret < 0)
121                 ret += ring->num_slots;
122         return ret;
123 }
124 
125 
126 #ifdef NETMAP_WITH_LIBS
127 /*
128  * Support for simple I/O libraries.
129  * Include other system headers required for compiling this.
130  */
131 
132 #ifndef HAVE_NETMAP_WITH_LIBS
133 #define HAVE_NETMAP_WITH_LIBS
134 
135 #include <sys/time.h>
136 #include <sys/mman.h>
137 #include <string.h>	/* memset */
138 #include <sys/ioctl.h>
139 #include <sys/errno.h>	/* EINVAL */
140 #include <fcntl.h>	/* O_RDWR */
141 #include <unistd.h>	/* close() */
142 #ifdef __FreeBSD__
143 #include <stdlib.h>
144 #else
145 #include <malloc.h>	/* on FreeBSD it is stdlib.h */
146 #endif
147 
struct nm_hdr_t {	/* same as pcap_pkthdr */
	struct timeval	ts;	/* timestamp, copied from the rx ring */
	uint32_t	caplen;	/* captured length (same as len, no snapping) */
	uint32_t	len;	/* length of the packet in the slot */
};
153 
struct nm_desc_t {
	struct nm_desc_t *self;	/* points to itself when the descriptor is valid */
	int fd;			/* the open /dev/netmap file descriptor */
	void *mem;		/* the mmap()ed shared memory region */
	int memsize;		/* size of the region, from req.nr_memsize */
	struct netmap_if *nifp;	/* interface descriptor inside mem */
	uint16_t first_ring, last_ring, cur_ring; /* range of rings bound, and round-robin cursor */
	struct nmreq req;	/* the request passed to NIOCREGIF */
	struct nm_hdr_t hdr;	/* scratch header reused by nm_dispatch() */
};
164 
/*
 * when the descriptor is open correctly, d->self == d
 * Eventually we should also use some magic number.
 */
/* cast a generic pointer to a netmap descriptor */
#define P2NMD(p)		((struct nm_desc_t *)(p))
/* validity check: a live descriptor points back to itself */
#define IS_NETMAP_DESC(d)	(P2NMD(d)->self == P2NMD(d))
/* extract the file descriptor, e.g. to pass to poll()/select() */
#define NETMAP_FD(d)		(P2NMD(d)->fd)
172 
173 
/*
 * Slightly optimized copy routine: copies 64 bytes (eight 64-bit
 * words) per iteration, which is often faster than memcpy() for
 * the short packets netmap typically handles. The caller must
 * guarantee enough room in both buffers, since a length that is
 * not a multiple of 64 makes the loop overshoot.
 *
 * XXX only for multiples of 64 bytes, non overlapped.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *from = (const uint64_t *)_src;
	uint64_t *to = (uint64_t *)_dst;
	int left;

	/* for large packets plain memcpy() is at least as fast */
	if (l >= 1024) {
		memcpy(to, from, l);
		return;
	}
	for (left = l; left > 0; left -= 64) {
		to[0] = from[0];
		to[1] = from[1];
		to[2] = from[2];
		to[3] = from[3];
		to[4] = from[4];
		to[5] = from[5];
		to[6] = from[6];
		to[7] = from[7];
		to += 8;
		from += 8;
	}
}
203 
204 
/*
 * The callback, invoked on each received packet. Same as libpcap's
 * pcap_handler: (user argument, packet header, packet payload).
 */
typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);

/*
 *--- the pcap-like API ---
 *
 * nm_open() opens a file descriptor, binds to a port and maps memory.
 *
 * ifname	(netmap:foo or vale:foo) is the port name
 * flags	can be NETMAP_SW_RING or NETMAP_HW_RING etc.
 * ring_no 	only used if NETMAP_HW_RING is specified, is interpreted
 *		as a string or integer indicating the ring number
 * ring_flags	is stored in all ring flags (e.g. for transparent mode)
 * to open. If successful, it opens the fd and maps the memory.
 */

static struct nm_desc_t *nm_open(const char *ifname,
	 const char *ring_no, int flags, int ring_flags);

/*
 * nm_close()	closes and restores the port to its previous state
 */

static int nm_close(struct nm_desc_t *);

/*
 * nm_inject() is the same as pcap_inject()
 * nm_dispatch() is the same as pcap_dispatch()
 * nm_nextpkt() is the same as pcap_next()
 */

static int nm_inject(struct nm_desc_t *, const void *, size_t);
static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc_t *, struct nm_hdr_t *);
241 
242 
243 /*
244  * Try to open, return descriptor if successful, NULL otherwise.
245  * An invalid netmap name will return errno = 0;
246  */
247 static struct nm_desc_t *
248 nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags)
249 {
250 	struct nm_desc_t *d;
251 	u_int n;
252 
253 	if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
254 		errno = 0; /* name not recognised */
255 		return NULL;
256 	}
257 	if (ifname[0] == 'n')
258 		ifname += 7;
259 	d = (struct nm_desc_t *)calloc(1, sizeof(*d));
260 	if (d == NULL) {
261 		errno = ENOMEM;
262 		return NULL;
263 	}
264 	d->self = d;	/* set this early so nm_close() works */
265 	d->fd = open("/dev/netmap", O_RDWR);
266 	if (d->fd < 0)
267 		goto fail;
268 
269 	if (flags & NETMAP_SW_RING) {
270 		d->req.nr_ringid = NETMAP_SW_RING;
271 	} else {
272 		u_int r;
273 		if (flags & NETMAP_HW_RING) /* interpret ring as int */
274 			r = (uintptr_t)ring_name;
275 		else /* interpret ring as numeric string */
276 			r = ring_name ? atoi(ring_name) : ~0;
277 		r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0;
278 		d->req.nr_ringid = r; /* set the ring */
279 	}
280 	d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK);
281 	d->req.nr_version = NETMAP_API;
282 	strncpy(d->req.nr_name, ifname, sizeof(d->req.nr_name));
283 	if (ioctl(d->fd, NIOCREGIF, &d->req))
284 		goto fail;
285 
286 	d->memsize = d->req.nr_memsize;
287 	d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
288 			d->fd, 0);
289 	if (d->mem == NULL)
290 		goto fail;
291 	d->nifp = NETMAP_IF(d->mem, d->req.nr_offset);
292 	if (d->req.nr_ringid & NETMAP_SW_RING) {
293 		d->first_ring = d->last_ring = d->req.nr_rx_rings;
294 	} else if (d->req.nr_ringid & NETMAP_HW_RING) {
295 		d->first_ring = d->last_ring =
296 			d->req.nr_ringid & NETMAP_RING_MASK;
297 	} else {
298 		d->first_ring = 0;
299 		d->last_ring = d->req.nr_rx_rings - 1;
300 	}
301 	d->cur_ring = d->first_ring;
302 	for (n = d->first_ring; n <= d->last_ring; n++) {
303 		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, n);
304 		ring->flags |= ring_flags;
305 	}
306 	return d;
307 
308 fail:
309 	nm_close(d);
310 	errno = EINVAL;
311 	return NULL;
312 }
313 
314 
315 static int
316 nm_close(struct nm_desc_t *d)
317 {
318 	/*
319 	 * ugly trick to avoid unused warnings
320 	 */
321 	static void *__xxzt[] __attribute__ ((unused))  =
322 		{ (void *)nm_open, (void *)nm_inject,
323 		  (void *)nm_dispatch, (void *)nm_nextpkt } ;
324 
325 	if (d == NULL || d->self != d)
326 		return EINVAL;
327 	if (d->mem)
328 		munmap(d->mem, d->memsize);
329 	if (d->fd != -1)
330 		close(d->fd);
331 	bzero(d, sizeof(*d));
332 	free(d);
333 	return 0;
334 }
335 
336 
337 /*
338  * Same prototype as pcap_inject(), only need to cast.
339  */
340 static int
341 nm_inject(struct nm_desc_t *d, const void *buf, size_t size)
342 {
343 	u_int c, n = d->last_ring - d->first_ring + 1;
344 
345 	if (0) fprintf(stderr, "%s rings %d %d %d\n", __FUNCTION__,
346 		d->first_ring, d->cur_ring, d->last_ring);
347 	for (c = 0; c < n ; c++) {
348 		/* compute current ring to use */
349 		struct netmap_ring *ring;
350 		uint32_t i, idx;
351 		uint32_t ri = d->cur_ring + c;
352 
353 		if (ri > d->last_ring)
354 			ri = d->first_ring;
355 		ring = NETMAP_TXRING(d->nifp, ri);
356 		if (nm_ring_empty(ring)) {
357 			if (0) fprintf(stderr, "%s ring %d cur %d tail %d\n",
358 				__FUNCTION__,
359 				ri, ring->cur, ring->tail);
360 			continue;
361 		}
362 		i = ring->cur;
363 		idx = ring->slot[i].buf_idx;
364 		ring->slot[i].len = size;
365 		pkt_copy(buf, NETMAP_BUF(ring, idx), size);
366 		d->cur_ring = ri;
367 		ring->head = ring->cur = nm_ring_next(ring, i);
368 		return size;
369 	}
370 	return 0; /* fail */
371 }
372 
373 
/*
 * Same prototype as pcap_dispatch(), only need to cast.
 *
 * Deliver up to cnt packets to the callback (cnt <= 0 means "all
 * currently available"), scanning the rx rings starting at
 * d->cur_ring. Returns the number of packets delivered.
 * NOTE: d->hdr is reused for every callback invocation, so the
 * callback must copy whatever it needs before returning.
 */
static int
nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
{
	int n = d->last_ring - d->first_ring + 1;	/* rings to scan */
	int c, got = 0, ri = d->cur_ring;

	if (cnt == 0)
		cnt = -1;
	/* cnt == -1 means infinite, but rings have a finite amount
	 * of buffers and the int is large enough that we never wrap,
	 * so we can omit checking for -1
	 */
	for (c=0; c < n && cnt != got; c++) {
		/* compute current ring to use */
		struct netmap_ring *ring;

		ri = d->cur_ring + c;
		if (ri > d->last_ring)
			ri = d->first_ring;
		ring = NETMAP_RXRING(d->nifp, ri);
		/* drain this ring (or stop early once cnt is reached) */
		for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
			u_int i = ring->cur;
			u_int idx = ring->slot[i].buf_idx;
			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
			// XXX should check valid buf
			// prefetch(buf);
			d->hdr.len = d->hdr.caplen = ring->slot[i].len;
			d->hdr.ts = ring->ts;
			cb(arg, &d->hdr, buf);
			/* release the slot back to the kernel */
			ring->head = ring->cur = nm_ring_next(ring, i);
		}
	}
	/* resume from the last ring visited on the next call */
	d->cur_ring = ri;
	return got;
}
412 
413 static u_char *
414 nm_nextpkt(struct nm_desc_t *d, struct nm_hdr_t *hdr)
415 {
416 	int ri = d->cur_ring;
417 
418 	do {
419 		/* compute current ring to use */
420 		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
421 		if (!nm_ring_empty(ring)) {
422 			u_int i = ring->cur;
423 			u_int idx = ring->slot[i].buf_idx;
424 			u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
425 			// XXX should check valid buf
426 			// prefetch(buf);
427 			hdr->ts = ring->ts;
428 			hdr->len = hdr->caplen = ring->slot[i].len;
429 			ring->cur = nm_ring_next(ring, i);
430 			/* we could postpone advancing head if we want
431 			 * to hold the buffer. This can be supported in
432 			 * the future.
433 			 */
434 			ring->head = ring->cur;
435 			d->cur_ring = ri;
436 			return buf;
437 		}
438 		ri++;
439 		if (ri > d->last_ring)
440 			ri = d->first_ring;
441 	} while (ri != d->cur_ring);
442 	return NULL; /* nothing found */
443 }
444 
445 #endif /* !HAVE_NETMAP_WITH_LIBS */
446 
447 #endif /* NETMAP_WITH_LIBS */
448 
449 #endif /* _NET_NETMAP_USER_H_ */
450