xref: /freebsd/contrib/libpcap/pcap-netfilter-linux.c (revision fe75646a0234a261c0013bf1840fdac4acaf0cec)
1 /*
2  * Copyright (c) 2011 Jakub Zawadzki
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  * products derived from this software without specific prior written
16  * permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include <config.h>
33 #endif
34 
35 #include "pcap-int.h"
36 #include "diag-control.h"
37 
38 #ifdef NEED_STRERROR_H
39 #include "strerror.h"
40 #endif
41 
42 #include <errno.h>
43 #include <stdlib.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <sys/socket.h>
47 #include <arpa/inet.h>
48 
49 #include <time.h>
50 #include <sys/time.h>
51 #include <netinet/in.h>
52 #include <linux/types.h>
53 
54 #include <linux/netlink.h>
55 #include <linux/netfilter.h>
56 #include <linux/netfilter/nfnetlink.h>
57 #include <linux/netfilter/nfnetlink_log.h>
58 #include <linux/netfilter/nfnetlink_queue.h>
59 
/* NOTE: if your program drops privileges after pcap_activate() it WON'T work with nfqueue.
 *       It took me quite some time to debug ;/
 *
 *       Sending any data to an nfnetlink socket requires CAP_NET_ADMIN privileges,
 *       and with nfqueue we need to send a verdict reply after receiving each packet.
 *
 *       In tcpdump you can disable dropping privileges with -Z root
 */
68 
69 #include "pcap-netfilter-linux.h"
70 
/*
 * Length of a netlink message that carries only the (aligned) nfgenmsg
 * header, i.e. the offset at which netfilter attributes start.
 */
#define HDR_LENGTH (NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg))))

/* Device-name prefixes recognised by this module. */
#define NFLOG_IFACE "nflog"
#define NFQUEUE_IFACE "nfqueue"

/* Kind of netfilter source a device name or a netlink message refers to. */
typedef enum {
	OTHER = -1,	/* neither NFLOG nor NFQUEUE */
	NFLOG,
	NFQUEUE
} nftype_t;
77 
78 /*
79  * Private data for capturing on Linux netfilter sockets.
80  */
81 struct pcap_netfilter {
82 	u_int	packets_read;	/* count of packets read with recvfrom() */
83 	u_int   packets_nobufs; /* ENOBUFS counter */
84 };
85 
86 static int nfqueue_send_verdict(const pcap_t *handle, uint16_t group_id, u_int32_t id, u_int32_t verdict);
87 
88 
89 static int
90 netfilter_read_linux(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
91 {
92 	struct pcap_netfilter *handlep = handle->priv;
93 	register u_char *bp, *ep;
94 	int count = 0;
95 	ssize_t len;
96 
97 	/*
98 	 * Has "pcap_breakloop()" been called?
99 	 */
100 	if (handle->break_loop) {
101 		/*
102 		 * Yes - clear the flag that indicates that it
103 		 * has, and return PCAP_ERROR_BREAK to indicate
104 		 * that we were told to break out of the loop.
105 		 */
106 		handle->break_loop = 0;
107 		return PCAP_ERROR_BREAK;
108 	}
109 	len = handle->cc;
110 	if (len == 0) {
111 		/*
112 		 * The buffer is empty; refill it.
113 		 *
114 		 * We ignore EINTR, as that might just be due to a signal
115 		 * being delivered - if the signal should interrupt the
116 		 * loop, the signal handler should call pcap_breakloop()
117 		 * to set handle->break_loop (we ignore it on other
118 		 * platforms as well).
119 		 */
120 		do {
121 			len = recv(handle->fd, handle->buffer, handle->bufsize, 0);
122 			if (handle->break_loop) {
123 				handle->break_loop = 0;
124 				return PCAP_ERROR_BREAK;
125 			}
126 			if (errno == ENOBUFS)
127 				handlep->packets_nobufs++;
128 		} while ((len == -1) && (errno == EINTR || errno == ENOBUFS));
129 
130 		if (len < 0) {
131 			pcap_fmt_errmsg_for_errno(handle->errbuf,
132 			    PCAP_ERRBUF_SIZE, errno, "Can't receive packet");
133 			return PCAP_ERROR;
134 		}
135 
136 		bp = (unsigned char *)handle->buffer;
137 	} else
138 		bp = handle->bp;
139 
140 	/*
141 	 * Loop through each message.
142 	 *
143 	 * This assumes that a single buffer of message will have
144 	 * <= INT_MAX packets, so the message count doesn't overflow.
145 	 */
146 	ep = bp + len;
147 	while (bp < ep) {
148 		const struct nlmsghdr *nlh = (const struct nlmsghdr *) bp;
149 		uint32_t msg_len;
150 		nftype_t type = OTHER;
151 		/*
152 		 * Has "pcap_breakloop()" been called?
153 		 * If so, return immediately - if we haven't read any
154 		 * packets, clear the flag and return PCAP_ERROR_BREAK
155 		 * to indicate that we were told to break out of the loop,
156 		 * otherwise leave the flag set, so that the *next* call
157 		 * will break out of the loop without having read any
158 		 * packets, and return the number of packets we've
159 		 * processed so far.
160 		 */
161 		if (handle->break_loop) {
162 			handle->bp = bp;
163 			handle->cc = (int)(ep - bp);
164 			if (count == 0) {
165 				handle->break_loop = 0;
166 				return PCAP_ERROR_BREAK;
167 			} else
168 				return count;
169 		}
170 		/*
171 		 * NLMSG_SPACE(0) might be signed or might be unsigned,
172 		 * depending on whether the kernel defines NLMSG_ALIGNTO
173 		 * as 4, which older kernels do, or as 4U, which newer
174 		 * kernels do.
175 		 *
176 		 * ep - bp is of type ptrdiff_t, which is signed.
177 		 *
178 		 * To squelch warnings, we cast both to size_t, which
179 		 * is unsigned; ep >= bp, so the cast is safe.
180 		 */
181 		if ((size_t)(ep - bp) < (size_t)NLMSG_SPACE(0)) {
182 			/*
183 			 * There's less than one netlink message left
184 			 * in the buffer.  Give up.
185 			 */
186 			break;
187 		}
188 
189 		if (nlh->nlmsg_len < sizeof(struct nlmsghdr) || (u_int)len < nlh->nlmsg_len) {
190 			snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "Message truncated: (got: %zd) (nlmsg_len: %u)", len, nlh->nlmsg_len);
191 			return -1;
192 		}
193 
194 		if (NFNL_SUBSYS_ID(nlh->nlmsg_type) == NFNL_SUBSYS_ULOG &&
195 		    NFNL_MSG_TYPE(nlh->nlmsg_type) == NFULNL_MSG_PACKET)
196 			type = NFLOG;
197 		else if (NFNL_SUBSYS_ID(nlh->nlmsg_type) == NFNL_SUBSYS_QUEUE &&
198 		         NFNL_MSG_TYPE(nlh->nlmsg_type) == NFQNL_MSG_PACKET)
199 			type = NFQUEUE;
200 
201 		if (type != OTHER) {
202 			const unsigned char *payload = NULL;
203 			struct pcap_pkthdr pkth;
204 
205 			const struct nfgenmsg *nfg = NULL;
206 			int id = 0;
207 
208 			if (handle->linktype != DLT_NFLOG) {
209 				const struct nfattr *payload_attr = NULL;
210 
211 				if (nlh->nlmsg_len < HDR_LENGTH) {
212 					snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "Malformed message: (nlmsg_len: %u)", nlh->nlmsg_len);
213 					return -1;
214 				}
215 
216 				nfg = NLMSG_DATA(nlh);
217 				if (nlh->nlmsg_len > HDR_LENGTH) {
218 					struct nfattr *attr = NFM_NFA(nfg);
219 					int attr_len = nlh->nlmsg_len - NLMSG_ALIGN(HDR_LENGTH);
220 
221 					while (NFA_OK(attr, attr_len)) {
222 						if (type == NFQUEUE) {
223 							switch (NFA_TYPE(attr)) {
224 								case NFQA_PACKET_HDR:
225 									{
226 										const struct nfqnl_msg_packet_hdr *pkt_hdr = (const struct nfqnl_msg_packet_hdr *) NFA_DATA(attr);
227 
228 										id = ntohl(pkt_hdr->packet_id);
229 										break;
230 									}
231 								case NFQA_PAYLOAD:
232 									payload_attr = attr;
233 									break;
234 							}
235 
236 						} else if (type == NFLOG) {
237 							switch (NFA_TYPE(attr)) {
238 								case NFULA_PAYLOAD:
239 									payload_attr = attr;
240 									break;
241 							}
242 						}
243 						attr = NFA_NEXT(attr, attr_len);
244 					}
245 				}
246 
247 				if (payload_attr) {
248 					payload = NFA_DATA(payload_attr);
249 					pkth.len = pkth.caplen = NFA_PAYLOAD(payload_attr);
250 				}
251 
252 			} else {
253 				payload = NLMSG_DATA(nlh);
254 				pkth.caplen = pkth.len = nlh->nlmsg_len-NLMSG_ALIGN(sizeof(struct nlmsghdr));
255 			}
256 
257 			if (payload) {
258 				/* pkth.caplen = min (payload_len, handle->snapshot); */
259 
260 				gettimeofday(&pkth.ts, NULL);
261 				if (handle->fcode.bf_insns == NULL ||
262 						pcap_filter(handle->fcode.bf_insns, payload, pkth.len, pkth.caplen))
263 				{
264 					handlep->packets_read++;
265 					callback(user, &pkth, payload);
266 					count++;
267 				}
268 			}
269 
270 			if (type == NFQUEUE) {
271 				/* XXX, possible responses: NF_DROP, NF_ACCEPT, NF_STOLEN, NF_QUEUE, NF_REPEAT, NF_STOP */
272 				/* if type == NFQUEUE, handle->linktype is always != DLT_NFLOG,
273 				   so nfg is always initialized to NLMSG_DATA(nlh). */
274 				if (nfg != NULL)
275 					nfqueue_send_verdict(handle, ntohs(nfg->res_id), id, NF_ACCEPT);
276 			}
277 		}
278 
279 		msg_len = NLMSG_ALIGN(nlh->nlmsg_len);
280 		/*
281 		 * If the message length would run past the end of the
282 		 * buffer, truncate it to the remaining space in the
283 		 * buffer.
284 		 *
285 		 * To squelch warnings, we cast ep - bp to uint32_t, which
286 		 * is unsigned and is the type of msg_len; ep >= bp, and
287 		 * len should fit in 32 bits (either it's set from an int
288 		 * or it's set from a recv() call with a buffer size that's
289 		 * an int, and we're assuming either ILP32 or LP64), so
290 		 * the cast is safe.
291 		 */
292 		if (msg_len > (uint32_t)(ep - bp))
293 			msg_len = (uint32_t)(ep - bp);
294 
295 		bp += msg_len;
296 		if (count >= max_packets && !PACKET_COUNT_IS_UNLIMITED(max_packets)) {
297 			handle->bp = bp;
298 			handle->cc = (int)(ep - bp);
299 			if (handle->cc < 0)
300 				handle->cc = 0;
301 			return count;
302 		}
303 	}
304 
305 	handle->cc = 0;
306 	return count;
307 }
308 
309 static int
310 netfilter_set_datalink(pcap_t *handle, int dlt)
311 {
312 	handle->linktype = dlt;
313 	return 0;
314 }
315 
316 static int
317 netfilter_stats_linux(pcap_t *handle, struct pcap_stat *stats)
318 {
319 	struct pcap_netfilter *handlep = handle->priv;
320 
321 	stats->ps_recv = handlep->packets_read;
322 	stats->ps_drop = handlep->packets_nobufs;
323 	stats->ps_ifdrop = 0;
324 	return 0;
325 }
326 
327 static int
328 netfilter_inject_linux(pcap_t *handle, const void *buf _U_, int size _U_)
329 {
330 	snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
331 	    "Packet injection is not supported on netfilter devices");
332 	return (-1);
333 }
334 
/*
 * Description of a single attribute to append to an outgoing nfnetlink
 * request built by netfilter_send_config_msg().
 */
struct my_nfattr {
	uint16_t nfa_len;	/* payload size in bytes, not counting the attribute header */
	uint16_t nfa_type;	/* attribute type, copied as-is into the message */
	void *data;		/* payload; nfa_len bytes are copied from here */
};
340 
341 static int
342 netfilter_send_config_msg(const pcap_t *handle, uint16_t msg_type, int ack, u_int8_t family, u_int16_t res_id, const struct my_nfattr *mynfa)
343 {
344 	char buf[1024] __attribute__ ((aligned));
345 	memset(buf, 0, sizeof(buf));
346 
347 	struct nlmsghdr *nlh = (struct nlmsghdr *) buf;
348 	struct nfgenmsg *nfg = (struct nfgenmsg *) (buf + sizeof(struct nlmsghdr));
349 
350 	struct sockaddr_nl snl;
351 	static unsigned int seq_id;
352 
353 	if (!seq_id)
354 DIAG_OFF_NARROWING
355 		seq_id = time(NULL);
356 DIAG_ON_NARROWING
357 	++seq_id;
358 
359 	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nfgenmsg));
360 	nlh->nlmsg_type = msg_type;
361 	nlh->nlmsg_flags = NLM_F_REQUEST | (ack ? NLM_F_ACK : 0);
362 	nlh->nlmsg_pid = 0;	/* to kernel */
363 	nlh->nlmsg_seq = seq_id;
364 
365 	nfg->nfgen_family = family;
366 	nfg->version = NFNETLINK_V0;
367 	nfg->res_id = htons(res_id);
368 
369 	if (mynfa) {
370 		struct nfattr *nfa = (struct nfattr *) (buf + NLMSG_ALIGN(nlh->nlmsg_len));
371 
372 		nfa->nfa_type = mynfa->nfa_type;
373 		nfa->nfa_len = NFA_LENGTH(mynfa->nfa_len);
374 		memcpy(NFA_DATA(nfa), mynfa->data, mynfa->nfa_len);
375 		nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + NFA_ALIGN(nfa->nfa_len);
376 	}
377 
378 	memset(&snl, 0, sizeof(snl));
379 	snl.nl_family = AF_NETLINK;
380 
381 	if (sendto(handle->fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr *) &snl, sizeof(snl)) == -1)
382 		return -1;
383 
384 	if (!ack)
385 		return 0;
386 
387 	/* waiting for reply loop */
388 	do {
389 		socklen_t addrlen = sizeof(snl);
390 		int len;
391 
392 		/* ignore interrupt system call error */
393 		do {
394 			/*
395 			 * The buffer is not so big that its size won't
396 			 * fit into an int.
397 			 */
398 			len = (int)recvfrom(handle->fd, buf, sizeof(buf), 0, (struct sockaddr *) &snl, &addrlen);
399 		} while ((len == -1) && (errno == EINTR));
400 
401 		if (len <= 0)
402 			return len;
403 
404 		if (addrlen != sizeof(snl) || snl.nl_family != AF_NETLINK) {
405 			errno = EINVAL;
406 			return -1;
407 		}
408 
409 		nlh = (struct nlmsghdr *) buf;
410 		if (snl.nl_pid != 0 || seq_id != nlh->nlmsg_seq)	/* if not from kernel or wrong sequence skip */
411 			continue;
412 
413 		while ((u_int)len >= NLMSG_SPACE(0) && NLMSG_OK(nlh, (u_int)len)) {
414 			if (nlh->nlmsg_type == NLMSG_ERROR || (nlh->nlmsg_type == NLMSG_DONE && nlh->nlmsg_flags & NLM_F_MULTI)) {
415 				if (nlh->nlmsg_len < NLMSG_ALIGN(sizeof(struct nlmsgerr))) {
416 					errno = EBADMSG;
417 					return -1;
418 				}
419 				errno = -(*((int *)NLMSG_DATA(nlh)));
420 				return (errno == 0) ? 0 : -1;
421 			}
422 			nlh = NLMSG_NEXT(nlh, len);
423 		}
424 	} while (1);
425 
426 	return -1; /* never here */
427 }
428 
429 static int
430 nflog_send_config_msg(const pcap_t *handle, uint8_t family, u_int16_t group_id, const struct my_nfattr *mynfa)
431 {
432 	return netfilter_send_config_msg(handle, (NFNL_SUBSYS_ULOG << 8) | NFULNL_MSG_CONFIG, 1, family, group_id, mynfa);
433 }
434 
435 static int
436 nflog_send_config_cmd(const pcap_t *handle, uint16_t group_id, u_int8_t cmd, u_int8_t family)
437 {
438 	struct nfulnl_msg_config_cmd msg;
439 	struct my_nfattr nfa;
440 
441 	msg.command = cmd;
442 
443 	nfa.data = &msg;
444 	nfa.nfa_type = NFULA_CFG_CMD;
445 	nfa.nfa_len = sizeof(msg);
446 
447 	return nflog_send_config_msg(handle, family, group_id, &nfa);
448 }
449 
450 static int
451 nflog_send_config_mode(const pcap_t *handle, uint16_t group_id, u_int8_t copy_mode, u_int32_t copy_range)
452 {
453 	struct nfulnl_msg_config_mode msg;
454 	struct my_nfattr nfa;
455 
456 	msg.copy_range = htonl(copy_range);
457 	msg.copy_mode = copy_mode;
458 
459 	nfa.data = &msg;
460 	nfa.nfa_type = NFULA_CFG_MODE;
461 	nfa.nfa_len = sizeof(msg);
462 
463 	return nflog_send_config_msg(handle, AF_UNSPEC, group_id, &nfa);
464 }
465 
466 static int
467 nfqueue_send_verdict(const pcap_t *handle, uint16_t group_id, u_int32_t id, u_int32_t verdict)
468 {
469 	struct nfqnl_msg_verdict_hdr msg;
470 	struct my_nfattr nfa;
471 
472 	msg.id = htonl(id);
473 	msg.verdict = htonl(verdict);
474 
475 	nfa.data = &msg;
476 	nfa.nfa_type = NFQA_VERDICT_HDR;
477 	nfa.nfa_len = sizeof(msg);
478 
479 	return netfilter_send_config_msg(handle, (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT, 0, AF_UNSPEC, group_id, &nfa);
480 }
481 
482 static int
483 nfqueue_send_config_msg(const pcap_t *handle, uint8_t family, u_int16_t group_id, const struct my_nfattr *mynfa)
484 {
485 	return netfilter_send_config_msg(handle, (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG, 1, family, group_id, mynfa);
486 }
487 
488 static int
489 nfqueue_send_config_cmd(const pcap_t *handle, uint16_t group_id, u_int8_t cmd, u_int16_t pf)
490 {
491 	struct nfqnl_msg_config_cmd msg;
492 	struct my_nfattr nfa;
493 
494 	msg.command = cmd;
495 	msg.pf = htons(pf);
496 
497 	nfa.data = &msg;
498 	nfa.nfa_type = NFQA_CFG_CMD;
499 	nfa.nfa_len = sizeof(msg);
500 
501 	return nfqueue_send_config_msg(handle, AF_UNSPEC, group_id, &nfa);
502 }
503 
504 static int
505 nfqueue_send_config_mode(const pcap_t *handle, uint16_t group_id, u_int8_t copy_mode, u_int32_t copy_range)
506 {
507 	struct nfqnl_msg_config_params msg;
508 	struct my_nfattr nfa;
509 
510 	msg.copy_range = htonl(copy_range);
511 	msg.copy_mode = copy_mode;
512 
513 	nfa.data = &msg;
514 	nfa.nfa_type = NFQA_CFG_PARAMS;
515 	nfa.nfa_len = sizeof(msg);
516 
517 	return nfqueue_send_config_msg(handle, AF_UNSPEC, group_id, &nfa);
518 }
519 
520 static int
521 netfilter_activate(pcap_t* handle)
522 {
523 	const char *dev = handle->opt.device;
524 	unsigned short groups[32];
525 	int group_count = 0;
526 	nftype_t type = OTHER;
527 	int i;
528 
529 	if (strncmp(dev, NFLOG_IFACE, strlen(NFLOG_IFACE)) == 0) {
530 		dev += strlen(NFLOG_IFACE);
531 		type = NFLOG;
532 
533 	} else if (strncmp(dev, NFQUEUE_IFACE, strlen(NFQUEUE_IFACE)) == 0) {
534 		dev += strlen(NFQUEUE_IFACE);
535 		type = NFQUEUE;
536 	}
537 
538 	if (type != OTHER && *dev == ':') {
539 		dev++;
540 		while (*dev) {
541 			long int group_id;
542 			char *end_dev;
543 
544 			if (group_count == 32) {
545 				snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
546 						"Maximum 32 netfilter groups! dev: %s",
547 						handle->opt.device);
548 				return PCAP_ERROR;
549 			}
550 
551 			group_id = strtol(dev, &end_dev, 0);
552 			if (end_dev != dev) {
553 				if (group_id < 0 || group_id > 65535) {
554 					snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
555 							"Netfilter group range from 0 to 65535 (got %ld)",
556 							group_id);
557 					return PCAP_ERROR;
558 				}
559 
560 				groups[group_count++] = (unsigned short) group_id;
561 				dev = end_dev;
562 			}
563 			if (*dev != ',')
564 				break;
565 			dev++;
566 		}
567 	}
568 
569 	if (type == OTHER || *dev) {
570 		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
571 				"Can't get netfilter group(s) index from %s",
572 				handle->opt.device);
573 		return PCAP_ERROR;
574 	}
575 
576 	/* if no groups, add default: 0 */
577 	if (!group_count) {
578 		groups[0] = 0;
579 		group_count = 1;
580 	}
581 
582 	/*
583 	 * Turn a negative snapshot value (invalid), a snapshot value of
584 	 * 0 (unspecified), or a value bigger than the normal maximum
585 	 * value, into the maximum allowed value.
586 	 *
587 	 * If some application really *needs* a bigger snapshot
588 	 * length, we should just increase MAXIMUM_SNAPLEN.
589 	 */
590 	if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN)
591 		handle->snapshot = MAXIMUM_SNAPLEN;
592 
593 	/* Initialize some components of the pcap structure. */
594 	handle->bufsize = 128 + handle->snapshot;
595 	handle->offset = 0;
596 	handle->read_op = netfilter_read_linux;
597 	handle->inject_op = netfilter_inject_linux;
598 	handle->setfilter_op = install_bpf_program; /* no kernel filtering */
599 	handle->setdirection_op = NULL;
600 	handle->set_datalink_op = netfilter_set_datalink;
601 	handle->getnonblock_op = pcap_getnonblock_fd;
602 	handle->setnonblock_op = pcap_setnonblock_fd;
603 	handle->stats_op = netfilter_stats_linux;
604 
605 	/* Create netlink socket */
606 	handle->fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
607 	if (handle->fd < 0) {
608 		pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
609 		    errno, "Can't create raw socket");
610 		return PCAP_ERROR;
611 	}
612 
613 	if (type == NFLOG) {
614 		handle->linktype = DLT_NFLOG;
615 		handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
616 		if (handle->dlt_list != NULL) {
617 			handle->dlt_list[0] = DLT_NFLOG;
618 			handle->dlt_list[1] = DLT_IPV4;
619 			handle->dlt_count = 2;
620 		}
621 
622 	} else
623 		handle->linktype = DLT_IPV4;
624 
625 	handle->buffer = malloc(handle->bufsize);
626 	if (!handle->buffer) {
627 		pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
628 		    errno, "Can't allocate dump buffer");
629 		goto close_fail;
630 	}
631 
632 	if (type == NFLOG) {
633 		if (nflog_send_config_cmd(handle, 0, NFULNL_CFG_CMD_PF_UNBIND, AF_INET) < 0) {
634 			pcap_fmt_errmsg_for_errno(handle->errbuf,
635 			    PCAP_ERRBUF_SIZE, errno,
636 			    "NFULNL_CFG_CMD_PF_UNBIND");
637 			goto close_fail;
638 		}
639 
640 		if (nflog_send_config_cmd(handle, 0, NFULNL_CFG_CMD_PF_BIND, AF_INET) < 0) {
641 			pcap_fmt_errmsg_for_errno(handle->errbuf,
642 			    PCAP_ERRBUF_SIZE, errno, "NFULNL_CFG_CMD_PF_BIND");
643 			goto close_fail;
644 		}
645 
646 		/* Bind socket to the nflog groups */
647 		for (i = 0; i < group_count; i++) {
648 			if (nflog_send_config_cmd(handle, groups[i], NFULNL_CFG_CMD_BIND, AF_UNSPEC) < 0) {
649 				pcap_fmt_errmsg_for_errno(handle->errbuf,
650 				    PCAP_ERRBUF_SIZE, errno,
651 				    "Can't listen on group index");
652 				goto close_fail;
653 			}
654 
655 			if (nflog_send_config_mode(handle, groups[i], NFULNL_COPY_PACKET, handle->snapshot) < 0) {
656 				pcap_fmt_errmsg_for_errno(handle->errbuf,
657 				    PCAP_ERRBUF_SIZE, errno,
658 				    "NFULNL_COPY_PACKET");
659 				goto close_fail;
660 			}
661 		}
662 
663 	} else {
664 		if (nfqueue_send_config_cmd(handle, 0, NFQNL_CFG_CMD_PF_UNBIND, AF_INET) < 0) {
665 			pcap_fmt_errmsg_for_errno(handle->errbuf,
666 			    PCAP_ERRBUF_SIZE, errno, "NFQNL_CFG_CMD_PF_UNBIND");
667 			goto close_fail;
668 		}
669 
670 		if (nfqueue_send_config_cmd(handle, 0, NFQNL_CFG_CMD_PF_BIND, AF_INET) < 0) {
671 			pcap_fmt_errmsg_for_errno(handle->errbuf,
672 			    PCAP_ERRBUF_SIZE, errno, "NFQNL_CFG_CMD_PF_BIND");
673 			goto close_fail;
674 		}
675 
676 		/* Bind socket to the nfqueue groups */
677 		for (i = 0; i < group_count; i++) {
678 			if (nfqueue_send_config_cmd(handle, groups[i], NFQNL_CFG_CMD_BIND, AF_UNSPEC) < 0) {
679 				pcap_fmt_errmsg_for_errno(handle->errbuf,
680 				    PCAP_ERRBUF_SIZE, errno,
681 				    "Can't listen on group index");
682 				goto close_fail;
683 			}
684 
685 			if (nfqueue_send_config_mode(handle, groups[i], NFQNL_COPY_PACKET, handle->snapshot) < 0) {
686 				pcap_fmt_errmsg_for_errno(handle->errbuf,
687 				    PCAP_ERRBUF_SIZE, errno,
688 				    "NFQNL_COPY_PACKET");
689 				goto close_fail;
690 			}
691 		}
692 	}
693 
694 	if (handle->opt.rfmon) {
695 		/*
696 		 * Monitor mode doesn't apply to netfilter devices.
697 		 */
698 		pcap_cleanup_live_common(handle);
699 		return PCAP_ERROR_RFMON_NOTSUP;
700 	}
701 
702 	if (handle->opt.buffer_size != 0) {
703 		/*
704 		 * Set the socket buffer size to the specified value.
705 		 */
706 		if (setsockopt(handle->fd, SOL_SOCKET, SO_RCVBUF, &handle->opt.buffer_size, sizeof(handle->opt.buffer_size)) == -1) {
707 			pcap_fmt_errmsg_for_errno(handle->errbuf,
708 			    PCAP_ERRBUF_SIZE, errno, "SO_RCVBUF");
709 			goto close_fail;
710 		}
711 	}
712 
713 	handle->selectable_fd = handle->fd;
714 	return 0;
715 
716 close_fail:
717 	pcap_cleanup_live_common(handle);
718 	return PCAP_ERROR;
719 }
720 
721 pcap_t *
722 netfilter_create(const char *device, char *ebuf, int *is_ours)
723 {
724 	const char *cp;
725 	pcap_t *p;
726 
727 	/* Does this look like an netfilter device? */
728 	cp = strrchr(device, '/');
729 	if (cp == NULL)
730 		cp = device;
731 
732 	/* Does it begin with NFLOG_IFACE or NFQUEUE_IFACE? */
733 	if (strncmp(cp, NFLOG_IFACE, sizeof NFLOG_IFACE - 1) == 0)
734 		cp += sizeof NFLOG_IFACE - 1;
735 	else if (strncmp(cp, NFQUEUE_IFACE, sizeof NFQUEUE_IFACE - 1) == 0)
736 		cp += sizeof NFQUEUE_IFACE - 1;
737 	else {
738 		/* Nope, doesn't begin with NFLOG_IFACE nor NFQUEUE_IFACE */
739 		*is_ours = 0;
740 		return NULL;
741 	}
742 
743 	/*
744 	 * Yes - is that either the end of the name, or is it followed
745 	 * by a colon?
746 	 */
747 	if (*cp != ':' && *cp != '\0') {
748 		/* Nope */
749 		*is_ours = 0;
750 		return NULL;
751 	}
752 
753 	/* OK, it's probably ours. */
754 	*is_ours = 1;
755 
756 	p = PCAP_CREATE_COMMON(ebuf, struct pcap_netfilter);
757 	if (p == NULL)
758 		return (NULL);
759 
760 	p->activate_op = netfilter_activate;
761 	return (p);
762 }
763 
764 int
765 netfilter_findalldevs(pcap_if_list_t *devlistp, char *err_str)
766 {
767 	int sock;
768 
769 	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
770 	if (sock < 0) {
771 		/* if netlink is not supported this is not fatal */
772 		if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT)
773 			return 0;
774 		pcap_fmt_errmsg_for_errno(err_str, PCAP_ERRBUF_SIZE,
775 		    errno, "Can't open netlink socket");
776 		return -1;
777 	}
778 	close(sock);
779 
780 	/*
781 	 * The notion of "connected" vs. "disconnected" doesn't apply.
782 	 * XXX - what about "up" and "running"?
783 	 */
784 	if (add_dev(devlistp, NFLOG_IFACE,
785 	    PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE,
786 	    "Linux netfilter log (NFLOG) interface", err_str) == NULL)
787 		return -1;
788 	if (add_dev(devlistp, NFQUEUE_IFACE,
789 	    PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE,
790 	    "Linux netfilter queue (NFQUEUE) interface", err_str) == NULL)
791 		return -1;
792 	return 0;
793 }
794