xref: /freebsd/contrib/libpcap/pcap-netfilter-linux.c (revision ae1a0648b05acf798816e7b83b3c10856de5c8e5)
1 /*
2  * Copyright (c) 2011 Jakub Zawadzki
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  * products derived from this software without specific prior written
16  * permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <config.h>
32 
33 #include "pcap-int.h"
34 #include "diag-control.h"
35 
36 #ifdef NEED_STRERROR_H
37 #include "strerror.h"
38 #endif
39 
40 #include <errno.h>
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <sys/socket.h>
45 #include <arpa/inet.h>
46 
47 #include <time.h>
48 #include <sys/time.h>
49 #include <netinet/in.h>
50 #include <linux/types.h>
51 
52 #include <linux/netlink.h>
53 #include <linux/netfilter.h>
54 #include <linux/netfilter/nfnetlink.h>
55 #include <linux/netfilter/nfnetlink_log.h>
56 #include <linux/netfilter/nfnetlink_queue.h>
57 
/* NOTE: if your program drops privileges after pcap_activate(), it WON'T work with nfqueue.
 *       It took me quite some time to debug ;/
 *
 *       Sending any data to an nfnetlink socket requires CAP_NET_ADMIN privileges,
 *       and with nfqueue we need to send a verdict reply after receiving each packet.
 *
 *       In tcpdump you can disable dropping privileges with -Z root.
 */
66 
67 #include "pcap-netfilter-linux.h"
68 
69 #define HDR_LENGTH (NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg))))
70 
71 #define NFLOG_IFACE "nflog"
72 #define NFQUEUE_IFACE "nfqueue"
73 
74 typedef enum { OTHER = -1, NFLOG, NFQUEUE } nftype_t;
75 
76 /*
77  * Private data for capturing on Linux netfilter sockets.
78  */
79 struct pcap_netfilter {
80 	u_int	packets_read;	/* count of packets read with recvfrom() */
81 	u_int   packets_nobufs; /* ENOBUFS counter */
82 };
83 
84 static int nfqueue_send_verdict(const pcap_t *handle, uint16_t group_id, u_int32_t id, u_int32_t verdict);
85 
86 
87 static int
88 netfilter_read_linux(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
89 {
90 	struct pcap_netfilter *handlep = handle->priv;
91 	register u_char *bp, *ep;
92 	int count = 0;
93 	ssize_t len;
94 
95 	/*
96 	 * Has "pcap_breakloop()" been called?
97 	 */
98 	if (handle->break_loop) {
99 		/*
100 		 * Yes - clear the flag that indicates that it
101 		 * has, and return PCAP_ERROR_BREAK to indicate
102 		 * that we were told to break out of the loop.
103 		 */
104 		handle->break_loop = 0;
105 		return PCAP_ERROR_BREAK;
106 	}
107 	len = handle->cc;
108 	if (len == 0) {
109 		/*
110 		 * The buffer is empty; refill it.
111 		 *
112 		 * We ignore EINTR, as that might just be due to a signal
113 		 * being delivered - if the signal should interrupt the
114 		 * loop, the signal handler should call pcap_breakloop()
115 		 * to set handle->break_loop (we ignore it on other
116 		 * platforms as well).
117 		 */
118 		do {
119 			len = recv(handle->fd, handle->buffer, handle->bufsize, 0);
120 			if (handle->break_loop) {
121 				handle->break_loop = 0;
122 				return PCAP_ERROR_BREAK;
123 			}
124 			if (len == -1 && errno == ENOBUFS)
125 				handlep->packets_nobufs++;
126 		} while ((len == -1) && (errno == EINTR || errno == ENOBUFS));
127 
128 		if (len < 0) {
129 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
130 			    PCAP_ERRBUF_SIZE, errno, "Can't receive packet");
131 			return PCAP_ERROR;
132 		}
133 
134 		bp = (unsigned char *)handle->buffer;
135 	} else
136 		bp = handle->bp;
137 
138 	/*
139 	 * Loop through each message.
140 	 *
141 	 * This assumes that a single buffer of message will have
142 	 * <= INT_MAX packets, so the message count doesn't overflow.
143 	 */
144 	ep = bp + len;
145 	while (bp < ep) {
146 		const struct nlmsghdr *nlh = (const struct nlmsghdr *) bp;
147 		uint32_t msg_len;
148 		nftype_t type = OTHER;
149 		/*
150 		 * Has "pcap_breakloop()" been called?
151 		 * If so, return immediately - if we haven't read any
152 		 * packets, clear the flag and return PCAP_ERROR_BREAK
153 		 * to indicate that we were told to break out of the loop,
154 		 * otherwise leave the flag set, so that the *next* call
155 		 * will break out of the loop without having read any
156 		 * packets, and return the number of packets we've
157 		 * processed so far.
158 		 */
159 		if (handle->break_loop) {
160 			handle->bp = bp;
161 			handle->cc = (int)(ep - bp);
162 			if (count == 0) {
163 				handle->break_loop = 0;
164 				return PCAP_ERROR_BREAK;
165 			} else
166 				return count;
167 		}
168 		/*
169 		 * NLMSG_SPACE(0) might be signed or might be unsigned,
170 		 * depending on whether the kernel defines NLMSG_ALIGNTO
171 		 * as 4, which older kernels do, or as 4U, which newer
172 		 * kernels do.
173 		 *
174 		 * ep - bp is of type ptrdiff_t, which is signed.
175 		 *
176 		 * To squelch warnings, we cast both to size_t, which
177 		 * is unsigned; ep >= bp, so the cast is safe.
178 		 */
179 		if ((size_t)(ep - bp) < (size_t)NLMSG_SPACE(0)) {
180 			/*
181 			 * There's less than one netlink message left
182 			 * in the buffer.  Give up.
183 			 */
184 			break;
185 		}
186 
187 		if (nlh->nlmsg_len < sizeof(struct nlmsghdr) || (u_int)len < nlh->nlmsg_len) {
188 			snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "Message truncated: (got: %zd) (nlmsg_len: %u)", len, nlh->nlmsg_len);
189 			return -1;
190 		}
191 
192 		if (NFNL_SUBSYS_ID(nlh->nlmsg_type) == NFNL_SUBSYS_ULOG &&
193 		    NFNL_MSG_TYPE(nlh->nlmsg_type) == NFULNL_MSG_PACKET)
194 			type = NFLOG;
195 		else if (NFNL_SUBSYS_ID(nlh->nlmsg_type) == NFNL_SUBSYS_QUEUE &&
196 		         NFNL_MSG_TYPE(nlh->nlmsg_type) == NFQNL_MSG_PACKET)
197 			type = NFQUEUE;
198 
199 		if (type != OTHER) {
200 			const unsigned char *payload = NULL;
201 			struct pcap_pkthdr pkth;
202 
203 			const struct nfgenmsg *nfg = NULL;
204 			int id = 0;
205 
206 			if (handle->linktype != DLT_NFLOG) {
207 				const struct nfattr *payload_attr = NULL;
208 
209 				if (nlh->nlmsg_len < HDR_LENGTH) {
210 					snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "Malformed message: (nlmsg_len: %u)", nlh->nlmsg_len);
211 					return -1;
212 				}
213 
214 				nfg = NLMSG_DATA(nlh);
215 				if (nlh->nlmsg_len > HDR_LENGTH) {
216 					struct nfattr *attr = NFM_NFA(nfg);
217 					int attr_len = nlh->nlmsg_len - NLMSG_ALIGN(HDR_LENGTH);
218 
219 					while (NFA_OK(attr, attr_len)) {
220 						if (type == NFQUEUE) {
221 							switch (NFA_TYPE(attr)) {
222 								case NFQA_PACKET_HDR:
223 									{
224 										const struct nfqnl_msg_packet_hdr *pkt_hdr = (const struct nfqnl_msg_packet_hdr *) NFA_DATA(attr);
225 
226 										id = ntohl(pkt_hdr->packet_id);
227 										break;
228 									}
229 								case NFQA_PAYLOAD:
230 									payload_attr = attr;
231 									break;
232 							}
233 
234 						} else if (type == NFLOG) {
235 							switch (NFA_TYPE(attr)) {
236 								case NFULA_PAYLOAD:
237 									payload_attr = attr;
238 									break;
239 							}
240 						}
241 						attr = NFA_NEXT(attr, attr_len);
242 					}
243 				}
244 
245 				if (payload_attr) {
246 					payload = NFA_DATA(payload_attr);
247 					pkth.len = pkth.caplen = NFA_PAYLOAD(payload_attr);
248 				}
249 
250 			} else {
251 				payload = NLMSG_DATA(nlh);
252 				pkth.caplen = pkth.len = nlh->nlmsg_len-NLMSG_ALIGN(sizeof(struct nlmsghdr));
253 			}
254 
255 			if (payload) {
256 				/* pkth.caplen = min (payload_len, handle->snapshot); */
257 
258 				gettimeofday(&pkth.ts, NULL);
259 				if (handle->fcode.bf_insns == NULL ||
260 						pcapint_filter(handle->fcode.bf_insns, payload, pkth.len, pkth.caplen))
261 				{
262 					handlep->packets_read++;
263 					callback(user, &pkth, payload);
264 					count++;
265 				}
266 			}
267 
268 			if (type == NFQUEUE) {
269 				/* XXX, possible responses: NF_DROP, NF_ACCEPT, NF_STOLEN, NF_QUEUE, NF_REPEAT, NF_STOP */
270 				/* if type == NFQUEUE, handle->linktype is always != DLT_NFLOG,
271 				   so nfg is always initialized to NLMSG_DATA(nlh). */
272 				if (nfg != NULL)
273 					nfqueue_send_verdict(handle, ntohs(nfg->res_id), id, NF_ACCEPT);
274 			}
275 		}
276 
277 		msg_len = NLMSG_ALIGN(nlh->nlmsg_len);
278 		/*
279 		 * If the message length would run past the end of the
280 		 * buffer, truncate it to the remaining space in the
281 		 * buffer.
282 		 *
283 		 * To squelch warnings, we cast ep - bp to uint32_t, which
284 		 * is unsigned and is the type of msg_len; ep >= bp, and
285 		 * len should fit in 32 bits (either it's set from an int
286 		 * or it's set from a recv() call with a buffer size that's
287 		 * an int, and we're assuming either ILP32 or LP64), so
288 		 * the cast is safe.
289 		 */
290 		if (msg_len > (uint32_t)(ep - bp))
291 			msg_len = (uint32_t)(ep - bp);
292 
293 		bp += msg_len;
294 		if (count >= max_packets && !PACKET_COUNT_IS_UNLIMITED(max_packets)) {
295 			handle->bp = bp;
296 			handle->cc = (int)(ep - bp);
297 			if (handle->cc < 0)
298 				handle->cc = 0;
299 			return count;
300 		}
301 	}
302 
303 	handle->cc = 0;
304 	return count;
305 }
306 
307 static int
308 netfilter_set_datalink(pcap_t *handle, int dlt)
309 {
310 	handle->linktype = dlt;
311 	return 0;
312 }
313 
314 static int
315 netfilter_stats_linux(pcap_t *handle, struct pcap_stat *stats)
316 {
317 	struct pcap_netfilter *handlep = handle->priv;
318 
319 	stats->ps_recv = handlep->packets_read;
320 	stats->ps_drop = handlep->packets_nobufs;
321 	stats->ps_ifdrop = 0;
322 	return 0;
323 }
324 
325 static int
326 netfilter_inject_linux(pcap_t *handle, const void *buf _U_, int size _U_)
327 {
328 	snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
329 	    "Packet injection is not supported on netfilter devices");
330 	return (-1);
331 }
332 
/*
 * Describes a single attribute to be appended to an outgoing
 * nfnetlink message.
 */
struct my_nfattr {
	uint16_t nfa_len;	/* number of payload bytes at "data" */
	uint16_t nfa_type;	/* attribute type to put in the message */
	void *data;		/* payload; copied into the message */
};
338 
339 static int
340 netfilter_send_config_msg(const pcap_t *handle, uint16_t msg_type, int ack, u_int8_t family, u_int16_t res_id, const struct my_nfattr *mynfa)
341 {
342 	char buf[1024] __attribute__ ((aligned));
343 	memset(buf, 0, sizeof(buf));
344 
345 	struct nlmsghdr *nlh = (struct nlmsghdr *) buf;
346 	struct nfgenmsg *nfg = (struct nfgenmsg *) (buf + sizeof(struct nlmsghdr));
347 
348 	struct sockaddr_nl snl;
349 	static unsigned int seq_id;
350 
351 	if (!seq_id)
352 DIAG_OFF_NARROWING
353 		seq_id = time(NULL);
354 DIAG_ON_NARROWING
355 	++seq_id;
356 
357 	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nfgenmsg));
358 	nlh->nlmsg_type = msg_type;
359 	nlh->nlmsg_flags = NLM_F_REQUEST | (ack ? NLM_F_ACK : 0);
360 	nlh->nlmsg_pid = 0;	/* to kernel */
361 	nlh->nlmsg_seq = seq_id;
362 
363 	nfg->nfgen_family = family;
364 	nfg->version = NFNETLINK_V0;
365 	nfg->res_id = htons(res_id);
366 
367 	if (mynfa) {
368 		struct nfattr *nfa = (struct nfattr *) (buf + NLMSG_ALIGN(nlh->nlmsg_len));
369 
370 		nfa->nfa_type = mynfa->nfa_type;
371 		nfa->nfa_len = NFA_LENGTH(mynfa->nfa_len);
372 		memcpy(NFA_DATA(nfa), mynfa->data, mynfa->nfa_len);
373 		nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + NFA_ALIGN(nfa->nfa_len);
374 	}
375 
376 	memset(&snl, 0, sizeof(snl));
377 	snl.nl_family = AF_NETLINK;
378 
379 	if (sendto(handle->fd, nlh, nlh->nlmsg_len, 0, (struct sockaddr *) &snl, sizeof(snl)) == -1)
380 		return -1;
381 
382 	if (!ack)
383 		return 0;
384 
385 	/* waiting for reply loop */
386 	do {
387 		socklen_t addrlen = sizeof(snl);
388 		int len;
389 
390 		/* ignore interrupt system call error */
391 		do {
392 			/*
393 			 * The buffer is not so big that its size won't
394 			 * fit into an int.
395 			 */
396 			len = (int)recvfrom(handle->fd, buf, sizeof(buf), 0, (struct sockaddr *) &snl, &addrlen);
397 		} while ((len == -1) && (errno == EINTR));
398 
399 		if (len <= 0)
400 			return len;
401 
402 		if (addrlen != sizeof(snl) || snl.nl_family != AF_NETLINK) {
403 			errno = EINVAL;
404 			return -1;
405 		}
406 
407 		nlh = (struct nlmsghdr *) buf;
408 		if (snl.nl_pid != 0 || seq_id != nlh->nlmsg_seq)	/* if not from kernel or wrong sequence skip */
409 			continue;
410 
411 		while ((u_int)len >= NLMSG_SPACE(0) && NLMSG_OK(nlh, (u_int)len)) {
412 			if (nlh->nlmsg_type == NLMSG_ERROR || (nlh->nlmsg_type == NLMSG_DONE && nlh->nlmsg_flags & NLM_F_MULTI)) {
413 				if (nlh->nlmsg_len < NLMSG_ALIGN(sizeof(struct nlmsgerr))) {
414 					errno = EBADMSG;
415 					return -1;
416 				}
417 				errno = -(*((int *)NLMSG_DATA(nlh)));
418 				return (errno == 0) ? 0 : -1;
419 			}
420 			nlh = NLMSG_NEXT(nlh, len);
421 		}
422 	} while (1);
423 
424 	return -1; /* never here */
425 }
426 
427 static int
428 nflog_send_config_msg(const pcap_t *handle, uint8_t family, u_int16_t group_id, const struct my_nfattr *mynfa)
429 {
430 	return netfilter_send_config_msg(handle, (NFNL_SUBSYS_ULOG << 8) | NFULNL_MSG_CONFIG, 1, family, group_id, mynfa);
431 }
432 
433 static int
434 nflog_send_config_cmd(const pcap_t *handle, uint16_t group_id, u_int8_t cmd, u_int8_t family)
435 {
436 	struct nfulnl_msg_config_cmd msg;
437 	struct my_nfattr nfa;
438 
439 	msg.command = cmd;
440 
441 	nfa.data = &msg;
442 	nfa.nfa_type = NFULA_CFG_CMD;
443 	nfa.nfa_len = sizeof(msg);
444 
445 	return nflog_send_config_msg(handle, family, group_id, &nfa);
446 }
447 
448 static int
449 nflog_send_config_mode(const pcap_t *handle, uint16_t group_id, u_int8_t copy_mode, u_int32_t copy_range)
450 {
451 	struct nfulnl_msg_config_mode msg;
452 	struct my_nfattr nfa;
453 
454 	msg.copy_range = htonl(copy_range);
455 	msg.copy_mode = copy_mode;
456 
457 	nfa.data = &msg;
458 	nfa.nfa_type = NFULA_CFG_MODE;
459 	nfa.nfa_len = sizeof(msg);
460 
461 	return nflog_send_config_msg(handle, AF_UNSPEC, group_id, &nfa);
462 }
463 
464 static int
465 nfqueue_send_verdict(const pcap_t *handle, uint16_t group_id, u_int32_t id, u_int32_t verdict)
466 {
467 	struct nfqnl_msg_verdict_hdr msg;
468 	struct my_nfattr nfa;
469 
470 	msg.id = htonl(id);
471 	msg.verdict = htonl(verdict);
472 
473 	nfa.data = &msg;
474 	nfa.nfa_type = NFQA_VERDICT_HDR;
475 	nfa.nfa_len = sizeof(msg);
476 
477 	return netfilter_send_config_msg(handle, (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT, 0, AF_UNSPEC, group_id, &nfa);
478 }
479 
480 static int
481 nfqueue_send_config_msg(const pcap_t *handle, uint8_t family, u_int16_t group_id, const struct my_nfattr *mynfa)
482 {
483 	return netfilter_send_config_msg(handle, (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG, 1, family, group_id, mynfa);
484 }
485 
486 static int
487 nfqueue_send_config_cmd(const pcap_t *handle, uint16_t group_id, u_int8_t cmd, u_int16_t pf)
488 {
489 	struct nfqnl_msg_config_cmd msg;
490 	struct my_nfattr nfa;
491 
492 	msg.command = cmd;
493 	msg.pf = htons(pf);
494 
495 	nfa.data = &msg;
496 	nfa.nfa_type = NFQA_CFG_CMD;
497 	nfa.nfa_len = sizeof(msg);
498 
499 	return nfqueue_send_config_msg(handle, AF_UNSPEC, group_id, &nfa);
500 }
501 
502 static int
503 nfqueue_send_config_mode(const pcap_t *handle, uint16_t group_id, u_int8_t copy_mode, u_int32_t copy_range)
504 {
505 	struct nfqnl_msg_config_params msg;
506 	struct my_nfattr nfa;
507 
508 	msg.copy_range = htonl(copy_range);
509 	msg.copy_mode = copy_mode;
510 
511 	nfa.data = &msg;
512 	nfa.nfa_type = NFQA_CFG_PARAMS;
513 	nfa.nfa_len = sizeof(msg);
514 
515 	return nfqueue_send_config_msg(handle, AF_UNSPEC, group_id, &nfa);
516 }
517 
518 static int
519 netfilter_activate(pcap_t* handle)
520 {
521 	const char *dev = handle->opt.device;
522 	unsigned short groups[32];
523 	int group_count = 0;
524 	nftype_t type = OTHER;
525 	int i;
526 
527 	if (strncmp(dev, NFLOG_IFACE, strlen(NFLOG_IFACE)) == 0) {
528 		dev += strlen(NFLOG_IFACE);
529 		type = NFLOG;
530 
531 	} else if (strncmp(dev, NFQUEUE_IFACE, strlen(NFQUEUE_IFACE)) == 0) {
532 		dev += strlen(NFQUEUE_IFACE);
533 		type = NFQUEUE;
534 	}
535 
536 	if (type != OTHER && *dev == ':') {
537 		dev++;
538 		while (*dev) {
539 			long int group_id;
540 			char *end_dev;
541 
542 			if (group_count == 32) {
543 				snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
544 						"Maximum 32 netfilter groups! dev: %s",
545 						handle->opt.device);
546 				return PCAP_ERROR;
547 			}
548 
549 			group_id = strtol(dev, &end_dev, 0);
550 			if (end_dev != dev) {
551 				if (group_id < 0 || group_id > 65535) {
552 					snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
553 							"Netfilter group range from 0 to 65535 (got %ld)",
554 							group_id);
555 					return PCAP_ERROR;
556 				}
557 
558 				groups[group_count++] = (unsigned short) group_id;
559 				dev = end_dev;
560 			}
561 			if (*dev != ',')
562 				break;
563 			dev++;
564 		}
565 	}
566 
567 	if (type == OTHER || *dev) {
568 		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
569 				"Can't get netfilter group(s) index from %s",
570 				handle->opt.device);
571 		return PCAP_ERROR;
572 	}
573 
574 	/* if no groups, add default: 0 */
575 	if (!group_count) {
576 		groups[0] = 0;
577 		group_count = 1;
578 	}
579 
580 	/*
581 	 * Turn a negative snapshot value (invalid), a snapshot value of
582 	 * 0 (unspecified), or a value bigger than the normal maximum
583 	 * value, into the maximum allowed value.
584 	 *
585 	 * If some application really *needs* a bigger snapshot
586 	 * length, we should just increase MAXIMUM_SNAPLEN.
587 	 */
588 	if (handle->snapshot <= 0 || handle->snapshot > MAXIMUM_SNAPLEN)
589 		handle->snapshot = MAXIMUM_SNAPLEN;
590 
591 	/* Initialize some components of the pcap structure. */
592 	handle->bufsize = 128 + handle->snapshot;
593 	handle->offset = 0;
594 	handle->read_op = netfilter_read_linux;
595 	handle->inject_op = netfilter_inject_linux;
596 	handle->setfilter_op = pcapint_install_bpf_program; /* no kernel filtering */
597 	handle->setdirection_op = NULL;
598 	handle->set_datalink_op = netfilter_set_datalink;
599 	handle->getnonblock_op = pcapint_getnonblock_fd;
600 	handle->setnonblock_op = pcapint_setnonblock_fd;
601 	handle->stats_op = netfilter_stats_linux;
602 
603 	/* Create netlink socket */
604 	handle->fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
605 	if (handle->fd < 0) {
606 		pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
607 		    errno, "Can't create raw socket");
608 		return PCAP_ERROR;
609 	}
610 
611 	if (type == NFLOG) {
612 		handle->linktype = DLT_NFLOG;
613 		handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
614 		if (handle->dlt_list == NULL) {
615 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
616 			    PCAP_ERRBUF_SIZE, errno,
617 			    "Can't allocate DLT list");
618 			goto close_fail;
619 		}
620 		handle->dlt_list[0] = DLT_NFLOG;
621 		handle->dlt_list[1] = DLT_IPV4;
622 		handle->dlt_count = 2;
623 	} else
624 		handle->linktype = DLT_IPV4;
625 
626 	handle->buffer = malloc(handle->bufsize);
627 	if (!handle->buffer) {
628 		pcapint_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
629 		    errno, "Can't allocate dump buffer");
630 		goto close_fail;
631 	}
632 
633 	if (type == NFLOG) {
634 		if (nflog_send_config_cmd(handle, 0, NFULNL_CFG_CMD_PF_UNBIND, AF_INET) < 0) {
635 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
636 			    PCAP_ERRBUF_SIZE, errno,
637 			    "NFULNL_CFG_CMD_PF_UNBIND");
638 			goto close_fail;
639 		}
640 
641 		if (nflog_send_config_cmd(handle, 0, NFULNL_CFG_CMD_PF_BIND, AF_INET) < 0) {
642 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
643 			    PCAP_ERRBUF_SIZE, errno, "NFULNL_CFG_CMD_PF_BIND");
644 			goto close_fail;
645 		}
646 
647 		/* Bind socket to the nflog groups */
648 		for (i = 0; i < group_count; i++) {
649 			if (nflog_send_config_cmd(handle, groups[i], NFULNL_CFG_CMD_BIND, AF_UNSPEC) < 0) {
650 				pcapint_fmt_errmsg_for_errno(handle->errbuf,
651 				    PCAP_ERRBUF_SIZE, errno,
652 				    "Can't listen on group index");
653 				goto close_fail;
654 			}
655 
656 			if (nflog_send_config_mode(handle, groups[i], NFULNL_COPY_PACKET, handle->snapshot) < 0) {
657 				pcapint_fmt_errmsg_for_errno(handle->errbuf,
658 				    PCAP_ERRBUF_SIZE, errno,
659 				    "NFULNL_COPY_PACKET");
660 				goto close_fail;
661 			}
662 		}
663 
664 	} else {
665 		if (nfqueue_send_config_cmd(handle, 0, NFQNL_CFG_CMD_PF_UNBIND, AF_INET) < 0) {
666 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
667 			    PCAP_ERRBUF_SIZE, errno, "NFQNL_CFG_CMD_PF_UNBIND");
668 			goto close_fail;
669 		}
670 
671 		if (nfqueue_send_config_cmd(handle, 0, NFQNL_CFG_CMD_PF_BIND, AF_INET) < 0) {
672 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
673 			    PCAP_ERRBUF_SIZE, errno, "NFQNL_CFG_CMD_PF_BIND");
674 			goto close_fail;
675 		}
676 
677 		/* Bind socket to the nfqueue groups */
678 		for (i = 0; i < group_count; i++) {
679 			if (nfqueue_send_config_cmd(handle, groups[i], NFQNL_CFG_CMD_BIND, AF_UNSPEC) < 0) {
680 				pcapint_fmt_errmsg_for_errno(handle->errbuf,
681 				    PCAP_ERRBUF_SIZE, errno,
682 				    "Can't listen on group index");
683 				goto close_fail;
684 			}
685 
686 			if (nfqueue_send_config_mode(handle, groups[i], NFQNL_COPY_PACKET, handle->snapshot) < 0) {
687 				pcapint_fmt_errmsg_for_errno(handle->errbuf,
688 				    PCAP_ERRBUF_SIZE, errno,
689 				    "NFQNL_COPY_PACKET");
690 				goto close_fail;
691 			}
692 		}
693 	}
694 
695 	if (handle->opt.rfmon) {
696 		/*
697 		 * Monitor mode doesn't apply to netfilter devices.
698 		 */
699 		pcapint_cleanup_live_common(handle);
700 		return PCAP_ERROR_RFMON_NOTSUP;
701 	}
702 
703 	if (handle->opt.buffer_size != 0) {
704 		/*
705 		 * Set the socket buffer size to the specified value.
706 		 */
707 		if (setsockopt(handle->fd, SOL_SOCKET, SO_RCVBUF, &handle->opt.buffer_size, sizeof(handle->opt.buffer_size)) == -1) {
708 			pcapint_fmt_errmsg_for_errno(handle->errbuf,
709 			    PCAP_ERRBUF_SIZE, errno, "SO_RCVBUF");
710 			goto close_fail;
711 		}
712 	}
713 
714 	handle->selectable_fd = handle->fd;
715 	return 0;
716 
717 close_fail:
718 	pcapint_cleanup_live_common(handle);
719 	return PCAP_ERROR;
720 }
721 
722 pcap_t *
723 netfilter_create(const char *device, char *ebuf, int *is_ours)
724 {
725 	const char *cp;
726 	pcap_t *p;
727 
728 	/* Does this look like an netfilter device? */
729 	cp = strrchr(device, '/');
730 	if (cp == NULL)
731 		cp = device;
732 
733 	/* Does it begin with NFLOG_IFACE or NFQUEUE_IFACE? */
734 	if (strncmp(cp, NFLOG_IFACE, sizeof NFLOG_IFACE - 1) == 0)
735 		cp += sizeof NFLOG_IFACE - 1;
736 	else if (strncmp(cp, NFQUEUE_IFACE, sizeof NFQUEUE_IFACE - 1) == 0)
737 		cp += sizeof NFQUEUE_IFACE - 1;
738 	else {
739 		/* Nope, doesn't begin with NFLOG_IFACE nor NFQUEUE_IFACE */
740 		*is_ours = 0;
741 		return NULL;
742 	}
743 
744 	/*
745 	 * Yes - is that either the end of the name, or is it followed
746 	 * by a colon?
747 	 */
748 	if (*cp != ':' && *cp != '\0') {
749 		/* Nope */
750 		*is_ours = 0;
751 		return NULL;
752 	}
753 
754 	/* OK, it's probably ours. */
755 	*is_ours = 1;
756 
757 	p = PCAP_CREATE_COMMON(ebuf, struct pcap_netfilter);
758 	if (p == NULL)
759 		return (NULL);
760 
761 	p->activate_op = netfilter_activate;
762 	return (p);
763 }
764 
765 int
766 netfilter_findalldevs(pcap_if_list_t *devlistp, char *err_str)
767 {
768 	int sock;
769 
770 	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
771 	if (sock < 0) {
772 		/* if netlink is not supported this is not fatal */
773 		if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT)
774 			return 0;
775 		pcapint_fmt_errmsg_for_errno(err_str, PCAP_ERRBUF_SIZE,
776 		    errno, "Can't open netlink socket");
777 		return -1;
778 	}
779 	close(sock);
780 
781 	/*
782 	 * The notion of "connected" vs. "disconnected" doesn't apply.
783 	 * XXX - what about "up" and "running"?
784 	 */
785 	if (pcapint_add_dev(devlistp, NFLOG_IFACE,
786 	    PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE,
787 	    "Linux netfilter log (NFLOG) interface", err_str) == NULL)
788 		return -1;
789 	if (pcapint_add_dev(devlistp, NFQUEUE_IFACE,
790 	    PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE,
791 	    "Linux netfilter queue (NFQUEUE) interface", err_str) == NULL)
792 		return -1;
793 	return 0;
794 }
795