xref: /titanic_51/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c (revision 980a6e61aeb2038ab2b640d7ac80b36cf5c7d84b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS	*/
27 
28 #include <stdio.h>
29 #include <stddef.h>
30 #include <ctype.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <string.h>
34 #include <sys/types.h>
35 #include <sys/time.h>
36 #include <sys/isa_defs.h>
37 
38 #include <sys/socket.h>
39 #include <sys/vlan.h>
40 #include <net/if.h>
41 #include <netinet/in_systm.h>
42 #include <netinet/in.h>
43 #include <netinet/ip.h>
44 #include <netinet/if_ether.h>
45 #include <netinet/tcp.h>
46 #include <netinet/udp.h>
47 #include <netdb.h>
48 #include <rpc/rpc.h>
49 #include <setjmp.h>
50 
51 #include <sys/pfmod.h>
52 #include "snoop.h"
53 #include "snoop_vlan.h"
54 
55 /*
56  * This module generates code for the kernel packet filter.
57  * The kernel packet filter is more efficient since it
58  * operates without context switching or moving data into
59  * the capture buffer.  On the other hand, it is limited
60  * in its filtering ability i.e. can't cope with variable
61  * length headers, can't compare the packet size, 1 and 4 octet
 * comparisons are awkward, code space is limited to PF_MAXFILTERS
 * halfwords (the bound enforced by pf_emit), etc.
64  * The parser is the same for the user-level packet filter though
65  * more limited in the variety of expressions it can generate
66  * code for.  If the pf compiler finds an expression it can't
67  * handle, it tries to set up a split filter in kernel and do the
68  * remaining filtering in userland. If that also fails, it resorts
69  * to userland filter. (See additional comment in pf_compile)
70  */
71 
/*
 * Filter program storage (defined elsewhere) and the state used while
 * generating code into it: pf_emit() stores each word through pfp and
 * advances it, and longjmp()s to env once the storage is exhausted.
 */
extern struct Pf_ext_packetfilt pf;
static ushort_t *pfp;
jmp_buf env;

/* Set to 1 by pf_primary() when the "ether" keyword is seen. */
int eaddr;	/* need ethernet addr */

/* Incremented by pf_primary() each time it emits code for a primary. */
int opstack;	/* operand stack depth */

/* True when the current token text is exactly the string val. */
#define	EQ(val)		(strcmp(token, val) == 0)
/* Address family selectors passed to pf_ipaddr_match() as inet_type. */
#define	IPV4_ONLY	0
#define	IPV6_ONLY	1
#define	IPV4_AND_IPV6	2

/*
 * The following variables hold the offsets in bytes from the beginning
 * of the packet of the link and IP(v6) layer source/destination/type fields,
 * initialized for Ethernet. Media specific code can set any unavailable
 * link layer property's offset to -1 to indicate that the property's value
 * is not available from the frame; the comparison helpers report an error
 * when handed such an offset.
 */
static int link_header_len = 14, link_type_offset = 12;
static int link_dest_offset = 0, link_src_offset = 6;
static int link_addr_len = 6;

/* Byte offsets of the IP address fields, measured from the packet start. */
#define	IPV4_SRCADDR_OFFSET	(link_header_len + 12)
#define	IPV4_DSTADDR_OFFSET	(link_header_len + 16)
#define	IPV6_SRCADDR_OFFSET	(link_header_len + 8)
#define	IPV6_DSTADDR_OFFSET	(link_header_len + 24)

/* Parenthesis bookkeeping, updated by pf_primary() around "(" ... ")". */
static int inBrace = 0, inBraceOR = 0;
static int foundOR = 0;
/*
 * Tokenizer state: token is the current token text, tokentype its class
 * and tokenval its numeric value when the token is a NUMBER.
 * NOTE(review): tkp/sav_tkp are presumably scan positions maintained by
 * next() - confirm against the tokenizer.
 */
char *tkp, *sav_tkp;
char *token;
enum { EOL, ALPHA, NUMBER, FIELD, ADDR_IP, ADDR_ETHER, SPECIAL,
	ADDR_IP6 } tokentype;
uint_t tokenval;

/*
 * Direction qualifier for address matches; pf_primary() sets dir to TO
 * for "to"/"dst" and to FROM for "from"/"src".
 */
enum direction { ANY, TO, FROM };
enum direction dir;

/* Advances to the next token; called after each keyword is consumed. */
extern void next();

/* Forward declarations for functions used before their definitions. */
static void pf_expression();
static void pf_check_vlan_tag(uint_t offset);
static void pf_clear_offset_register();
static void pf_emit_load_offset(uint_t offset);
static void pf_match_ethertype(uint_t ethertype);
static void pf_check_transport_protocol(uint_t transport_protocol);
static void pf_compare_value_mask_generic(int offset, uint_t len,
    uint_t val, int mask, uint_t op);

/*
 * This pointer points to the function that last generated
 * instructions to change the offset register.  It's used
 * for comparisons to see if we need to issue more instructions
 * to change the register.
 *
 * It's initialized to pf_clear_offset_register because the offset
 * register in pfmod is initialized to zero, similar to the state
 * it would be in after executing the instructions issued by
 * pf_clear_offset_register.
 */
static void *last_offset_operation = (void*)pf_clear_offset_register;
135 
136 static void
137 pf_emit(x)
138 	ushort_t x;
139 {
140 	if (pfp > &pf.Pf_Filter[PF_MAXFILTERS - 1])
141 		longjmp(env, 1);
142 	*pfp++ = x;
143 }
144 
145 static void
146 pf_codeprint(code, len)
147 	ushort_t *code;
148 	int len;
149 {
150 	ushort_t *pc;
151 	ushort_t *plast = code + len;
152 	int op, action;
153 
154 	if (len > 0) {
155 		printf("Kernel Filter:\n");
156 	}
157 
158 	for (pc = code; pc < plast; pc++) {
159 		printf("\t%3d: ", pc - code);
160 
161 		op = *pc & 0xfc00;	/* high 10 bits */
162 		action = *pc & 0x3ff;	/* low   6 bits */
163 
164 		switch (action) {
165 		case ENF_PUSHLIT:
166 			printf("PUSHLIT ");
167 			break;
168 		case ENF_PUSHZERO:
169 			printf("PUSHZERO ");
170 			break;
171 #ifdef ENF_PUSHONE
172 		case ENF_PUSHONE:
173 			printf("PUSHONE ");
174 			break;
175 #endif
176 #ifdef ENF_PUSHFFFF
177 		case ENF_PUSHFFFF:
178 			printf("PUSHFFFF ");
179 			break;
180 #endif
181 #ifdef ENF_PUSHFF00
182 		case ENF_PUSHFF00:
183 			printf("PUSHFF00 ");
184 			break;
185 #endif
186 #ifdef ENF_PUSH00FF
187 		case ENF_PUSH00FF:
188 			printf("PUSH00FF ");
189 			break;
190 #endif
191 		case ENF_LOAD_OFFSET:
192 			printf("LOAD_OFFSET ");
193 			break;
194 		case ENF_BRTR:
195 			printf("BRTR ");
196 			break;
197 		case ENF_BRFL:
198 			printf("BRFL ");
199 			break;
200 		case ENF_POP:
201 			printf("POP ");
202 			break;
203 		}
204 
205 		if (action >= ENF_PUSHWORD)
206 			printf("PUSHWORD %d ", action - ENF_PUSHWORD);
207 
208 		switch (op) {
209 		case ENF_EQ:
210 			printf("EQ ");
211 			break;
212 		case ENF_LT:
213 			printf("LT ");
214 			break;
215 		case ENF_LE:
216 			printf("LE ");
217 			break;
218 		case ENF_GT:
219 			printf("GT ");
220 			break;
221 		case ENF_GE:
222 			printf("GE ");
223 			break;
224 		case ENF_AND:
225 			printf("AND ");
226 			break;
227 		case ENF_OR:
228 			printf("OR ");
229 			break;
230 		case ENF_XOR:
231 			printf("XOR ");
232 			break;
233 		case ENF_COR:
234 			printf("COR ");
235 			break;
236 		case ENF_CAND:
237 			printf("CAND ");
238 			break;
239 		case ENF_CNOR:
240 			printf("CNOR ");
241 			break;
242 		case ENF_CNAND:
243 			printf("CNAND ");
244 			break;
245 		case ENF_NEQ:
246 			printf("NEQ ");
247 			break;
248 		}
249 
250 		if (action == ENF_PUSHLIT ||
251 		    action == ENF_LOAD_OFFSET ||
252 		    action == ENF_BRTR ||
253 		    action == ENF_BRFL) {
254 			pc++;
255 			printf("\n\t%3d:   %d (0x%04x)", pc - code, *pc, *pc);
256 		}
257 
258 		printf("\n");
259 	}
260 }
261 
262 /*
263  * Emit packet filter code to check a
264  * field in the packet for a particular value.
265  * Need different code for each field size.
266  * Since the pf can only compare 16 bit quantities
267  * we have to use masking to compare byte values.
268  * Long word (32 bit) quantities have to be done
269  * as two 16 bit comparisons.
270  */
271 static void
272 pf_compare_value(int offset, uint_t len, uint_t val)
273 {
274 	/*
275 	 * If the property being filtered on is absent in the media
276 	 * packet, error out.
277 	 */
278 	if (offset == -1)
279 		pr_err("filter option unsupported on media");
280 
281 	switch (len) {
282 	case 1:
283 		pf_emit(ENF_PUSHWORD + offset / 2);
284 #if defined(_BIG_ENDIAN)
285 		if (offset % 2)
286 #else
287 		if (!(offset % 2))
288 #endif
289 		{
290 #ifdef ENF_PUSH00FF
291 			pf_emit(ENF_PUSH00FF | ENF_AND);
292 #else
293 			pf_emit(ENF_PUSHLIT | ENF_AND);
294 			pf_emit(0x00FF);
295 #endif
296 			pf_emit(ENF_PUSHLIT | ENF_EQ);
297 			pf_emit(val);
298 		} else {
299 #ifdef ENF_PUSHFF00
300 			pf_emit(ENF_PUSHFF00 | ENF_AND);
301 #else
302 			pf_emit(ENF_PUSHLIT | ENF_AND);
303 			pf_emit(0xFF00);
304 #endif
305 			pf_emit(ENF_PUSHLIT | ENF_EQ);
306 			pf_emit(val << 8);
307 		}
308 		break;
309 
310 	case 2:
311 		pf_emit(ENF_PUSHWORD + offset / 2);
312 		pf_emit(ENF_PUSHLIT | ENF_EQ);
313 		pf_emit((ushort_t)val);
314 		break;
315 
316 	case 4:
317 		pf_emit(ENF_PUSHWORD + offset / 2);
318 		pf_emit(ENF_PUSHLIT | ENF_EQ);
319 #if defined(_BIG_ENDIAN)
320 		pf_emit(val >> 16);
321 #elif defined(_LITTLE_ENDIAN)
322 		pf_emit(val & 0xffff);
323 #else
324 #error One of _BIG_ENDIAN and _LITTLE_ENDIAN must be defined
325 #endif
326 		pf_emit(ENF_PUSHWORD + (offset / 2) + 1);
327 		pf_emit(ENF_PUSHLIT | ENF_EQ);
328 #if defined(_BIG_ENDIAN)
329 		pf_emit(val & 0xffff);
330 #else
331 		pf_emit(val >> 16);
332 #endif
333 		pf_emit(ENF_AND);
334 		break;
335 	}
336 }
337 
338 /*
339  * same as pf_compare_value, but only for emiting code to
340  * compare ipv6 addresses.
341  */
342 static void
343 pf_compare_value_v6(int offset, uint_t len, struct in6_addr val)
344 {
345 	int i;
346 
347 	for (i = 0; i < len; i += 2) {
348 		pf_emit(ENF_PUSHWORD + offset / 2 + i / 2);
349 		pf_emit(ENF_PUSHLIT | ENF_EQ);
350 		pf_emit(*(uint16_t *)&val.s6_addr[i]);
351 		if (i != 0)
352 			pf_emit(ENF_AND);
353 	}
354 }
355 
356 
357 /*
358  * Same as above except mask the field value
359  * before doing the comparison.  The comparison checks
360  * to make sure the values are equal.
361  */
362 static void
363 pf_compare_value_mask(int offset, uint_t len, uint_t val, int mask)
364 {
365 	pf_compare_value_mask_generic(offset, len, val, mask, ENF_EQ);
366 }
367 
368 /*
369  * Same as above except the values are compared to see if they are not
370  * equal.
371  */
372 static void
373 pf_compare_value_mask_neq(int offset, uint_t len, uint_t val, int mask)
374 {
375 	pf_compare_value_mask_generic(offset, len, val, mask, ENF_NEQ);
376 }
377 
378 /*
379  * Similar to pf_compare_value.
380  *
381  * This is the utility function that does the actual work to compare
382  * two values using a mask.  The comparison operation is passed into
383  * the function.
384  */
385 static void
386 pf_compare_value_mask_generic(int offset, uint_t len, uint_t val, int mask,
387     uint_t op)
388 {
389 	/*
390 	 * If the property being filtered on is absent in the media
391 	 * packet, error out.
392 	 */
393 	if (offset == -1)
394 		pr_err("filter option unsupported on media");
395 
396 	switch (len) {
397 	case 1:
398 		pf_emit(ENF_PUSHWORD + offset / 2);
399 #if defined(_BIG_ENDIAN)
400 		if (offset % 2)
401 #else
402 		if (!offset % 2)
403 #endif
404 		{
405 			pf_emit(ENF_PUSHLIT | ENF_AND);
406 			pf_emit(mask & 0x00ff);
407 			pf_emit(ENF_PUSHLIT | op);
408 			pf_emit(val);
409 		} else {
410 			pf_emit(ENF_PUSHLIT | ENF_AND);
411 			pf_emit((mask << 8) & 0xff00);
412 			pf_emit(ENF_PUSHLIT | op);
413 			pf_emit(val << 8);
414 		}
415 		break;
416 
417 	case 2:
418 		pf_emit(ENF_PUSHWORD + offset / 2);
419 		pf_emit(ENF_PUSHLIT | ENF_AND);
420 		pf_emit(htons((ushort_t)mask));
421 		pf_emit(ENF_PUSHLIT | op);
422 		pf_emit(htons((ushort_t)val));
423 		break;
424 
425 	case 4:
426 		pf_emit(ENF_PUSHWORD + offset / 2);
427 		pf_emit(ENF_PUSHLIT | ENF_AND);
428 		pf_emit(htons((ushort_t)((mask >> 16) & 0xffff)));
429 		pf_emit(ENF_PUSHLIT | op);
430 		pf_emit(htons((ushort_t)((val >> 16) & 0xffff)));
431 
432 		pf_emit(ENF_PUSHWORD + (offset / 2) + 1);
433 		pf_emit(ENF_PUSHLIT | ENF_AND);
434 		pf_emit(htons((ushort_t)(mask & 0xffff)));
435 		pf_emit(ENF_PUSHLIT | op);
436 		pf_emit(htons((ushort_t)(val & 0xffff)));
437 
438 		pf_emit(ENF_AND);
439 		break;
440 	}
441 }
442 
443 /*
444  * Generate pf code to match an IPv4 or IPv6 address.
445  */
446 static void
447 pf_ipaddr_match(which, hostname, inet_type)
448 	enum direction which;
449 	char *hostname;
450 	int inet_type;
451 {
452 	bool_t found_host;
453 	uint_t *addr4ptr;
454 	uint_t addr4;
455 	struct in6_addr *addr6ptr;
456 	int h_addr_index;
457 	struct hostent *hp = NULL;
458 	int error_num = 0;
459 	boolean_t first = B_TRUE;
460 	int pass = 0;
461 
462 	/*
463 	 * The addr4offset and addr6offset variables simplify the code which
464 	 * generates the address comparison filter.  With these two variables,
465 	 * duplicate code need not exist for the TO and FROM case.
466 	 * A value of -1 describes the ANY case (TO and FROM).
467 	 */
468 	int addr4offset;
469 	int addr6offset;
470 
471 	found_host = 0;
472 
473 	if (tokentype == ADDR_IP) {
474 		hp = getipnodebyname(hostname, AF_INET, 0, &error_num);
475 		if (hp == NULL) {
476 			if (error_num == TRY_AGAIN) {
477 				pr_err("could not resolve %s (try again later)",
478 				    hostname);
479 			} else {
480 				pr_err("could not resolve %s", hostname);
481 			}
482 		}
483 		inet_type = IPV4_ONLY;
484 	} else if (tokentype == ADDR_IP6) {
485 		hp = getipnodebyname(hostname, AF_INET6, 0, &error_num);
486 		if (hp == NULL) {
487 			if (error_num == TRY_AGAIN) {
488 				pr_err("could not resolve %s (try again later)",
489 				    hostname);
490 			} else {
491 				pr_err("could not resolve %s", hostname);
492 			}
493 		}
494 		inet_type = IPV6_ONLY;
495 	} else if (tokentype == ALPHA) {
496 		/* Some hostname i.e. tokentype is ALPHA */
497 		switch (inet_type) {
498 		case IPV4_ONLY:
499 			/* Only IPv4 address is needed */
500 			hp = getipnodebyname(hostname, AF_INET, 0, &error_num);
501 			if (hp != NULL) {
502 				found_host = 1;
503 			}
504 			break;
505 		case IPV6_ONLY:
506 			/* Only IPv6 address is needed */
507 			hp = getipnodebyname(hostname, AF_INET6, 0, &error_num);
508 			if (hp != NULL) {
509 				found_host = 1;
510 			}
511 			break;
512 		case IPV4_AND_IPV6:
513 			/* Both IPv4 and IPv6 are needed */
514 			hp = getipnodebyname(hostname, AF_INET6,
515 			    AI_ALL | AI_V4MAPPED, &error_num);
516 			if (hp != NULL) {
517 				found_host = 1;
518 			}
519 			break;
520 		default:
521 			found_host = 0;
522 		}
523 
524 		if (!found_host) {
525 			if (error_num == TRY_AGAIN) {
526 				pr_err("could not resolve %s (try again later)",
527 				    hostname);
528 			} else {
529 				pr_err("could not resolve %s", hostname);
530 			}
531 		}
532 	} else {
533 		pr_err("unknown token type: %s", hostname);
534 	}
535 
536 	switch (which) {
537 	case TO:
538 		addr4offset = IPV4_DSTADDR_OFFSET;
539 		addr6offset = IPV6_DSTADDR_OFFSET;
540 		break;
541 	case FROM:
542 		addr4offset = IPV4_SRCADDR_OFFSET;
543 		addr6offset = IPV6_SRCADDR_OFFSET;
544 		break;
545 	case ANY:
546 		addr4offset = -1;
547 		addr6offset = -1;
548 		break;
549 	}
550 
551 	if (hp != NULL && hp->h_addrtype == AF_INET) {
552 		pf_match_ethertype(ETHERTYPE_IP);
553 		pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
554 		h_addr_index = 0;
555 		addr4ptr = (uint_t *)hp->h_addr_list[h_addr_index];
556 		while (addr4ptr != NULL) {
557 			if (addr4offset == -1) {
558 				pf_compare_value(IPV4_SRCADDR_OFFSET, 4,
559 				    *addr4ptr);
560 				if (h_addr_index != 0)
561 					pf_emit(ENF_OR);
562 				pf_compare_value(IPV4_DSTADDR_OFFSET, 4,
563 				    *addr4ptr);
564 				pf_emit(ENF_OR);
565 			} else {
566 				pf_compare_value(addr4offset, 4,
567 				    *addr4ptr);
568 				if (h_addr_index != 0)
569 					pf_emit(ENF_OR);
570 			}
571 			addr4ptr = (uint_t *)hp->h_addr_list[++h_addr_index];
572 		}
573 		pf_emit(ENF_AND);
574 	} else {
575 		/* first pass: IPv4 addresses */
576 		h_addr_index = 0;
577 		addr6ptr = (struct in6_addr *)hp->h_addr_list[h_addr_index];
578 		first = B_TRUE;
579 		while (addr6ptr != NULL) {
580 			if (IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
581 				if (first) {
582 					pf_match_ethertype(ETHERTYPE_IP);
583 					pf_check_vlan_tag(
584 					    ENCAP_ETHERTYPE_OFF/2);
585 					pass++;
586 				}
587 				IN6_V4MAPPED_TO_INADDR(addr6ptr,
588 				    (struct in_addr *)&addr4);
589 				if (addr4offset == -1) {
590 					pf_compare_value(IPV4_SRCADDR_OFFSET, 4,
591 					    addr4);
592 					if (!first)
593 						pf_emit(ENF_OR);
594 					pf_compare_value(IPV4_DSTADDR_OFFSET, 4,
595 					    addr4);
596 					pf_emit(ENF_OR);
597 				} else {
598 					pf_compare_value(addr4offset, 4,
599 					    addr4);
600 					if (!first)
601 						pf_emit(ENF_OR);
602 				}
603 				if (first)
604 					first = B_FALSE;
605 			}
606 			addr6ptr = (struct in6_addr *)
607 				hp->h_addr_list[++h_addr_index];
608 		}
609 		if (!first) {
610 			pf_emit(ENF_AND);
611 		}
612 		/* second pass: IPv6 addresses */
613 		h_addr_index = 0;
614 		addr6ptr = (struct in6_addr *)hp->h_addr_list[h_addr_index];
615 		first = B_TRUE;
616 		while (addr6ptr != NULL) {
617 			if (!IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
618 				if (first) {
619 					pf_match_ethertype(ETHERTYPE_IPV6);
620 					pf_check_vlan_tag(
621 					    ENCAP_ETHERTYPE_OFF/2);
622 					pass++;
623 				}
624 				if (addr6offset == -1) {
625 					pf_compare_value_v6(IPV6_SRCADDR_OFFSET,
626 					    16, *addr6ptr);
627 					if (!first)
628 						pf_emit(ENF_OR);
629 					pf_compare_value_v6(IPV6_DSTADDR_OFFSET,
630 					    16, *addr6ptr);
631 					pf_emit(ENF_OR);
632 				} else {
633 					pf_compare_value_v6(addr6offset, 16,
634 					    *addr6ptr);
635 					if (!first)
636 						pf_emit(ENF_OR);
637 				}
638 				if (first)
639 					first = B_FALSE;
640 			}
641 			addr6ptr = (struct in6_addr *)
642 				hp->h_addr_list[++h_addr_index];
643 		}
644 		if (!first) {
645 			pf_emit(ENF_AND);
646 		}
647 		if (pass == 2) {
648 			pf_emit(ENF_OR);
649 		}
650 	}
651 
652 	if (hp != NULL) {
653 		freehostent(hp);
654 	}
655 }
656 
657 
658 static void
659 pf_compare_address(int offset, uint_t len, uchar_t *addr)
660 {
661 	uint32_t val;
662 	uint16_t sval;
663 	boolean_t didone = B_FALSE;
664 
665 	/*
666 	 * If the property being filtered on is absent in the media
667 	 * packet, error out.
668 	 */
669 	if (offset == -1)
670 		pr_err("filter option unsupported on media");
671 
672 	while (len > 0) {
673 		if (len >= 4) {
674 			(void) memcpy(&val, addr, 4);
675 			pf_compare_value(offset, 4, val);
676 			addr += 4;
677 			offset += 4;
678 			len -= 4;
679 		} else if (len >= 2) {
680 			(void) memcpy(&sval, addr, 2);
681 			pf_compare_value(offset, 2, sval);
682 			addr += 2;
683 			offset += 2;
684 			len -= 2;
685 		} else {
686 			pf_compare_value(offset++, 1, *addr++);
687 			len--;
688 		}
689 		if (didone)
690 			pf_emit(ENF_AND);
691 		didone = B_TRUE;
692 	}
693 }
694 
695 /*
696  * Compare ethernet addresses.
697  */
698 static void
699 pf_etheraddr_match(which, hostname)
700 	enum direction which;
701 	char *hostname;
702 {
703 	struct ether_addr e, *ep = NULL;
704 
705 	if (isxdigit(*hostname))
706 		ep = ether_aton(hostname);
707 	if (ep == NULL) {
708 		if (ether_hostton(hostname, &e))
709 			if (!arp_for_ether(hostname, &e))
710 				pr_err("cannot obtain ether addr for %s",
711 					hostname);
712 		ep = &e;
713 	}
714 
715 	pf_clear_offset_register();
716 
717 	switch (which) {
718 	case TO:
719 		pf_compare_address(link_dest_offset, link_addr_len,
720 		    (uchar_t *)ep);
721 		break;
722 	case FROM:
723 		pf_compare_address(link_src_offset, link_addr_len,
724 		    (uchar_t *)ep);
725 		break;
726 	case ANY:
727 		pf_compare_address(link_dest_offset, link_addr_len,
728 		    (uchar_t *)ep);
729 		pf_compare_address(link_src_offset, link_addr_len,
730 		    (uchar_t *)ep);
731 		pf_emit(ENF_OR);
732 		break;
733 	}
734 }
735 
736 /*
737  * Emit code to compare the network part of
738  * an IP address.
739  */
740 static void
741 pf_netaddr_match(which, netname)
742 	enum direction which;
743 	char *netname;
744 {
745 	uint_t addr;
746 	uint_t mask = 0xff000000;
747 	struct netent *np;
748 
749 	if (isdigit(*netname)) {
750 		addr = inet_network(netname);
751 	} else {
752 		np = getnetbyname(netname);
753 		if (np == NULL)
754 			pr_err("net %s not known", netname);
755 		addr = np->n_net;
756 	}
757 
758 	/*
759 	 * Left justify the address and figure
760 	 * out a mask based on the supplied address.
761 	 * Set the mask according to the number of zero
762 	 * low-order bytes.
763 	 * Note: this works only for whole octet masks.
764 	 */
765 	if (addr) {
766 		while ((addr & ~mask) != 0) {
767 			mask |= (mask >> 8);
768 		}
769 	}
770 
771 	pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
772 
773 	switch (which) {
774 	case TO:
775 		pf_compare_value_mask(IPV4_DSTADDR_OFFSET, 4, addr, mask);
776 		break;
777 	case FROM:
778 		pf_compare_value_mask(IPV4_SRCADDR_OFFSET, 4, addr, mask);
779 		break;
780 	case ANY:
781 		pf_compare_value_mask(IPV4_SRCADDR_OFFSET, 4, addr, mask);
782 		pf_compare_value_mask(IPV4_DSTADDR_OFFSET, 4, addr, mask);
783 		pf_emit(ENF_OR);
784 		break;
785 	}
786 }
787 
788 /*
789  * A helper function to keep the code to emit instructions
790  * to change the offset register in one place.
791  *
792  * INPUTS: offset - An value representing an offset in 16-bit
793  *                  words.
794  * OUTPUTS:  If there is enough room in the storage for the
795  *           packet filtering program, instructions to load
796  *           a constant to the offset register.  Otherwise,
797  *           nothing.
798  */
799 static void
800 pf_emit_load_offset(uint_t offset)
801 {
802 	pf_emit(ENF_LOAD_OFFSET | ENF_NOP);
803 	pf_emit(offset);
804 }
805 
806 /*
807  * Clear pfmod's offset register.
808  *
809  * INPUTS:  none
810  * OUTPUTS:  Instructions to clear the offset register if
811  *           there is enough space remaining in the packet
812  *           filtering program structure's storage, and
813  *           the last thing done to the offset register was
814  *           not clearing the offset register.  Otherwise,
815  *           nothing.
816  */
817 static void
818 pf_clear_offset_register()
819 {
820 	if (last_offset_operation != (void*)pf_clear_offset_register) {
821 		pf_emit_load_offset(0);
822 		last_offset_operation = (void*)pf_clear_offset_register;
823 	}
824 }
825 
826 /*
827  * This function will issue opcodes to check if a packet
828  * is VLAN tagged, and if so, update the offset register
829  * with the appropriate offset.
830  *
831  * Note that if the packet is not VLAN tagged, then the offset
832  * register will be cleared.
833  *
834  * If the interface type is not an ethernet type, then this
835  * function returns without doing anything.
836  *
837  * If the last attempt to change the offset register occured because
838  * of a call to this function that was called with the same offset,
839  * then we don't issue packet filtering instructions.
840  *
841  * INPUTS:  offset - an offset in 16 bit words.  The function
842  *                   will set the offset register to this
843  *                   value if the packet is VLAN tagged.
844  * OUTPUTS:  If the conditions are met, packet filtering instructions.
845  */
846 static void
847 pf_check_vlan_tag(uint_t offset)
848 {
849 	static uint_t last_offset = 0;
850 
851 	if ((interface->mac_type == DL_ETHER ||
852 	    interface->mac_type == DL_CSMACD) &&
853 	    (last_offset_operation != (void*)pf_check_vlan_tag ||
854 	    last_offset != offset)) {
855 		/*
856 		 * First thing is to clear the offset register.
857 		 * We don't know what state it is in, and if it
858 		 * is not zero, then we have no idea what we load
859 		 * when we execute ENF_PUSHWORD.
860 		 */
861 		pf_clear_offset_register();
862 
863 		/*
864 		 * Check the ethertype.
865 		 */
866 		pf_compare_value(link_type_offset, 2, htons(ETHERTYPE_VLAN));
867 
868 		/*
869 		 * And if it's not VLAN, don't load offset to the offset
870 		 * register.
871 		 */
872 		pf_emit(ENF_BRFL | ENF_NOP);
873 		pf_emit(3);
874 
875 		/*
876 		 * Otherwise, load offset to the offset register.
877 		 */
878 		pf_emit_load_offset(offset);
879 
880 		/*
881 		 * Now get rid of the results of the comparison,
882 		 * we don't want the results of the comparison to affect
883 		 * other logic in the packet filtering program.
884 		 */
885 		pf_emit(ENF_POP | ENF_NOP);
886 
887 		/*
888 		 * Set the last operation at the end, or any time
889 		 * after the call to pf_clear_offset because
890 		 * pf_clear_offset uses it.
891 		 */
892 		last_offset_operation = (void*)pf_check_vlan_tag;
893 		last_offset = offset;
894 	}
895 }
896 
897 /*
898  * Utility function used to emit packet filtering code
899  * to match an ethertype.
900  *
901  * INPUTS:  ethertype - The ethertype we want to check for.
902  *                      Don't call htons on the ethertype before
903  *                      calling this function.
904  * OUTPUTS:  If there is sufficient storage available, packet
905  *           filtering code to check an ethertype.  Otherwise,
906  *           nothing.
907  */
908 static void
909 pf_match_ethertype(uint_t ethertype)
910 {
911 	/*
912 	 * If the user wants to filter on ethertype VLAN,
913 	 * then clear the offset register so that the offset
914 	 * for ENF_PUSHWORD points to the right place in the
915 	 * packet.
916 	 *
917 	 * Otherwise, call pf_check_vlan_tag to set the offset
918 	 * register such that the contents of the offset register
919 	 * plus the argument for ENF_PUSHWORD point to the right
920 	 * part of the packet, whether or not the packet is VLAN
921 	 * tagged.  We call pf_check_vlan_tag with an offset of
922 	 * two words because if the packet is VLAN tagged, we have
923 	 * to move past the ethertype in the ethernet header, and
924 	 * past the lower two octets of the VLAN header to get to
925 	 * the ethertype in the VLAN header.
926 	 */
927 	if (ethertype == ETHERTYPE_VLAN)
928 		pf_clear_offset_register();
929 	else
930 		pf_check_vlan_tag(2);
931 
932 	pf_compare_value(link_type_offset, 2, htons(ethertype));
933 }
934 
935 typedef struct {
936 	int	transport_protocol;
937 	int	network_protocol;
938 	/*
939 	 * offset is the offset in bytes from the beginning
940 	 * of the network protocol header to where the transport
941 	 * protocol type is.
942 	 */
943 	int	offset;
944 } transport_protocol_table_t;
945 
946 static transport_protocol_table_t mapping_table[] = {
947 	{IPPROTO_TCP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
948 	{IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
949 	{IPPROTO_UDP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
950 	{IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
951 	{IPPROTO_OSPF, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
952 	{IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
953 	{IPPROTO_SCTP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
954 	{IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
955 	{IPPROTO_ICMP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
956 	{IPPROTO_ICMPV6, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
957 	{IPPROTO_ENCAP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
958 	{IPPROTO_ESP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
959 	{IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
960 	{IPPROTO_AH, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
961 	{IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
962 	{-1, 0, 0}	/* must be the final entry */
963 };
964 
965 /*
966  * This function uses the table above to generate a
967  * piece of a packet filtering program to check a transport
968  * protocol type.
969  *
970  * INPUTS:  tranport_protocol - the transport protocol we're
971  *                              interested in.
972  * OUTPUTS:  If there is sufficient storage, then packet filtering
973  *           code to check a transport protocol type.  Otherwise,
974  *           nothing.
975  */
976 static void
977 pf_check_transport_protocol(uint_t transport_protocol)
978 {
979 	int i = 0;
980 	uint_t number_of_matches = 0;
981 
982 	for (i = 0; mapping_table[i].transport_protocol != -1; i++) {
983 		if (transport_protocol ==
984 		    (uint_t)mapping_table[i].transport_protocol) {
985 			number_of_matches++;
986 			pf_match_ethertype(mapping_table[i].network_protocol);
987 			pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
988 			pf_compare_value(
989 			    mapping_table[i].offset + link_header_len, 1,
990 			    transport_protocol);
991 			pf_emit(ENF_AND);
992 			if (number_of_matches > 1) {
993 				/*
994 				 * Since we have two or more matches, in
995 				 * order to have a correct and complete
996 				 * program we need to OR the result of
997 				 * each block of comparisons together.
998 				 */
999 				pf_emit(ENF_OR);
1000 			}
1001 		}
1002 	}
1003 }
1004 
1005 static void
1006 pf_primary()
1007 {
1008 	for (;;) {
1009 		if (tokentype == FIELD)
1010 			break;
1011 
1012 		if (EQ("ip")) {
1013 			pf_match_ethertype(ETHERTYPE_IP);
1014 			opstack++;
1015 			next();
1016 			break;
1017 		}
1018 
1019 		if (EQ("ip6")) {
1020 			pf_match_ethertype(ETHERTYPE_IPV6);
1021 			opstack++;
1022 			next();
1023 			break;
1024 		}
1025 
1026 		if (EQ("pppoe")) {
1027 			pf_match_ethertype(ETHERTYPE_PPPOED);
1028 			pf_match_ethertype(ETHERTYPE_PPPOES);
1029 			pf_emit(ENF_OR);
1030 			opstack++;
1031 			next();
1032 			break;
1033 		}
1034 
1035 		if (EQ("pppoed")) {
1036 			pf_match_ethertype(ETHERTYPE_PPPOED);
1037 			opstack++;
1038 			next();
1039 			break;
1040 		}
1041 
1042 		if (EQ("pppoes")) {
1043 			pf_match_ethertype(ETHERTYPE_PPPOES);
1044 			opstack++;
1045 			next();
1046 			break;
1047 		}
1048 
1049 		if (EQ("arp")) {
1050 			pf_match_ethertype(ETHERTYPE_ARP);
1051 			opstack++;
1052 			next();
1053 			break;
1054 		}
1055 
1056 		if (EQ("vlan")) {
1057 			pf_match_ethertype(ETHERTYPE_VLAN);
1058 			pf_compare_value_mask_neq(VLAN_ID_OFFSET, 2,
1059 			    0, VLAN_ID_MASK);
1060 			pf_emit(ENF_AND);
1061 			opstack++;
1062 			next();
1063 			break;
1064 		}
1065 
1066 		if (EQ("vlan-id")) {
1067 			next();
1068 			if (tokentype != NUMBER)
1069 				pr_err("VLAN ID expected");
1070 			pf_match_ethertype(ETHERTYPE_VLAN);
1071 			pf_compare_value_mask(VLAN_ID_OFFSET, 2, tokenval,
1072 			    VLAN_ID_MASK);
1073 			pf_emit(ENF_AND);
1074 			opstack++;
1075 			next();
1076 			break;
1077 		}
1078 
1079 		if (EQ("rarp")) {
1080 			pf_match_ethertype(ETHERTYPE_REVARP);
1081 			opstack++;
1082 			next();
1083 			break;
1084 		}
1085 
1086 		if (EQ("tcp")) {
1087 			pf_check_transport_protocol(IPPROTO_TCP);
1088 			opstack++;
1089 			next();
1090 			break;
1091 		}
1092 
1093 		if (EQ("udp")) {
1094 			pf_check_transport_protocol(IPPROTO_UDP);
1095 			opstack++;
1096 			next();
1097 			break;
1098 		}
1099 
1100 		if (EQ("ospf")) {
1101 			pf_check_transport_protocol(IPPROTO_OSPF);
1102 			opstack++;
1103 			next();
1104 			break;
1105 		}
1106 
1107 
1108 		if (EQ("sctp")) {
1109 			pf_check_transport_protocol(IPPROTO_SCTP);
1110 			opstack++;
1111 			next();
1112 			break;
1113 		}
1114 
1115 		if (EQ("icmp")) {
1116 			pf_check_transport_protocol(IPPROTO_ICMP);
1117 			opstack++;
1118 			next();
1119 			break;
1120 		}
1121 
1122 		if (EQ("icmp6")) {
1123 			pf_check_transport_protocol(IPPROTO_ICMPV6);
1124 			opstack++;
1125 			next();
1126 			break;
1127 		}
1128 
1129 		if (EQ("ip-in-ip")) {
1130 			pf_check_transport_protocol(IPPROTO_ENCAP);
1131 			opstack++;
1132 			next();
1133 			break;
1134 		}
1135 
1136 		if (EQ("esp")) {
1137 			pf_check_transport_protocol(IPPROTO_ESP);
1138 			opstack++;
1139 			next();
1140 			break;
1141 		}
1142 
1143 		if (EQ("ah")) {
1144 			pf_check_transport_protocol(IPPROTO_AH);
1145 			opstack++;
1146 			next();
1147 			break;
1148 		}
1149 
1150 		if (EQ("(")) {
1151 			inBrace++;
1152 			next();
1153 			pf_expression();
1154 			if (EQ(")")) {
1155 				if (inBrace)
1156 					inBraceOR--;
1157 				inBrace--;
1158 				next();
1159 			}
1160 			break;
1161 		}
1162 
1163 		if (EQ("to") || EQ("dst")) {
1164 			dir = TO;
1165 			next();
1166 			continue;
1167 		}
1168 
1169 		if (EQ("from") || EQ("src")) {
1170 			dir = FROM;
1171 			next();
1172 			continue;
1173 		}
1174 
1175 		if (EQ("ether")) {
1176 			eaddr = 1;
1177 			next();
1178 			continue;
1179 		}
1180 
1181 		if (EQ("inet")) {
1182 			next();
1183 			if (EQ("host"))
1184 				next();
1185 			if (tokentype != ALPHA && tokentype != ADDR_IP)
1186 				pr_err("host/IPv4 addr expected after inet");
1187 			pf_ipaddr_match(dir, token, IPV4_ONLY);
1188 			opstack++;
1189 			next();
1190 			break;
1191 		}
1192 
1193 		if (EQ("inet6")) {
1194 			next();
1195 			if (EQ("host"))
1196 				next();
1197 			if (tokentype != ALPHA && tokentype != ADDR_IP6)
1198 				pr_err("host/IPv6 addr expected after inet6");
1199 			pf_ipaddr_match(dir, token, IPV6_ONLY);
1200 			opstack++;
1201 			next();
1202 			break;
1203 		}
1204 
1205 		if (EQ("proto")) {
1206 			next();
1207 			if (tokentype != NUMBER)
1208 				pr_err("IP proto type expected");
1209 			pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
1210 			pf_compare_value(
1211 			    IPV4_TYPE_HEADER_OFFSET + link_header_len, 1,
1212 			    tokenval);
1213 			opstack++;
1214 			next();
1215 			break;
1216 		}
1217 
1218 		if (EQ("broadcast")) {
1219 			pf_clear_offset_register();
1220 			pf_compare_value(link_dest_offset, 4, 0xffffffff);
1221 			opstack++;
1222 			next();
1223 			break;
1224 		}
1225 
1226 		if (EQ("multicast")) {
1227 			pf_clear_offset_register();
1228 			pf_compare_value_mask(link_dest_offset, 1, 0x01, 0x01);
1229 			opstack++;
1230 			next();
1231 			break;
1232 		}
1233 
1234 		if (EQ("ethertype")) {
1235 			next();
1236 			if (tokentype != NUMBER)
1237 				pr_err("ether type expected");
1238 			pf_match_ethertype(tokenval);
1239 			opstack++;
1240 			next();
1241 			break;
1242 		}
1243 
1244 		if (EQ("net") || EQ("dstnet") || EQ("srcnet")) {
1245 			if (EQ("dstnet"))
1246 				dir = TO;
1247 			else if (EQ("srcnet"))
1248 				dir = FROM;
1249 			next();
1250 			pf_netaddr_match(dir, token);
1251 			dir = ANY;
1252 			opstack++;
1253 			next();
1254 			break;
1255 		}
1256 
1257 		/*
1258 		 * Give up on anything that's obviously
1259 		 * not a primary.
1260 		 */
1261 		if (EQ("and") || EQ("or") ||
1262 		    EQ("not") || EQ("decnet") || EQ("apple") ||
1263 		    EQ("length") || EQ("less") || EQ("greater") ||
1264 		    EQ("port") || EQ("srcport") || EQ("dstport") ||
1265 		    EQ("rpc") || EQ("gateway") || EQ("nofrag") ||
1266 		    EQ("bootp") || EQ("dhcp") || EQ("dhcp6") ||
1267 		    EQ("slp") || EQ("ldap")) {
1268 			break;
1269 		}
1270 
1271 		if (EQ("host") || EQ("between") ||
1272 		    tokentype == ALPHA || /* assume its a hostname */
1273 		    tokentype == ADDR_IP ||
1274 		    tokentype == ADDR_IP6 ||
1275 		    tokentype == ADDR_ETHER) {
1276 			if (EQ("host") || EQ("between"))
1277 				next();
1278 			if (eaddr || tokentype == ADDR_ETHER) {
1279 				pf_etheraddr_match(dir, token);
1280 			} else if (tokentype == ALPHA) {
1281 				pf_ipaddr_match(dir, token, IPV4_AND_IPV6);
1282 			} else if (tokentype == ADDR_IP) {
1283 				pf_ipaddr_match(dir, token, IPV4_ONLY);
1284 			} else {
1285 				pf_ipaddr_match(dir, token, IPV6_ONLY);
1286 			}
1287 			dir = ANY;
1288 			eaddr = 0;
1289 			opstack++;
1290 			next();
1291 			break;
1292 		}
1293 
1294 		break;	/* unknown token */
1295 	}
1296 }
1297 
1298 static void
1299 pf_alternation()
1300 {
1301 	int s = opstack;
1302 
1303 	pf_primary();
1304 	for (;;) {
1305 		if (EQ("and"))
1306 			next();
1307 		pf_primary();
1308 		if (opstack != s + 2)
1309 			break;
1310 		pf_emit(ENF_AND);
1311 		opstack--;
1312 	}
1313 }
1314 
1315 static void
1316 pf_expression()
1317 {
1318 	pf_alternation();
1319 	while (EQ("or") || EQ(",")) {
1320 		if (inBrace)
1321 			inBraceOR++;
1322 		else
1323 			foundOR++;
1324 		next();
1325 		pf_alternation();
1326 		pf_emit(ENF_OR);
1327 		opstack--;
1328 	}
1329 }
1330 
1331 /*
1332  * Attempt to compile the expression
1333  * in the string "e".  If we can generate
1334  * pf code for it then return 1 - otherwise
1335  * return 0 and leave it up to the user-level
1336  * filter.
1337  */
1338 int
1339 pf_compile(e, print)
1340 	char *e;
1341 	int print;
1342 {
1343 	char *argstr;
1344 	char *sav_str, *ptr, *sav_ptr;
1345 	int inBr = 0, aheadOR = 0;
1346 
1347 	argstr = strdup(e);
1348 	sav_str = e;
1349 	tkp = argstr;
1350 	dir = ANY;
1351 
1352 	pfp = &pf.Pf_Filter[0];
1353 	if (setjmp(env)) {
1354 		return (0);
1355 	}
1356 
1357 	/*
1358 	 * Set media specific packet offsets that this code uses.
1359 	 */
1360 	if (interface->mac_type == DL_IB) {
1361 		link_header_len = 4;
1362 		link_type_offset = 0;
1363 		link_dest_offset = link_src_offset = -1;
1364 		link_addr_len = 20;
1365 	}
1366 
1367 	next();
1368 	pf_expression();
1369 
1370 	if (tokentype != EOL) {
1371 		/*
1372 		 * The idea here is to do as much filtering as possible in
1373 		 * the kernel. So even if we find a token we don't understand,
1374 		 * we try to see if we can still set up a portion of the filter
1375 		 * in the kernel and use the userland filter to filter the
1376 		 * remaining stuff. Obviously, if our filter expression is of
1377 		 * type A AND B, we can filter A in kernel and then apply B
1378 		 * to the packets that got through. The same is not true for
1379 		 * a filter of type A OR B. We can't apply A first and then B
1380 		 * on the packets filtered through A.
1381 		 *
1382 		 * (We need to keep track of the fact when we find an OR,
1383 		 * and the fact that we are inside brackets when we find OR.
1384 		 * The variable 'foundOR' tells us if there was an OR behind,
1385 		 * 'inBraceOR' tells us if we found an OR before we could find
1386 		 * the end brace i.e. ')', and variable 'aheadOR' checks if
1387 		 * there is an OR in the expression ahead. if either of these
1388 		 * cases become true, we can't split the filtering)
1389 		 */
1390 
1391 		if (foundOR || inBraceOR) {
1392 			/* FORGET IN KERNEL FILTERING */
1393 			return (0);
1394 		} else {
1395 
1396 			/* CHECK IF NO OR AHEAD */
1397 			sav_ptr = (char *)((uintptr_t)sav_str +
1398 						(uintptr_t)sav_tkp -
1399 						(uintptr_t)argstr);
1400 			ptr = sav_ptr;
1401 			while (*ptr != '\0') {
1402 				switch (*ptr) {
1403 				case '(':
1404 					inBr++;
1405 					break;
1406 				case ')':
1407 					inBr--;
1408 					break;
1409 				case 'o':
1410 				case 'O':
1411 					if ((*(ptr + 1) == 'R' ||
1412 						*(ptr + 1) == 'r') && !inBr)
1413 						aheadOR = 1;
1414 					break;
1415 				case ',':
1416 					if (!inBr)
1417 						aheadOR = 1;
1418 					break;
1419 				}
1420 				ptr++;
1421 			}
1422 			if (!aheadOR) {
1423 				/* NO OR AHEAD, SPLIT UP THE FILTERING */
1424 				pf.Pf_FilterLen = pfp - &pf.Pf_Filter[0];
1425 				pf.Pf_Priority = 5;
1426 				if (print) {
1427 					pf_codeprint(&pf.Pf_Filter[0],
1428 							pf.Pf_FilterLen);
1429 				}
1430 				compile(sav_ptr, print);
1431 				return (2);
1432 			} else
1433 				return (0);
1434 		}
1435 	}
1436 
1437 	pf.Pf_FilterLen = pfp - &pf.Pf_Filter[0];
1438 	pf.Pf_Priority = 5;	/* unimportant, so long as > 2 */
1439 	if (print) {
1440 		pf_codeprint(&pf.Pf_Filter[0], pf.Pf_FilterLen);
1441 	}
1442 	return (1);
1443 }
1444