xref: /illumos-gate/usr/src/uts/common/io/pfmod.c (revision 2983dda76a6d296fdb560c88114fe41caad1b84f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * STREAMS Packet Filter Module
30  *
31  * This module applies a filter to messages arriving on its read
32  * queue, passing on messages that the filter accepts and discarding
33  * the others.  It supports ioctls for setting the filter.
34  *
35  * On the write side, the module simply passes everything through
36  * unchanged.
37  *
38  * Based on SunOS 4.x version.  This version has minor changes:
39  *	- general SVR4 porting stuff
40  * 	- change name and prefixes from "nit" buffer to streams buffer
41  *	- multithreading assumes configured as D_MTQPAIR
42  */
43 
44 #include <sys/types.h>
45 #include <sys/sysmacros.h>
46 #include <sys/errno.h>
47 #include <sys/debug.h>
48 #include <sys/time.h>
49 #include <sys/stropts.h>
50 #include <sys/stream.h>
51 #include <sys/conf.h>
52 #include <sys/ddi.h>
53 #include <sys/sunddi.h>
54 #include <sys/kmem.h>
55 #include <sys/strsun.h>
56 #include <sys/pfmod.h>
57 #include <sys/modctl.h>
58 
59 /*
60  * Expanded version of the Packetfilt structure that includes
61  * some additional fields that aid filter execution efficiency.
62  */
63 struct epacketfilt {
64 	struct Pf_ext_packetfilt	pf;
65 #define	pf_Priority	pf.Pf_Priority
66 #define	pf_FilterLen	pf.Pf_FilterLen
67 #define	pf_Filter	pf.Pf_Filter
68 	/* pointer to word immediately past end of filter */
69 	ushort_t		*pf_FilterEnd;
70 	/* length in bytes of packet prefix the filter examines */
71 	ushort_t		pf_PByteLen;
72 };
73 
74 /*
75  * (Internal) packet descriptor for FilterPacket
76  */
77 struct packdesc {
78 	ushort_t	*pd_hdr;	/* header starting address */
79 	uint_t		pd_hdrlen;	/* header length in shorts */
80 	ushort_t	*pd_body;	/* body starting address */
81 	uint_t		pd_bodylen;	/* body length in shorts */
82 };
83 
84 
85 /*
86  * Function prototypes.
87  */
88 static	int	pfopen(queue_t *, dev_t *, int, int, cred_t *);
89 static	int	pfclose(queue_t *);
90 static void	pfioctl(queue_t *wq, mblk_t *mp);
91 static	int	FilterPacket(struct packdesc *, struct epacketfilt *);
92 /*
93  * To save instructions, since STREAMS ignores the return value
94  * from these functions, they are defined as void here. Kind of icky, but...
95  */
96 static void	pfwput(queue_t *, mblk_t *);
97 static void	pfrput(queue_t *, mblk_t *);
98 
/*
 * Module information referenced by both the read-side and the
 * write-side qinit structures of this module.
 */
static struct module_info pf_minfo = {
	22,		/* mi_idnum */
	"pfmod",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	0,		/* mi_hiwat */
	0		/* mi_lowat */
};
107 
/*
 * Read-side queue initialization.  The read side carries the open and
 * close entry points and has a put procedure but no service procedure.
 * pfrput() is declared void, hence the cast to the expected type.
 */
static struct qinit pf_rinit = {
	(int (*)())pfrput,	/* qi_putp */
	NULL,			/* qi_srvp */
	pfopen,			/* qi_qopen */
	pfclose,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&pf_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
117 
/*
 * Write-side queue initialization.  Only a put procedure is supplied;
 * the open and close slots are NULL here (pfopen/pfclose are named in
 * the read-side qinit).  pfwput() is declared void, hence the cast.
 */
static struct qinit pf_winit = {
	(int (*)())pfwput,	/* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&pf_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
127 
/*
 * Stream table tying the read and write qinit structures together.
 * The multiplexor slots are left NULL.
 */
static struct streamtab pf_info = {
	&pf_rinit,	/* st_rdinit */
	&pf_winit,	/* st_wrinit */
	NULL,		/* st_muxrinit */
	NULL		/* st_muxwinit */
};
134 
/*
 * STREAMS module switch entry: module name, its streamtab, and the
 * multithreading flags.  The rest of this file assumes the module is
 * configured as D_MTQPAIR.
 */
static struct fmodsw fsw = {
	"pfmod",
	&pf_info,
	D_MTQPAIR | D_MP
};
140 
/*
 * Linkage structure describing this loadable object as a STREAMS
 * module: module operations, a descriptive string, and the fmodsw
 * entry defined above.
 */
static struct modlstrmod modlstrmod = {
	&mod_strmodops, "streams packet filter module", &fsw
};
144 
/*
 * Top-level module linkage handed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info(): revision, followed by a
 * NULL-terminated list holding the single STREAMS-module linkage.
 */
static struct modlinkage modlinkage = {
	MODREV_1, &modlstrmod, NULL
};
148 
149 int
150 _init(void)
151 {
152 	return (mod_install(&modlinkage));
153 }
154 
155 int
156 _fini(void)
157 {
158 	return (mod_remove(&modlinkage));
159 }
160 
161 int
162 _info(struct modinfo *modinfop)
163 {
164 	return (mod_info(&modlinkage, modinfop));
165 }
166 
167 /*ARGSUSED*/
168 static int
169 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
170 {
171 	struct epacketfilt	*pfp;
172 
173 	ASSERT(rq);
174 
175 	if (sflag != MODOPEN)
176 		return (EINVAL);
177 
178 	if (rq->q_ptr)
179 		return (0);
180 
181 	/*
182 	 * Allocate and initialize per-Stream structure.
183 	 */
184 	pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
185 	rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
186 
187 	qprocson(rq);
188 
189 	return (0);
190 }
191 
192 static int
193 pfclose(queue_t	*rq)
194 {
195 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
196 
197 	ASSERT(pfp);
198 
199 	qprocsoff(rq);
200 
201 	kmem_free(pfp, sizeof (struct epacketfilt));
202 	rq->q_ptr = WR(rq)->q_ptr = NULL;
203 
204 	return (0);
205 }
206 
207 /*
208  * Write-side put procedure.  Its main task is to detect ioctls.
209  * Other message types are passed on through.
210  */
211 static void
212 pfwput(queue_t *wq, mblk_t *mp)
213 {
214 	switch (mp->b_datap->db_type) {
215 	case M_IOCTL:
216 		pfioctl(wq, mp);
217 		break;
218 
219 	default:
220 		putnext(wq, mp);
221 		break;
222 	}
223 }
224 
225 /*
226  * Read-side put procedure.  It's responsible for applying the
227  * packet filter and passing upstream message on or discarding it
228  * depending upon the results.
229  *
230  * Upstream messages can start with zero or more M_PROTO mblks
231  * which are skipped over before executing the packet filter
232  * on any remaining M_DATA mblks.
233  */
234 static void
235 pfrput(queue_t *rq, mblk_t *mp)
236 {
237 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
238 	mblk_t	*mbp, *mpp;
239 	struct	packdesc	pd;
240 	int	need;
241 
242 	ASSERT(pfp);
243 
244 	switch (DB_TYPE(mp)) {
245 	case M_PROTO:
246 	case M_DATA:
247 		/*
248 		 * Skip over protocol information and find the start
249 		 * of the message body, saving the overall message
250 		 * start in mpp.
251 		 */
252 		for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
253 			;
254 
255 		/*
256 		 * Null body (exclusive of M_PROTO blocks) ==> accept.
257 		 * Note that a null body is not the same as an empty body.
258 		 */
259 		if (mp == NULL) {
260 			putnext(rq, mpp);
261 			break;
262 		}
263 
264 		/*
265 		 * Pull the packet up to the length required by
266 		 * the filter.  Note that doing so destroys sharing
267 		 * relationships, which is unfortunate, since the
268 		 * results of pulling up here are likely to be useful
269 		 * for shared messages applied to a filter on a sibling
270 		 * stream.
271 		 *
272 		 * Most packet sources will provide the packet in two
273 		 * logical pieces: an initial header in a single mblk,
274 		 * and a body in a sequence of mblks hooked to the
275 		 * header.  We're prepared to deal with variant forms,
276 		 * but in any case, the pullup applies only to the body
277 		 * part.
278 		 */
279 		mbp = mp->b_cont;
280 		need = pfp->pf_PByteLen;
281 		if (mbp && (MBLKL(mbp) < need)) {
282 			int len = msgdsize(mbp);
283 
284 			/* XXX discard silently on pullupmsg failure */
285 			if (pullupmsg(mbp, MIN(need, len)) == 0) {
286 				freemsg(mpp);
287 				break;
288 			}
289 		}
290 
291 		/*
292 		 * Misalignment (not on short boundary) ==> reject.
293 		 */
294 		if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
295 		    (mbp != NULL &&
296 		    ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
297 			freemsg(mpp);
298 			break;
299 		}
300 
301 		/*
302 		 * These assignments are distasteful, but necessary,
303 		 * since the packet filter wants to work in terms of
304 		 * shorts.  Odd bytes at the end of header or data can't
305 		 * participate in the filtering operation.
306 		 */
307 		pd.pd_hdr = (ushort_t *)mp->b_rptr;
308 		pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
309 		if (mbp) {
310 			pd.pd_body = (ushort_t *)mbp->b_rptr;
311 			pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
312 							sizeof (ushort_t);
313 		} else {
314 			pd.pd_body = NULL;
315 			pd.pd_bodylen = 0;
316 		}
317 
318 		/*
319 		 * Apply the filter.
320 		 */
321 		if (FilterPacket(&pd, pfp))
322 			putnext(rq, mpp);
323 		else
324 			freemsg(mpp);
325 
326 		break;
327 
328 	default:
329 		putnext(rq, mp);
330 		break;
331 	}
332 
333 }
334 
335 /*
336  * Handle write-side M_IOCTL messages.
337  */
338 static void
339 pfioctl(queue_t *wq, mblk_t *mp)
340 {
341 	struct	epacketfilt	*pfp = (struct epacketfilt *)wq->q_ptr;
342 	struct	Pf_ext_packetfilt	*upfp;
343 	struct	packetfilt	*opfp;
344 	ushort_t	*fwp;
345 	int	arg;
346 	int	maxoff = 0;
347 	int	maxoffreg = 0;
348 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
349 	int	error;
350 
351 	switch (iocp->ioc_cmd) {
352 	case PFIOCSETF:
353 		/*
354 		 * Verify argument length. Since the size of packet filter
355 		 * got increased (ENMAXFILTERS was bumped up to 2047), to
356 		 * maintain backwards binary compatibility, we need to
357 		 * check for both possible sizes.
358 		 */
359 		switch (iocp->ioc_count) {
360 		case sizeof (struct Pf_ext_packetfilt):
361 			error = miocpullup(mp,
362 			    sizeof (struct Pf_ext_packetfilt));
363 			if (error != 0) {
364 				miocnak(wq, mp, 0, error);
365 				return;
366 			}
367 			upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
368 			if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
369 				miocnak(wq, mp, 0, EINVAL);
370 				return;
371 			}
372 
373 			bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
374 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
375 			break;
376 
377 		case sizeof (struct packetfilt):
378 			error = miocpullup(mp, sizeof (struct packetfilt));
379 			if (error != 0) {
380 				miocnak(wq, mp, 0, error);
381 				return;
382 			}
383 			opfp = (struct packetfilt *)mp->b_cont->b_rptr;
384 			/* this strange comparison keeps gcc from complaining */
385 			if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
386 				miocnak(wq, mp, 0, EINVAL);
387 				return;
388 			}
389 
390 			pfp->pf.Pf_Priority = opfp->Pf_Priority;
391 			pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
392 
393 			bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
394 			    sizeof (opfp->Pf_Filter));
395 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
396 			break;
397 
398 		default:
399 			miocnak(wq, mp, 0, EINVAL);
400 			return;
401 		}
402 
403 		/*
404 		 * Find and record maximum byte offset that the
405 		 * filter users.  We use this when executing the
406 		 * filter to determine how much of the packet
407 		 * body to pull up.  This code depends on the
408 		 * filter encoding.
409 		 */
410 		for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
411 			arg = *fwp & ((1 << ENF_NBPA) - 1);
412 			switch (arg) {
413 			default:
414 				if ((arg -= ENF_PUSHWORD) > maxoff)
415 					maxoff = arg;
416 				break;
417 
418 			case ENF_LOAD_OFFSET:
419 				/* Point to the offset */
420 				fwp++;
421 				if (*fwp > maxoffreg)
422 					maxoffreg = *fwp;
423 				break;
424 
425 			case ENF_PUSHLIT:
426 			case ENF_BRTR:
427 			case ENF_BRFL:
428 				/* Skip over the literal. */
429 				fwp++;
430 				break;
431 
432 			case ENF_PUSHZERO:
433 			case ENF_PUSHONE:
434 			case ENF_PUSHFFFF:
435 			case ENF_PUSHFF00:
436 			case ENF_PUSH00FF:
437 			case ENF_NOPUSH:
438 			case ENF_POP:
439 				break;
440 			}
441 		}
442 
443 		/*
444 		 * Convert word offset to length in bytes.
445 		 */
446 		pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
447 		miocack(wq, mp, 0, 0);
448 		break;
449 
450 	default:
451 		putnext(wq, mp);
452 		break;
453 	}
454 }
455 
/* #define	DEBUG	1 */
/* #define	INNERDEBUG	1 */

/*
 * enprintf() traces filter execution.  With INNERDEBUG defined it
 * expands to a printf call; otherwise it expands to nothing.  Its one
 * argument is a complete, parenthesized printf argument list, which
 * is why call sites use double parentheses: enprintf(("x=%d\n", x)).
 */
#ifdef	INNERDEBUG
#define	enprintf(a)	printf a
#else
#define	enprintf(a)
#endif
464 
465 /*
466  * Apply the packet filter given by pfp to the packet given by
467  * pp.  Return nonzero iff the filter accepts the packet.
468  *
469  * The packet comes in two pieces, a header and a body, since
470  * that's the most convenient form for our caller.  The header
471  * is in contiguous memory, whereas the body is in a mbuf.
472  * Our caller will have adjusted the mbuf chain so that its first
473  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
474  * the sake of efficiency (and some laziness) the filter is prepared
475  * to examine only these two contiguous pieces.  Furthermore, it
476  * assumes that the header length is even, so that there's no need
477  * to glue the last byte of header to the first byte of data.
478  */
479 
480 #define	opx(i)	((i) >> ENF_NBPA)
481 
482 static int
483 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
484 {
485 	int		maxhdr = pp->pd_hdrlen;
486 	int		maxword = maxhdr + pp->pd_bodylen;
487 	ushort_t	*sp;
488 	ushort_t	*fp;
489 	ushort_t	*fpe;
490 	unsigned	op;
491 	unsigned	arg;
492 	unsigned	offreg = 0;
493 	ushort_t	stack[ENMAXFILTERS+1];
494 
495 	fp = &pfp->pf_Filter[0];
496 	fpe = pfp->pf_FilterEnd;
497 
498 	enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
499 
500 	/*
501 	 * Push TRUE on stack to start.  The stack size is chosen such
502 	 * that overflow can't occur -- each operation can push at most
503 	 * one item on the stack, and the stack size equals the maximum
504 	 * program length.
505 	 */
506 	sp = &stack[ENMAXFILTERS];
507 	*sp = 1;
508 
509 	while (fp < fpe) {
510 	op = *fp >> ENF_NBPA;
511 	arg = *fp & ((1 << ENF_NBPA) - 1);
512 	fp++;
513 
514 	switch (arg) {
515 	default:
516 		arg -= ENF_PUSHWORD;
517 		/*
518 		 * Since arg is unsigned,
519 		 * if it were less than ENF_PUSHWORD before,
520 		 * it would now be huge.
521 		 */
522 		if (arg + offreg < maxhdr)
523 			*--sp = pp->pd_hdr[arg + offreg];
524 		else if (arg + offreg < maxword)
525 			*--sp = pp->pd_body[arg - maxhdr + offreg];
526 		else {
527 			enprintf(("=>0(len)\n"));
528 			return (0);
529 		}
530 		break;
531 	case ENF_PUSHLIT:
532 		*--sp = *fp++;
533 		break;
534 	case ENF_PUSHZERO:
535 		*--sp = 0;
536 		break;
537 	case ENF_PUSHONE:
538 		*--sp = 1;
539 		break;
540 	case ENF_PUSHFFFF:
541 		*--sp = 0xffff;
542 		break;
543 	case ENF_PUSHFF00:
544 		*--sp = 0xff00;
545 		break;
546 	case ENF_PUSH00FF:
547 		*--sp = 0x00ff;
548 		break;
549 	case ENF_LOAD_OFFSET:
550 		offreg = *fp++;
551 		break;
552 	case ENF_BRTR:
553 		if (*sp != 0)
554 			fp += *fp;
555 		else
556 			fp++;
557 		if (fp >= fpe) {
558 			enprintf(("BRTR: fp>=fpe\n"));
559 			return (0);
560 		}
561 		break;
562 	case ENF_BRFL:
563 		if (*sp == 0)
564 			fp += *fp;
565 		else
566 			fp++;
567 		if (fp >= fpe) {
568 			enprintf(("BRFL: fp>=fpe\n"));
569 			return (0);
570 		}
571 		break;
572 	case ENF_POP:
573 		++sp;
574 		if (sp > &stack[ENMAXFILTERS]) {
575 			enprintf(("stack underflow\n"));
576 			return (0);
577 		}
578 		break;
579 	case ENF_NOPUSH:
580 		break;
581 	}
582 
583 	if (sp < &stack[2]) {	/* check stack overflow: small yellow zone */
584 		enprintf(("=>0(--sp)\n"));
585 		return (0);
586 	}
587 
588 	if (op == ENF_NOP)
589 		continue;
590 
591 	/*
592 	 * all non-NOP operators binary, must have at least two operands
593 	 * on stack to evaluate.
594 	 */
595 	if (sp > &stack[ENMAXFILTERS-2]) {
596 		enprintf(("=>0(sp++)\n"));
597 		return (0);
598 	}
599 
600 	arg = *sp++;
601 	switch (op) {
602 	default:
603 		enprintf(("=>0(def)\n"));
604 		return (0);
605 	case opx(ENF_AND):
606 		*sp &= arg;
607 		break;
608 	case opx(ENF_OR):
609 		*sp |= arg;
610 		break;
611 	case opx(ENF_XOR):
612 		*sp ^= arg;
613 		break;
614 	case opx(ENF_EQ):
615 		*sp = (*sp == arg);
616 		break;
617 	case opx(ENF_NEQ):
618 		*sp = (*sp != arg);
619 		break;
620 	case opx(ENF_LT):
621 		*sp = (*sp < arg);
622 		break;
623 	case opx(ENF_LE):
624 		*sp = (*sp <= arg);
625 		break;
626 	case opx(ENF_GT):
627 		*sp = (*sp > arg);
628 		break;
629 	case opx(ENF_GE):
630 		*sp = (*sp >= arg);
631 		break;
632 
633 	/* short-circuit operators */
634 
635 	case opx(ENF_COR):
636 		if (*sp++ == arg) {
637 			enprintf(("=>COR %x\n", *sp));
638 			return (1);
639 		}
640 		break;
641 	case opx(ENF_CAND):
642 		if (*sp++ != arg) {
643 			enprintf(("=>CAND %x\n", *sp));
644 			return (0);
645 		}
646 		break;
647 	case opx(ENF_CNOR):
648 		if (*sp++ == arg) {
649 			enprintf(("=>COR %x\n", *sp));
650 			return (0);
651 		}
652 		break;
653 	case opx(ENF_CNAND):
654 		if (*sp++ != arg) {
655 			enprintf(("=>CNAND %x\n", *sp));
656 			return (1);
657 		}
658 		break;
659 	}
660 	}
661 	enprintf(("=>%x\n", *sp));
662 	return (*sp);
663 }
664