xref: /illumos-gate/usr/src/uts/common/io/pfmod.c (revision 3a18338393f3485e50eae6288b6a9ab89e9f715a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * STREAMS Packet Filter Module
28  *
 * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
 * the others.  It supports ioctls for setting the filter.
32  *
33  * On the write side, the module simply passes everything through
34  * unchanged.
35  *
36  * Based on SunOS 4.x version.  This version has minor changes:
37  *	- general SVR4 porting stuff
38  *	- change name and prefixes from "nit" buffer to streams buffer
39  *	- multithreading assumes configured as D_MTQPAIR
40  */
41 
42 #include <sys/types.h>
43 #include <sys/sysmacros.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/time.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/conf.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/kmem.h>
53 #include <sys/strsun.h>
54 #include <sys/pfmod.h>
55 #include <sys/modctl.h>
56 
57 /*
58  * Expanded version of the Packetfilt structure that includes
59  * some additional fields that aid filter execution efficiency.
60  */
61 struct epacketfilt {
62 	struct Pf_ext_packetfilt	pf;
63 #define	pf_Priority	pf.Pf_Priority
64 #define	pf_FilterLen	pf.Pf_FilterLen
65 #define	pf_Filter	pf.Pf_Filter
66 	/* pointer to word immediately past end of filter */
67 	ushort_t		*pf_FilterEnd;
68 	/* length in bytes of packet prefix the filter examines */
69 	ushort_t		pf_PByteLen;
70 };
71 
72 /*
73  * (Internal) packet descriptor for FilterPacket
74  */
75 struct packdesc {
76 	ushort_t	*pd_hdr;	/* header starting address */
77 	uint_t		pd_hdrlen;	/* header length in shorts */
78 	ushort_t	*pd_body;	/* body starting address */
79 	uint_t		pd_bodylen;	/* body length in shorts */
80 };
81 
82 
83 /*
84  * Function prototypes.
85  */
86 static	int	pfopen(queue_t *, dev_t *, int, int, cred_t *);
87 static	int	pfclose(queue_t *, int, cred_t *);
88 static void	pfioctl(queue_t *wq, mblk_t *mp);
89 static	int	FilterPacket(struct packdesc *, struct epacketfilt *);
90 static int	pfwput(queue_t *, mblk_t *);
91 static int	pfrput(queue_t *, mblk_t *);
92 
93 static struct module_info pf_minfo = {
94 	22,		/* mi_idnum */
95 	"pfmod",	/* mi_idname */
96 	0,		/* mi_minpsz */
97 	INFPSZ,		/* mi_maxpsz */
98 	0,		/* mi_hiwat */
99 	0		/* mi_lowat */
100 };
101 
102 static struct qinit pf_rinit = {
103 	pfrput,			/* qi_putp */
104 	NULL,
105 	pfopen,			/* qi_qopen */
106 	pfclose,		/* qi_qclose */
107 	NULL,			/* qi_qadmin */
108 	&pf_minfo,		/* qi_minfo */
109 	NULL			/* qi_mstat */
110 };
111 
112 static struct qinit pf_winit = {
113 	pfwput,			/* qi_putp */
114 	NULL,			/* qi_srvp */
115 	NULL,			/* qi_qopen */
116 	NULL,			/* qi_qclose */
117 	NULL,			/* qi_qadmin */
118 	&pf_minfo,		/* qi_minfo */
119 	NULL			/* qi_mstat */
120 };
121 
122 static struct streamtab pf_info = {
123 	&pf_rinit,	/* st_rdinit */
124 	&pf_winit,	/* st_wrinit */
125 	NULL,		/* st_muxrinit */
126 	NULL		/* st_muxwinit */
127 };
128 
129 static struct fmodsw fsw = {
130 	"pfmod",
131 	&pf_info,
132 	D_MTQPAIR | D_MP
133 };
134 
135 static struct modlstrmod modlstrmod = {
136 	&mod_strmodops, "streams packet filter module", &fsw
137 };
138 
139 static struct modlinkage modlinkage = {
140 	MODREV_1, &modlstrmod, NULL
141 };
142 
143 int
144 _init(void)
145 {
146 	return (mod_install(&modlinkage));
147 }
148 
149 int
150 _fini(void)
151 {
152 	return (mod_remove(&modlinkage));
153 }
154 
155 int
156 _info(struct modinfo *modinfop)
157 {
158 	return (mod_info(&modlinkage, modinfop));
159 }
160 
161 /*ARGSUSED*/
162 static int
163 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
164 {
165 	struct epacketfilt	*pfp;
166 
167 	ASSERT(rq);
168 
169 	if (sflag != MODOPEN)
170 		return (EINVAL);
171 
172 	if (rq->q_ptr)
173 		return (0);
174 
175 	/*
176 	 * Allocate and initialize per-Stream structure.
177 	 */
178 	pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
179 	rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
180 
181 	qprocson(rq);
182 
183 	return (0);
184 }
185 
186 /* ARGSUSED */
187 static int
188 pfclose(queue_t	*rq, int flags __unused, cred_t *credp __unused)
189 {
190 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
191 
192 	ASSERT(pfp);
193 
194 	qprocsoff(rq);
195 
196 	kmem_free(pfp, sizeof (struct epacketfilt));
197 	rq->q_ptr = WR(rq)->q_ptr = NULL;
198 
199 	return (0);
200 }
201 
202 /*
203  * Write-side put procedure.  Its main task is to detect ioctls.
204  * Other message types are passed on through.
205  */
206 static int
207 pfwput(queue_t *wq, mblk_t *mp)
208 {
209 	switch (mp->b_datap->db_type) {
210 	case M_IOCTL:
211 		pfioctl(wq, mp);
212 		break;
213 
214 	default:
215 		putnext(wq, mp);
216 		break;
217 	}
218 	return (0);
219 }
220 
221 /*
222  * Read-side put procedure.  It's responsible for applying the
223  * packet filter and passing upstream message on or discarding it
224  * depending upon the results.
225  *
226  * Upstream messages can start with zero or more M_PROTO mblks
227  * which are skipped over before executing the packet filter
228  * on any remaining M_DATA mblks.
229  */
230 static int
231 pfrput(queue_t *rq, mblk_t *mp)
232 {
233 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
234 	mblk_t	*mbp, *mpp;
235 	struct	packdesc	pd;
236 	int	need;
237 
238 	ASSERT(pfp);
239 
240 	switch (DB_TYPE(mp)) {
241 	case M_PROTO:
242 	case M_DATA:
243 		/*
244 		 * Skip over protocol information and find the start
245 		 * of the message body, saving the overall message
246 		 * start in mpp.
247 		 */
248 		for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
249 			;
250 
251 		/*
252 		 * Null body (exclusive of M_PROTO blocks) ==> accept.
253 		 * Note that a null body is not the same as an empty body.
254 		 */
255 		if (mp == NULL) {
256 			putnext(rq, mpp);
257 			break;
258 		}
259 
260 		/*
261 		 * Pull the packet up to the length required by
262 		 * the filter.  Note that doing so destroys sharing
263 		 * relationships, which is unfortunate, since the
264 		 * results of pulling up here are likely to be useful
265 		 * for shared messages applied to a filter on a sibling
266 		 * stream.
267 		 *
268 		 * Most packet sources will provide the packet in two
269 		 * logical pieces: an initial header in a single mblk,
270 		 * and a body in a sequence of mblks hooked to the
271 		 * header.  We're prepared to deal with variant forms,
272 		 * but in any case, the pullup applies only to the body
273 		 * part.
274 		 */
275 		mbp = mp->b_cont;
276 		need = pfp->pf_PByteLen;
277 		if (mbp && (MBLKL(mbp) < need)) {
278 			int len = msgdsize(mbp);
279 
280 			/* XXX discard silently on pullupmsg failure */
281 			if (pullupmsg(mbp, MIN(need, len)) == 0) {
282 				freemsg(mpp);
283 				break;
284 			}
285 		}
286 
287 		/*
288 		 * Misalignment (not on short boundary) ==> reject.
289 		 */
290 		if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
291 		    (mbp != NULL &&
292 		    ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
293 			freemsg(mpp);
294 			break;
295 		}
296 
297 		/*
298 		 * These assignments are distasteful, but necessary,
299 		 * since the packet filter wants to work in terms of
300 		 * shorts.  Odd bytes at the end of header or data can't
301 		 * participate in the filtering operation.
302 		 */
303 		pd.pd_hdr = (ushort_t *)mp->b_rptr;
304 		pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
305 		if (mbp) {
306 			pd.pd_body = (ushort_t *)mbp->b_rptr;
307 			pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
308 			    sizeof (ushort_t);
309 		} else {
310 			pd.pd_body = NULL;
311 			pd.pd_bodylen = 0;
312 		}
313 
314 		/*
315 		 * Apply the filter.
316 		 */
317 		if (FilterPacket(&pd, pfp))
318 			putnext(rq, mpp);
319 		else
320 			freemsg(mpp);
321 
322 		break;
323 
324 	default:
325 		putnext(rq, mp);
326 		break;
327 	}
328 	return (0);
329 }
330 
331 /*
332  * Handle write-side M_IOCTL messages.
333  */
334 static void
335 pfioctl(queue_t *wq, mblk_t *mp)
336 {
337 	struct	epacketfilt	*pfp = (struct epacketfilt *)wq->q_ptr;
338 	struct	Pf_ext_packetfilt	*upfp;
339 	struct	packetfilt	*opfp;
340 	ushort_t	*fwp;
341 	int	arg;
342 	int	maxoff = 0;
343 	int	maxoffreg = 0;
344 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
345 	int	error;
346 
347 	switch (iocp->ioc_cmd) {
348 	case PFIOCSETF:
349 		/*
350 		 * Verify argument length. Since the size of packet filter
351 		 * got increased (ENMAXFILTERS was bumped up to 2047), to
352 		 * maintain backwards binary compatibility, we need to
353 		 * check for both possible sizes.
354 		 */
355 		switch (iocp->ioc_count) {
356 		case sizeof (struct Pf_ext_packetfilt):
357 			error = miocpullup(mp,
358 			    sizeof (struct Pf_ext_packetfilt));
359 			if (error != 0) {
360 				miocnak(wq, mp, 0, error);
361 				return;
362 			}
363 			upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
364 			if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
365 				miocnak(wq, mp, 0, EINVAL);
366 				return;
367 			}
368 
369 			bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
370 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
371 			break;
372 
373 		case sizeof (struct packetfilt):
374 			error = miocpullup(mp, sizeof (struct packetfilt));
375 			if (error != 0) {
376 				miocnak(wq, mp, 0, error);
377 				return;
378 			}
379 			opfp = (struct packetfilt *)mp->b_cont->b_rptr;
380 			/* this strange comparison keeps gcc from complaining */
381 			if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
382 				miocnak(wq, mp, 0, EINVAL);
383 				return;
384 			}
385 
386 			pfp->pf.Pf_Priority = opfp->Pf_Priority;
387 			pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
388 
389 			bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
390 			    sizeof (opfp->Pf_Filter));
391 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
392 			break;
393 
394 		default:
395 			miocnak(wq, mp, 0, EINVAL);
396 			return;
397 		}
398 
399 		/*
400 		 * Find and record maximum byte offset that the
401 		 * filter users.  We use this when executing the
402 		 * filter to determine how much of the packet
403 		 * body to pull up.  This code depends on the
404 		 * filter encoding.
405 		 */
406 		for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
407 			arg = *fwp & ((1 << ENF_NBPA) - 1);
408 			switch (arg) {
409 			default:
410 				if ((arg -= ENF_PUSHWORD) > maxoff)
411 					maxoff = arg;
412 				break;
413 
414 			case ENF_LOAD_OFFSET:
415 				/* Point to the offset */
416 				fwp++;
417 				if (*fwp > maxoffreg)
418 					maxoffreg = *fwp;
419 				break;
420 
421 			case ENF_PUSHLIT:
422 			case ENF_BRTR:
423 			case ENF_BRFL:
424 				/* Skip over the literal. */
425 				fwp++;
426 				break;
427 
428 			case ENF_PUSHZERO:
429 			case ENF_PUSHONE:
430 			case ENF_PUSHFFFF:
431 			case ENF_PUSHFF00:
432 			case ENF_PUSH00FF:
433 			case ENF_NOPUSH:
434 			case ENF_POP:
435 				break;
436 			}
437 		}
438 
439 		/*
440 		 * Convert word offset to length in bytes.
441 		 */
442 		pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
443 		miocack(wq, mp, 0, 0);
444 		break;
445 
446 	default:
447 		putnext(wq, mp);
448 		break;
449 	}
450 }
451 
452 /* #define	DEBUG	1 */
453 /* #define	INNERDEBUG	1 */
454 
/*
 * Filter-interpreter tracing.  enprintf takes a parenthesized printf
 * argument list; it expands to nothing unless INNERDEBUG is defined.
 */
#ifndef	INNERDEBUG
#define	enprintf(a)
#else
#define	enprintf(a)	printf a
#endif
460 
461 /*
462  * Apply the packet filter given by pfp to the packet given by
463  * pp.  Return nonzero iff the filter accepts the packet.
464  *
465  * The packet comes in two pieces, a header and a body, since
466  * that's the most convenient form for our caller.  The header
467  * is in contiguous memory, whereas the body is in a mbuf.
468  * Our caller will have adjusted the mbuf chain so that its first
469  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
470  * the sake of efficiency (and some laziness) the filter is prepared
471  * to examine only these two contiguous pieces.  Furthermore, it
472  * assumes that the header length is even, so that there's no need
473  * to glue the last byte of header to the first byte of data.
474  */
475 
476 #define	opx(i)	((i) >> ENF_NBPA)
477 
478 static int
479 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
480 {
481 	int		maxhdr = pp->pd_hdrlen;
482 	int		maxword = maxhdr + pp->pd_bodylen;
483 	ushort_t	*sp;
484 	ushort_t	*fp;
485 	ushort_t	*fpe;
486 	unsigned	op;
487 	unsigned	arg;
488 	unsigned	offreg = 0;
489 	ushort_t	stack[ENMAXFILTERS+1];
490 
491 	fp = &pfp->pf_Filter[0];
492 	fpe = pfp->pf_FilterEnd;
493 
494 	enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
495 
496 	/*
497 	 * Push TRUE on stack to start.  The stack size is chosen such
498 	 * that overflow can't occur -- each operation can push at most
499 	 * one item on the stack, and the stack size equals the maximum
500 	 * program length.
501 	 */
502 	sp = &stack[ENMAXFILTERS];
503 	*sp = 1;
504 
505 	while (fp < fpe) {
506 	op = *fp >> ENF_NBPA;
507 	arg = *fp & ((1 << ENF_NBPA) - 1);
508 	fp++;
509 
510 	switch (arg) {
511 	default:
512 		arg -= ENF_PUSHWORD;
513 		/*
514 		 * Since arg is unsigned,
515 		 * if it were less than ENF_PUSHWORD before,
516 		 * it would now be huge.
517 		 */
518 		if (arg + offreg < maxhdr)
519 			*--sp = pp->pd_hdr[arg + offreg];
520 		else if (arg + offreg < maxword)
521 			*--sp = pp->pd_body[arg - maxhdr + offreg];
522 		else {
523 			enprintf(("=>0(len)\n"));
524 			return (0);
525 		}
526 		break;
527 	case ENF_PUSHLIT:
528 		*--sp = *fp++;
529 		break;
530 	case ENF_PUSHZERO:
531 		*--sp = 0;
532 		break;
533 	case ENF_PUSHONE:
534 		*--sp = 1;
535 		break;
536 	case ENF_PUSHFFFF:
537 		*--sp = 0xffff;
538 		break;
539 	case ENF_PUSHFF00:
540 		*--sp = 0xff00;
541 		break;
542 	case ENF_PUSH00FF:
543 		*--sp = 0x00ff;
544 		break;
545 	case ENF_LOAD_OFFSET:
546 		offreg = *fp++;
547 		break;
548 	case ENF_BRTR:
549 		if (*sp != 0)
550 			fp += *fp;
551 		else
552 			fp++;
553 		if (fp >= fpe) {
554 			enprintf(("BRTR: fp>=fpe\n"));
555 			return (0);
556 		}
557 		break;
558 	case ENF_BRFL:
559 		if (*sp == 0)
560 			fp += *fp;
561 		else
562 			fp++;
563 		if (fp >= fpe) {
564 			enprintf(("BRFL: fp>=fpe\n"));
565 			return (0);
566 		}
567 		break;
568 	case ENF_POP:
569 		++sp;
570 		if (sp > &stack[ENMAXFILTERS]) {
571 			enprintf(("stack underflow\n"));
572 			return (0);
573 		}
574 		break;
575 	case ENF_NOPUSH:
576 		break;
577 	}
578 
579 	if (sp < &stack[2]) {	/* check stack overflow: small yellow zone */
580 		enprintf(("=>0(--sp)\n"));
581 		return (0);
582 	}
583 
584 	if (op == ENF_NOP)
585 		continue;
586 
587 	/*
588 	 * all non-NOP operators binary, must have at least two operands
589 	 * on stack to evaluate.
590 	 */
591 	if (sp > &stack[ENMAXFILTERS-2]) {
592 		enprintf(("=>0(sp++)\n"));
593 		return (0);
594 	}
595 
596 	arg = *sp++;
597 	switch (op) {
598 	default:
599 		enprintf(("=>0(def)\n"));
600 		return (0);
601 	case opx(ENF_AND):
602 		*sp &= arg;
603 		break;
604 	case opx(ENF_OR):
605 		*sp |= arg;
606 		break;
607 	case opx(ENF_XOR):
608 		*sp ^= arg;
609 		break;
610 	case opx(ENF_EQ):
611 		*sp = (*sp == arg);
612 		break;
613 	case opx(ENF_NEQ):
614 		*sp = (*sp != arg);
615 		break;
616 	case opx(ENF_LT):
617 		*sp = (*sp < arg);
618 		break;
619 	case opx(ENF_LE):
620 		*sp = (*sp <= arg);
621 		break;
622 	case opx(ENF_GT):
623 		*sp = (*sp > arg);
624 		break;
625 	case opx(ENF_GE):
626 		*sp = (*sp >= arg);
627 		break;
628 
629 	/* short-circuit operators */
630 
631 	case opx(ENF_COR):
632 		if (*sp++ == arg) {
633 			enprintf(("=>COR %x\n", *sp));
634 			return (1);
635 		}
636 		break;
637 	case opx(ENF_CAND):
638 		if (*sp++ != arg) {
639 			enprintf(("=>CAND %x\n", *sp));
640 			return (0);
641 		}
642 		break;
643 	case opx(ENF_CNOR):
644 		if (*sp++ == arg) {
645 			enprintf(("=>COR %x\n", *sp));
646 			return (0);
647 		}
648 		break;
649 	case opx(ENF_CNAND):
650 		if (*sp++ != arg) {
651 			enprintf(("=>CNAND %x\n", *sp));
652 			return (1);
653 		}
654 		break;
655 	}
656 	}
657 	enprintf(("=>%x\n", *sp));
658 	return (*sp);
659 }
660