xref: /illumos-gate/usr/src/uts/common/io/pfmod.c (revision 67d74cc3e7c9d9461311136a0b2069813a3fd927)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * STREAMS Packet Filter Module
28  *
29  * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
31  * the others.  It supports ioctls for setting the filter.
32  *
33  * On the write side, the module simply passes everything through
34  * unchanged.
35  *
36  * Based on SunOS 4.x version.  This version has minor changes:
37  *	- general SVR4 porting stuff
38  * 	- change name and prefixes from "nit" buffer to streams buffer
39  *	- multithreading assumes configured as D_MTQPAIR
40  */
41 
42 #include <sys/types.h>
43 #include <sys/sysmacros.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/time.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/conf.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/kmem.h>
53 #include <sys/strsun.h>
54 #include <sys/pfmod.h>
55 #include <sys/modctl.h>
56 
57 /*
58  * Expanded version of the Packetfilt structure that includes
59  * some additional fields that aid filter execution efficiency.
60  */
61 struct epacketfilt {
62 	struct Pf_ext_packetfilt	pf;
63 #define	pf_Priority	pf.Pf_Priority
64 #define	pf_FilterLen	pf.Pf_FilterLen
65 #define	pf_Filter	pf.Pf_Filter
66 	/* pointer to word immediately past end of filter */
67 	ushort_t		*pf_FilterEnd;
68 	/* length in bytes of packet prefix the filter examines */
69 	ushort_t		pf_PByteLen;
70 };
71 
72 /*
73  * (Internal) packet descriptor for FilterPacket
74  */
75 struct packdesc {
76 	ushort_t	*pd_hdr;	/* header starting address */
77 	uint_t		pd_hdrlen;	/* header length in shorts */
78 	ushort_t	*pd_body;	/* body starting address */
79 	uint_t		pd_bodylen;	/* body length in shorts */
80 };
81 
82 
83 /*
84  * Function prototypes.
85  */
86 static	int	pfopen(queue_t *, dev_t *, int, int, cred_t *);
87 static	int	pfclose(queue_t *, int, cred_t *);
88 static void	pfioctl(queue_t *wq, mblk_t *mp);
89 static	int	FilterPacket(struct packdesc *, struct epacketfilt *);
90 /*
91  * To save instructions, since STREAMS ignores the return value
92  * from these functions, they are defined as void here. Kind of icky, but...
93  */
94 static void	pfwput(queue_t *, mblk_t *);
95 static void	pfrput(queue_t *, mblk_t *);
96 
97 static struct module_info pf_minfo = {
98 	22,		/* mi_idnum */
99 	"pfmod",	/* mi_idname */
100 	0,		/* mi_minpsz */
101 	INFPSZ,		/* mi_maxpsz */
102 	0,		/* mi_hiwat */
103 	0		/* mi_lowat */
104 };
105 
106 static struct qinit pf_rinit = {
107 	(int (*)())pfrput,	/* qi_putp */
108 	NULL,
109 	pfopen,			/* qi_qopen */
110 	pfclose,		/* qi_qclose */
111 	NULL,			/* qi_qadmin */
112 	&pf_minfo,		/* qi_minfo */
113 	NULL			/* qi_mstat */
114 };
115 
116 static struct qinit pf_winit = {
117 	(int (*)())pfwput,	/* qi_putp */
118 	NULL,			/* qi_srvp */
119 	NULL,			/* qi_qopen */
120 	NULL,			/* qi_qclose */
121 	NULL,			/* qi_qadmin */
122 	&pf_minfo,		/* qi_minfo */
123 	NULL			/* qi_mstat */
124 };
125 
126 static struct streamtab pf_info = {
127 	&pf_rinit,	/* st_rdinit */
128 	&pf_winit,	/* st_wrinit */
129 	NULL,		/* st_muxrinit */
130 	NULL		/* st_muxwinit */
131 };
132 
/*
 * STREAMS module switch entry: the module name "pfmod", its streamtab,
 * and the D_MTQPAIR | D_MP flags (the file header notes that the module
 * assumes it is configured as D_MTQPAIR).
 */
static struct fmodsw fsw = {
	"pfmod",
	&pf_info,
	D_MTQPAIR | D_MP
};
138 
/*
 * Linkage element for this module: the mod_strmodops vector, a
 * human-readable description string, and the fmodsw entry.
 */
static struct modlstrmod modlstrmod = {
	&mod_strmodops, "streams packet filter module", &fsw
};
142 
/*
 * Top-level linkage structure handed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1, &modlstrmod, NULL
};
146 
147 int
148 _init(void)
149 {
150 	return (mod_install(&modlinkage));
151 }
152 
153 int
154 _fini(void)
155 {
156 	return (mod_remove(&modlinkage));
157 }
158 
159 int
160 _info(struct modinfo *modinfop)
161 {
162 	return (mod_info(&modlinkage, modinfop));
163 }
164 
165 /*ARGSUSED*/
166 static int
167 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
168 {
169 	struct epacketfilt	*pfp;
170 
171 	ASSERT(rq);
172 
173 	if (sflag != MODOPEN)
174 		return (EINVAL);
175 
176 	if (rq->q_ptr)
177 		return (0);
178 
179 	/*
180 	 * Allocate and initialize per-Stream structure.
181 	 */
182 	pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
183 	rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
184 
185 	qprocson(rq);
186 
187 	return (0);
188 }
189 
190 /* ARGSUSED */
191 static int
192 pfclose(queue_t	*rq, int flags __unused, cred_t *credp __unused)
193 {
194 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
195 
196 	ASSERT(pfp);
197 
198 	qprocsoff(rq);
199 
200 	kmem_free(pfp, sizeof (struct epacketfilt));
201 	rq->q_ptr = WR(rq)->q_ptr = NULL;
202 
203 	return (0);
204 }
205 
206 /*
207  * Write-side put procedure.  Its main task is to detect ioctls.
208  * Other message types are passed on through.
209  */
210 static void
211 pfwput(queue_t *wq, mblk_t *mp)
212 {
213 	switch (mp->b_datap->db_type) {
214 	case M_IOCTL:
215 		pfioctl(wq, mp);
216 		break;
217 
218 	default:
219 		putnext(wq, mp);
220 		break;
221 	}
222 }
223 
224 /*
225  * Read-side put procedure.  It's responsible for applying the
226  * packet filter and passing upstream message on or discarding it
227  * depending upon the results.
228  *
229  * Upstream messages can start with zero or more M_PROTO mblks
230  * which are skipped over before executing the packet filter
231  * on any remaining M_DATA mblks.
232  */
233 static void
234 pfrput(queue_t *rq, mblk_t *mp)
235 {
236 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
237 	mblk_t	*mbp, *mpp;
238 	struct	packdesc	pd;
239 	int	need;
240 
241 	ASSERT(pfp);
242 
243 	switch (DB_TYPE(mp)) {
244 	case M_PROTO:
245 	case M_DATA:
246 		/*
247 		 * Skip over protocol information and find the start
248 		 * of the message body, saving the overall message
249 		 * start in mpp.
250 		 */
251 		for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
252 			;
253 
254 		/*
255 		 * Null body (exclusive of M_PROTO blocks) ==> accept.
256 		 * Note that a null body is not the same as an empty body.
257 		 */
258 		if (mp == NULL) {
259 			putnext(rq, mpp);
260 			break;
261 		}
262 
263 		/*
264 		 * Pull the packet up to the length required by
265 		 * the filter.  Note that doing so destroys sharing
266 		 * relationships, which is unfortunate, since the
267 		 * results of pulling up here are likely to be useful
268 		 * for shared messages applied to a filter on a sibling
269 		 * stream.
270 		 *
271 		 * Most packet sources will provide the packet in two
272 		 * logical pieces: an initial header in a single mblk,
273 		 * and a body in a sequence of mblks hooked to the
274 		 * header.  We're prepared to deal with variant forms,
275 		 * but in any case, the pullup applies only to the body
276 		 * part.
277 		 */
278 		mbp = mp->b_cont;
279 		need = pfp->pf_PByteLen;
280 		if (mbp && (MBLKL(mbp) < need)) {
281 			int len = msgdsize(mbp);
282 
283 			/* XXX discard silently on pullupmsg failure */
284 			if (pullupmsg(mbp, MIN(need, len)) == 0) {
285 				freemsg(mpp);
286 				break;
287 			}
288 		}
289 
290 		/*
291 		 * Misalignment (not on short boundary) ==> reject.
292 		 */
293 		if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
294 		    (mbp != NULL &&
295 		    ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
296 			freemsg(mpp);
297 			break;
298 		}
299 
300 		/*
301 		 * These assignments are distasteful, but necessary,
302 		 * since the packet filter wants to work in terms of
303 		 * shorts.  Odd bytes at the end of header or data can't
304 		 * participate in the filtering operation.
305 		 */
306 		pd.pd_hdr = (ushort_t *)mp->b_rptr;
307 		pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
308 		if (mbp) {
309 			pd.pd_body = (ushort_t *)mbp->b_rptr;
310 			pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
311 							sizeof (ushort_t);
312 		} else {
313 			pd.pd_body = NULL;
314 			pd.pd_bodylen = 0;
315 		}
316 
317 		/*
318 		 * Apply the filter.
319 		 */
320 		if (FilterPacket(&pd, pfp))
321 			putnext(rq, mpp);
322 		else
323 			freemsg(mpp);
324 
325 		break;
326 
327 	default:
328 		putnext(rq, mp);
329 		break;
330 	}
331 
332 }
333 
334 /*
335  * Handle write-side M_IOCTL messages.
336  */
337 static void
338 pfioctl(queue_t *wq, mblk_t *mp)
339 {
340 	struct	epacketfilt	*pfp = (struct epacketfilt *)wq->q_ptr;
341 	struct	Pf_ext_packetfilt	*upfp;
342 	struct	packetfilt	*opfp;
343 	ushort_t	*fwp;
344 	int	arg;
345 	int	maxoff = 0;
346 	int	maxoffreg = 0;
347 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
348 	int	error;
349 
350 	switch (iocp->ioc_cmd) {
351 	case PFIOCSETF:
352 		/*
353 		 * Verify argument length. Since the size of packet filter
354 		 * got increased (ENMAXFILTERS was bumped up to 2047), to
355 		 * maintain backwards binary compatibility, we need to
356 		 * check for both possible sizes.
357 		 */
358 		switch (iocp->ioc_count) {
359 		case sizeof (struct Pf_ext_packetfilt):
360 			error = miocpullup(mp,
361 			    sizeof (struct Pf_ext_packetfilt));
362 			if (error != 0) {
363 				miocnak(wq, mp, 0, error);
364 				return;
365 			}
366 			upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
367 			if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
368 				miocnak(wq, mp, 0, EINVAL);
369 				return;
370 			}
371 
372 			bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
373 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
374 			break;
375 
376 		case sizeof (struct packetfilt):
377 			error = miocpullup(mp, sizeof (struct packetfilt));
378 			if (error != 0) {
379 				miocnak(wq, mp, 0, error);
380 				return;
381 			}
382 			opfp = (struct packetfilt *)mp->b_cont->b_rptr;
383 			/* this strange comparison keeps gcc from complaining */
384 			if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
385 				miocnak(wq, mp, 0, EINVAL);
386 				return;
387 			}
388 
389 			pfp->pf.Pf_Priority = opfp->Pf_Priority;
390 			pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
391 
392 			bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
393 			    sizeof (opfp->Pf_Filter));
394 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
395 			break;
396 
397 		default:
398 			miocnak(wq, mp, 0, EINVAL);
399 			return;
400 		}
401 
402 		/*
403 		 * Find and record maximum byte offset that the
404 		 * filter users.  We use this when executing the
405 		 * filter to determine how much of the packet
406 		 * body to pull up.  This code depends on the
407 		 * filter encoding.
408 		 */
409 		for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
410 			arg = *fwp & ((1 << ENF_NBPA) - 1);
411 			switch (arg) {
412 			default:
413 				if ((arg -= ENF_PUSHWORD) > maxoff)
414 					maxoff = arg;
415 				break;
416 
417 			case ENF_LOAD_OFFSET:
418 				/* Point to the offset */
419 				fwp++;
420 				if (*fwp > maxoffreg)
421 					maxoffreg = *fwp;
422 				break;
423 
424 			case ENF_PUSHLIT:
425 			case ENF_BRTR:
426 			case ENF_BRFL:
427 				/* Skip over the literal. */
428 				fwp++;
429 				break;
430 
431 			case ENF_PUSHZERO:
432 			case ENF_PUSHONE:
433 			case ENF_PUSHFFFF:
434 			case ENF_PUSHFF00:
435 			case ENF_PUSH00FF:
436 			case ENF_NOPUSH:
437 			case ENF_POP:
438 				break;
439 			}
440 		}
441 
442 		/*
443 		 * Convert word offset to length in bytes.
444 		 */
445 		pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
446 		miocack(wq, mp, 0, 0);
447 		break;
448 
449 	default:
450 		putnext(wq, mp);
451 		break;
452 	}
453 }
454 
/* #define	DEBUG	1 */
/* #define	INNERDEBUG	1 */

/*
 * Trace output for FilterPacket().  With INNERDEBUG defined,
 * enprintf((fmt, args)) expands to printf(fmt, args); otherwise it
 * expands to nothing.  The whole argument list must be wrapped in an
 * extra pair of parentheses so that it passes as the single macro
 * argument.
 */
#ifdef	INNERDEBUG
#define	enprintf(a)	printf a
#else
#define	enprintf(a)
#endif
463 
464 /*
465  * Apply the packet filter given by pfp to the packet given by
466  * pp.  Return nonzero iff the filter accepts the packet.
467  *
468  * The packet comes in two pieces, a header and a body, since
469  * that's the most convenient form for our caller.  The header
470  * is in contiguous memory, whereas the body is in a mbuf.
471  * Our caller will have adjusted the mbuf chain so that its first
472  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
473  * the sake of efficiency (and some laziness) the filter is prepared
474  * to examine only these two contiguous pieces.  Furthermore, it
475  * assumes that the header length is even, so that there's no need
476  * to glue the last byte of header to the first byte of data.
477  */
478 
479 #define	opx(i)	((i) >> ENF_NBPA)
480 
481 static int
482 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
483 {
484 	int		maxhdr = pp->pd_hdrlen;
485 	int		maxword = maxhdr + pp->pd_bodylen;
486 	ushort_t	*sp;
487 	ushort_t	*fp;
488 	ushort_t	*fpe;
489 	unsigned	op;
490 	unsigned	arg;
491 	unsigned	offreg = 0;
492 	ushort_t	stack[ENMAXFILTERS+1];
493 
494 	fp = &pfp->pf_Filter[0];
495 	fpe = pfp->pf_FilterEnd;
496 
497 	enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
498 
499 	/*
500 	 * Push TRUE on stack to start.  The stack size is chosen such
501 	 * that overflow can't occur -- each operation can push at most
502 	 * one item on the stack, and the stack size equals the maximum
503 	 * program length.
504 	 */
505 	sp = &stack[ENMAXFILTERS];
506 	*sp = 1;
507 
508 	while (fp < fpe) {
509 	op = *fp >> ENF_NBPA;
510 	arg = *fp & ((1 << ENF_NBPA) - 1);
511 	fp++;
512 
513 	switch (arg) {
514 	default:
515 		arg -= ENF_PUSHWORD;
516 		/*
517 		 * Since arg is unsigned,
518 		 * if it were less than ENF_PUSHWORD before,
519 		 * it would now be huge.
520 		 */
521 		if (arg + offreg < maxhdr)
522 			*--sp = pp->pd_hdr[arg + offreg];
523 		else if (arg + offreg < maxword)
524 			*--sp = pp->pd_body[arg - maxhdr + offreg];
525 		else {
526 			enprintf(("=>0(len)\n"));
527 			return (0);
528 		}
529 		break;
530 	case ENF_PUSHLIT:
531 		*--sp = *fp++;
532 		break;
533 	case ENF_PUSHZERO:
534 		*--sp = 0;
535 		break;
536 	case ENF_PUSHONE:
537 		*--sp = 1;
538 		break;
539 	case ENF_PUSHFFFF:
540 		*--sp = 0xffff;
541 		break;
542 	case ENF_PUSHFF00:
543 		*--sp = 0xff00;
544 		break;
545 	case ENF_PUSH00FF:
546 		*--sp = 0x00ff;
547 		break;
548 	case ENF_LOAD_OFFSET:
549 		offreg = *fp++;
550 		break;
551 	case ENF_BRTR:
552 		if (*sp != 0)
553 			fp += *fp;
554 		else
555 			fp++;
556 		if (fp >= fpe) {
557 			enprintf(("BRTR: fp>=fpe\n"));
558 			return (0);
559 		}
560 		break;
561 	case ENF_BRFL:
562 		if (*sp == 0)
563 			fp += *fp;
564 		else
565 			fp++;
566 		if (fp >= fpe) {
567 			enprintf(("BRFL: fp>=fpe\n"));
568 			return (0);
569 		}
570 		break;
571 	case ENF_POP:
572 		++sp;
573 		if (sp > &stack[ENMAXFILTERS]) {
574 			enprintf(("stack underflow\n"));
575 			return (0);
576 		}
577 		break;
578 	case ENF_NOPUSH:
579 		break;
580 	}
581 
582 	if (sp < &stack[2]) {	/* check stack overflow: small yellow zone */
583 		enprintf(("=>0(--sp)\n"));
584 		return (0);
585 	}
586 
587 	if (op == ENF_NOP)
588 		continue;
589 
590 	/*
591 	 * all non-NOP operators binary, must have at least two operands
592 	 * on stack to evaluate.
593 	 */
594 	if (sp > &stack[ENMAXFILTERS-2]) {
595 		enprintf(("=>0(sp++)\n"));
596 		return (0);
597 	}
598 
599 	arg = *sp++;
600 	switch (op) {
601 	default:
602 		enprintf(("=>0(def)\n"));
603 		return (0);
604 	case opx(ENF_AND):
605 		*sp &= arg;
606 		break;
607 	case opx(ENF_OR):
608 		*sp |= arg;
609 		break;
610 	case opx(ENF_XOR):
611 		*sp ^= arg;
612 		break;
613 	case opx(ENF_EQ):
614 		*sp = (*sp == arg);
615 		break;
616 	case opx(ENF_NEQ):
617 		*sp = (*sp != arg);
618 		break;
619 	case opx(ENF_LT):
620 		*sp = (*sp < arg);
621 		break;
622 	case opx(ENF_LE):
623 		*sp = (*sp <= arg);
624 		break;
625 	case opx(ENF_GT):
626 		*sp = (*sp > arg);
627 		break;
628 	case opx(ENF_GE):
629 		*sp = (*sp >= arg);
630 		break;
631 
632 	/* short-circuit operators */
633 
634 	case opx(ENF_COR):
635 		if (*sp++ == arg) {
636 			enprintf(("=>COR %x\n", *sp));
637 			return (1);
638 		}
639 		break;
640 	case opx(ENF_CAND):
641 		if (*sp++ != arg) {
642 			enprintf(("=>CAND %x\n", *sp));
643 			return (0);
644 		}
645 		break;
646 	case opx(ENF_CNOR):
647 		if (*sp++ == arg) {
648 			enprintf(("=>COR %x\n", *sp));
649 			return (0);
650 		}
651 		break;
652 	case opx(ENF_CNAND):
653 		if (*sp++ != arg) {
654 			enprintf(("=>CNAND %x\n", *sp));
655 			return (1);
656 		}
657 		break;
658 	}
659 	}
660 	enprintf(("=>%x\n", *sp));
661 	return (*sp);
662 }
663