xref: /titanic_41/usr/src/uts/common/io/pfmod.c (revision ba2e4443695ee6a6f420a35cd4fc3d3346d22932)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * STREAMS Packet Filter Module
31  *
32  * This module applies a filter to messages arriving on its read
33  * queue, passing on messages that the filter accepts and discarding
34  * the others.  It supports ioctls for setting the filter.
35  *
36  * On the write side, the module simply passes everything through
37  * unchanged.
38  *
39  * Based on SunOS 4.x version.  This version has minor changes:
40  *	- general SVR4 porting stuff
41  * 	- change name and prefixes from "nit" buffer to streams buffer
42  *	- multithreading assumes configured as D_MTQPAIR
43  */
44 
45 #include <sys/types.h>
46 #include <sys/sysmacros.h>
47 #include <sys/errno.h>
48 #include <sys/debug.h>
49 #include <sys/time.h>
50 #include <sys/stropts.h>
51 #include <sys/stream.h>
52 #include <sys/conf.h>
53 #include <sys/ddi.h>
54 #include <sys/sunddi.h>
55 #include <sys/kmem.h>
56 #include <sys/strsun.h>
57 #include <sys/pfmod.h>
58 #include <sys/modctl.h>
59 
60 /*
61  * Expanded version of the Packetfilt structure that includes
62  * some additional fields that aid filter execution efficiency.
63  */
64 struct epacketfilt {
65 	struct Pf_ext_packetfilt	pf;
66 #define	pf_Priority	pf.Pf_Priority
67 #define	pf_FilterLen	pf.Pf_FilterLen
68 #define	pf_Filter	pf.Pf_Filter
69 	/* pointer to word immediately past end of filter */
70 	ushort_t		*pf_FilterEnd;
71 	/* length in bytes of packet prefix the filter examines */
72 	ushort_t		pf_PByteLen;
73 };
74 
75 /*
76  * (Internal) packet descriptor for FilterPacket
77  */
78 struct packdesc {
79 	ushort_t	*pd_hdr;	/* header starting address */
80 	uint_t		pd_hdrlen;	/* header length in shorts */
81 	ushort_t	*pd_body;	/* body starting address */
82 	uint_t		pd_bodylen;	/* body length in shorts */
83 };
84 
85 
86 /*
87  * Function prototypes.
88  */
89 static	int	pfopen(queue_t *, dev_t *, int, int, cred_t *);
90 static	int	pfclose(queue_t *);
91 static void	pfioctl(queue_t *wq, mblk_t *mp);
92 static	int	FilterPacket(struct packdesc *, struct epacketfilt *);
93 /*
94  * To save instructions, since STREAMS ignores the return value
95  * from these functions, they are defined as void here. Kind of icky, but...
96  */
97 static void	pfwput(queue_t *, mblk_t *);
98 static void	pfrput(queue_t *, mblk_t *);
99 
100 static struct module_info pf_minfo = {
101 	22,		/* mi_idnum */
102 	"pfmod",	/* mi_idname */
103 	0,		/* mi_minpsz */
104 	INFPSZ,		/* mi_maxpsz */
105 	0,		/* mi_hiwat */
106 	0		/* mi_lowat */
107 };
108 
109 static struct qinit pf_rinit = {
110 	(int (*)())pfrput,	/* qi_putp */
111 	NULL,
112 	pfopen,			/* qi_qopen */
113 	pfclose,		/* qi_qclose */
114 	NULL,			/* qi_qadmin */
115 	&pf_minfo,		/* qi_minfo */
116 	NULL			/* qi_mstat */
117 };
118 
119 static struct qinit pf_winit = {
120 	(int (*)())pfwput,	/* qi_putp */
121 	NULL,			/* qi_srvp */
122 	NULL,			/* qi_qopen */
123 	NULL,			/* qi_qclose */
124 	NULL,			/* qi_qadmin */
125 	&pf_minfo,		/* qi_minfo */
126 	NULL			/* qi_mstat */
127 };
128 
129 static struct streamtab pf_info = {
130 	&pf_rinit,	/* st_rdinit */
131 	&pf_winit,	/* st_wrinit */
132 	NULL,		/* st_muxrinit */
133 	NULL		/* st_muxwinit */
134 };
135 
136 static struct fmodsw fsw = {
137 	"pfmod",
138 	&pf_info,
139 	D_MTQPAIR | D_MP
140 };
141 
142 static struct modlstrmod modlstrmod = {
143 	&mod_strmodops, "streams packet filter module", &fsw
144 };
145 
146 static struct modlinkage modlinkage = {
147 	MODREV_1, &modlstrmod, NULL
148 };
149 
150 int
151 _init(void)
152 {
153 	return (mod_install(&modlinkage));
154 }
155 
156 int
157 _fini(void)
158 {
159 	return (mod_remove(&modlinkage));
160 }
161 
162 int
163 _info(struct modinfo *modinfop)
164 {
165 	return (mod_info(&modlinkage, modinfop));
166 }
167 
168 /*ARGSUSED*/
169 static int
170 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
171 {
172 	struct epacketfilt	*pfp;
173 
174 	ASSERT(rq);
175 
176 	if (sflag != MODOPEN)
177 		return (EINVAL);
178 
179 	if (rq->q_ptr)
180 		return (0);
181 
182 	/*
183 	 * Allocate and initialize per-Stream structure.
184 	 */
185 	pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
186 	rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
187 
188 	qprocson(rq);
189 
190 	return (0);
191 }
192 
193 static int
194 pfclose(queue_t	*rq)
195 {
196 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
197 
198 	ASSERT(pfp);
199 
200 	qprocsoff(rq);
201 
202 	kmem_free(pfp, sizeof (struct epacketfilt));
203 	rq->q_ptr = WR(rq)->q_ptr = NULL;
204 
205 	return (0);
206 }
207 
208 /*
209  * Write-side put procedure.  Its main task is to detect ioctls.
210  * Other message types are passed on through.
211  */
212 static void
213 pfwput(queue_t *wq, mblk_t *mp)
214 {
215 	switch (mp->b_datap->db_type) {
216 	case M_IOCTL:
217 		pfioctl(wq, mp);
218 		break;
219 
220 	default:
221 		putnext(wq, mp);
222 		break;
223 	}
224 }
225 
226 /*
227  * Read-side put procedure.  It's responsible for applying the
228  * packet filter and passing upstream message on or discarding it
229  * depending upon the results.
230  *
231  * Upstream messages can start with zero or more M_PROTO mblks
232  * which are skipped over before executing the packet filter
233  * on any remaining M_DATA mblks.
234  */
235 static void
236 pfrput(queue_t *rq, mblk_t *mp)
237 {
238 	struct	epacketfilt	*pfp = (struct epacketfilt *)rq->q_ptr;
239 	mblk_t	*mbp, *mpp;
240 	struct	packdesc	pd;
241 	int	need;
242 
243 	ASSERT(pfp);
244 
245 	switch (DB_TYPE(mp)) {
246 	case M_PROTO:
247 	case M_DATA:
248 		/*
249 		 * Skip over protocol information and find the start
250 		 * of the message body, saving the overall message
251 		 * start in mpp.
252 		 */
253 		for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
254 			;
255 
256 		/*
257 		 * Null body (exclusive of M_PROTO blocks) ==> accept.
258 		 * Note that a null body is not the same as an empty body.
259 		 */
260 		if (mp == NULL) {
261 			putnext(rq, mpp);
262 			break;
263 		}
264 
265 		/*
266 		 * Pull the packet up to the length required by
267 		 * the filter.  Note that doing so destroys sharing
268 		 * relationships, which is unfortunate, since the
269 		 * results of pulling up here are likely to be useful
270 		 * for shared messages applied to a filter on a sibling
271 		 * stream.
272 		 *
273 		 * Most packet sources will provide the packet in two
274 		 * logical pieces: an initial header in a single mblk,
275 		 * and a body in a sequence of mblks hooked to the
276 		 * header.  We're prepared to deal with variant forms,
277 		 * but in any case, the pullup applies only to the body
278 		 * part.
279 		 */
280 		mbp = mp->b_cont;
281 		need = pfp->pf_PByteLen;
282 		if (mbp && (MBLKL(mbp) < need)) {
283 			int len = msgdsize(mbp);
284 
285 			/* XXX discard silently on pullupmsg failure */
286 			if (pullupmsg(mbp, MIN(need, len)) == 0) {
287 				freemsg(mpp);
288 				break;
289 			}
290 		}
291 
292 		/*
293 		 * Misalignment (not on short boundary) ==> reject.
294 		 */
295 		if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
296 		    (mbp != NULL &&
297 		    ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
298 			freemsg(mpp);
299 			break;
300 		}
301 
302 		/*
303 		 * These assignments are distasteful, but necessary,
304 		 * since the packet filter wants to work in terms of
305 		 * shorts.  Odd bytes at the end of header or data can't
306 		 * participate in the filtering operation.
307 		 */
308 		pd.pd_hdr = (ushort_t *)mp->b_rptr;
309 		pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
310 		if (mbp) {
311 			pd.pd_body = (ushort_t *)mbp->b_rptr;
312 			pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
313 							sizeof (ushort_t);
314 		} else {
315 			pd.pd_body = NULL;
316 			pd.pd_bodylen = 0;
317 		}
318 
319 		/*
320 		 * Apply the filter.
321 		 */
322 		if (FilterPacket(&pd, pfp))
323 			putnext(rq, mpp);
324 		else
325 			freemsg(mpp);
326 
327 		break;
328 
329 	default:
330 		putnext(rq, mp);
331 		break;
332 	}
333 
334 }
335 
336 /*
337  * Handle write-side M_IOCTL messages.
338  */
339 static void
340 pfioctl(queue_t *wq, mblk_t *mp)
341 {
342 	struct	epacketfilt	*pfp = (struct epacketfilt *)wq->q_ptr;
343 	struct	Pf_ext_packetfilt	*upfp;
344 	struct	packetfilt	*opfp;
345 	ushort_t	*fwp;
346 	int	maxoff, arg;
347 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
348 	int	error;
349 
350 	switch (iocp->ioc_cmd) {
351 	case PFIOCSETF:
352 		/*
353 		 * Verify argument length. Since the size of packet filter
354 		 * got increased (ENMAXFILTERS was bumped up to 2047), to
355 		 * maintain backwards binary compatibility, we need to
356 		 * check for both possible sizes.
357 		 */
358 		switch (iocp->ioc_count) {
359 		case sizeof (struct Pf_ext_packetfilt):
360 			error = miocpullup(mp,
361 			    sizeof (struct Pf_ext_packetfilt));
362 			if (error != 0) {
363 				miocnak(wq, mp, 0, error);
364 				return;
365 			}
366 			upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
367 			if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
368 				miocnak(wq, mp, 0, EINVAL);
369 				return;
370 			}
371 
372 			bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
373 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
374 			break;
375 
376 		case sizeof (struct packetfilt):
377 			error = miocpullup(mp, sizeof (struct packetfilt));
378 			if (error != 0) {
379 				miocnak(wq, mp, 0, error);
380 				return;
381 			}
382 			opfp = (struct packetfilt *)mp->b_cont->b_rptr;
383 			/* this strange comparison keeps gcc from complaining */
384 			if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
385 				miocnak(wq, mp, 0, EINVAL);
386 				return;
387 			}
388 
389 			pfp->pf.Pf_Priority = opfp->Pf_Priority;
390 			pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
391 
392 			bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
393 			    sizeof (opfp->Pf_Filter));
394 			pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
395 			break;
396 
397 		default:
398 			miocnak(wq, mp, 0, EINVAL);
399 			return;
400 		}
401 
402 		/*
403 		 * Find and record maximum byte offset that the
404 		 * filter users.  We use this when executing the
405 		 * filter to determine how much of the packet
406 		 * body to pull up.  This code depends on the
407 		 * filter encoding.
408 		 */
409 		maxoff = 0;
410 		for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
411 			arg = *fwp & ((1 << ENF_NBPA) - 1);
412 			switch (arg) {
413 			default:
414 				if ((arg -= ENF_PUSHWORD) > maxoff)
415 					maxoff = arg;
416 				break;
417 
418 			case ENF_PUSHLIT:
419 				/* Skip over the literal. */
420 				fwp++;
421 				break;
422 
423 			case ENF_PUSHZERO:
424 			case ENF_PUSHONE:
425 			case ENF_PUSHFFFF:
426 			case ENF_PUSHFF00:
427 			case ENF_PUSH00FF:
428 			case ENF_NOPUSH:
429 				break;
430 			}
431 		}
432 
433 		/*
434 		 * Convert word offset to length in bytes.
435 		 */
436 		pfp->pf_PByteLen = (maxoff + 1) * sizeof (ushort_t);
437 
438 		miocack(wq, mp, 0, 0);
439 		break;
440 
441 	default:
442 		putnext(wq, mp);
443 		break;
444 	}
445 }
446 
/*
 * Debugging switches, both disabled as shipped.  Defining INNERDEBUG
 * compiles in the enprintf() tracing used by FilterPacket() below.
 */
/* #define	DEBUG	1 */
/* #define	INNERDEBUG	1 */

#ifdef	INNERDEBUG
/*
 * Conditional trace printf, used as enprintf(flags)("fmt", args...).
 * It prints only when one of the given flag bits is set in enDebug.
 * Note that it expands to a bare `if', so each use must be a complete
 * statement and must not sit directly in front of an `else'.
 */
#define	enprintf(flags)	if (enDebug & (flags)) printf

/*
 * Symbolic definitions for enDebug flag bits
 *	ENDBG_TRACE should be 1 because it is the most common
 *	use in the code, and the compiler generates faster code
 *	for testing the low bit in a word.
 */

#define	ENDBG_TRACE	1	/* trace most operations */
#define	ENDBG_DESQ	2	/* trace descriptor queues */
#define	ENDBG_INIT	4	/* initialization info */
#define	ENDBG_SCAV	8	/* scavenger operation */
#define	ENDBG_ABNORM	16	/* abnormal events */

/* Active trace flags; -1 turns every category on. */
int	enDebug = /* ENDBG_ABNORM | ENDBG_INIT | ENDBG_TRACE */ -1;
#endif /* INNERDEBUG */
468 
469 /*
470  * Apply the packet filter given by pfp to the packet given by
471  * pp.  Return nonzero iff the filter accepts the packet.
472  *
473  * The packet comes in two pieces, a header and a body, since
474  * that's the most convenient form for our caller.  The header
475  * is in contiguous memory, whereas the body is in a mbuf.
476  * Our caller will have adjusted the mbuf chain so that its first
477  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
478  * the sake of efficiency (and some laziness) the filter is prepared
479  * to examine only these two contiguous pieces.  Furthermore, it
480  * assumes that the header length is even, so that there's no need
481  * to glue the last byte of header to the first byte of data.
482  */
483 
484 #define	opx(i)	((i) >> ENF_NBPA)
485 
486 static int
487 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
488 {
489 	int		maxhdr = pp->pd_hdrlen;
490 	int		maxword = maxhdr + pp->pd_bodylen;
491 	ushort_t	*sp;
492 	ushort_t	*fp;
493 	ushort_t	*fpe;
494 	unsigned	op;
495 	unsigned	arg;
496 	ushort_t	stack[ENMAXFILTERS+1];
497 
498 	fp = &pfp->pf_Filter[0];
499 	fpe = pfp->pf_FilterEnd;
500 
501 #ifdef	INNERDEBUG
502 	enprintf(ENDBG_TRACE)("FilterPacket(%p, %p, %p, %p):\n",
503 		pp, pfp, fp, fpe);
504 #endif
505 
506 	/*
507 	 * Push TRUE on stack to start.  The stack size is chosen such
508 	 * that overflow can't occur -- each operation can push at most
509 	 * one item on the stack, and the stack size equals the maximum
510 	 * program length.
511 	 */
512 	sp = &stack[ENMAXFILTERS];
513 	*sp = 1;
514 
515 	while (fp < fpe) {
516 	op = *fp >> ENF_NBPA;
517 	arg = *fp & ((1 << ENF_NBPA) - 1);
518 	fp++;
519 
520 	switch (arg) {
521 	default:
522 		arg -= ENF_PUSHWORD;
523 		/*
524 		 * Since arg is unsigned,
525 		 * if it were less than ENF_PUSHWORD before,
526 		 * it would now be huge.
527 		 */
528 		if (arg < maxhdr)
529 			*--sp = pp->pd_hdr[arg];
530 		else if (arg < maxword)
531 			*--sp = pp->pd_body[arg - maxhdr];
532 		else {
533 #ifdef	INNERDEBUG
534 			enprintf(ENDBG_TRACE)("=>0(len)\n");
535 #endif
536 			return (0);
537 		}
538 		break;
539 	case ENF_PUSHLIT:
540 		*--sp = *fp++;
541 		break;
542 	case ENF_PUSHZERO:
543 		*--sp = 0;
544 		break;
545 	case ENF_PUSHONE:
546 		*--sp = 1;
547 		break;
548 	case ENF_PUSHFFFF:
549 		*--sp = 0xffff;
550 		break;
551 	case ENF_PUSHFF00:
552 		*--sp = 0xff00;
553 		break;
554 	case ENF_PUSH00FF:
555 		*--sp = 0x00ff;
556 		break;
557 	case ENF_NOPUSH:
558 		break;
559 	}
560 
561 	if (sp < &stack[2]) {	/* check stack overflow: small yellow zone */
562 #ifdef	INNERDEBUG
563 		enprintf(ENDBG_TRACE)("=>0(--sp)\n");
564 #endif
565 		return (0);
566 	}
567 
568 	if (op == ENF_NOP)
569 		continue;
570 
571 	/*
572 	 * all non-NOP operators binary, must have at least two operands
573 	 * on stack to evaluate.
574 	 */
575 	if (sp > &stack[ENMAXFILTERS-2]) {
576 #ifdef	INNERDEBUG
577 		enprintf(ENDBG_TRACE)("=>0(sp++)\n");
578 #endif
579 		return (0);
580 	}
581 
582 	arg = *sp++;
583 	switch (op) {
584 	default:
585 #ifdef	INNERDEBUG
586 		enprintf(ENDBG_TRACE)("=>0(def)\n");
587 #endif
588 		return (0);
589 	case opx(ENF_AND):
590 		*sp &= arg;
591 		break;
592 	case opx(ENF_OR):
593 		*sp |= arg;
594 		break;
595 	case opx(ENF_XOR):
596 		*sp ^= arg;
597 		break;
598 	case opx(ENF_EQ):
599 		*sp = (*sp == arg);
600 		break;
601 	case opx(ENF_NEQ):
602 		*sp = (*sp != arg);
603 		break;
604 	case opx(ENF_LT):
605 		*sp = (*sp < arg);
606 		break;
607 	case opx(ENF_LE):
608 		*sp = (*sp <= arg);
609 		break;
610 	case opx(ENF_GT):
611 		*sp = (*sp > arg);
612 		break;
613 	case opx(ENF_GE):
614 		*sp = (*sp >= arg);
615 		break;
616 
617 	/* short-circuit operators */
618 
619 	case opx(ENF_COR):
620 		if (*sp++ == arg) {
621 #ifdef	INNERDEBUG
622 			enprintf(ENDBG_TRACE)("=>COR %x\n", *sp);
623 #endif
624 			return (1);
625 		}
626 		break;
627 	case opx(ENF_CAND):
628 		if (*sp++ != arg) {
629 #ifdef	INNERDEBUG
630 			enprintf(ENDBG_TRACE)("=>CAND %x\n", *sp);
631 #endif
632 			return (0);
633 		}
634 		break;
635 	case opx(ENF_CNOR):
636 		if (*sp++ == arg) {
637 #ifdef	INNERDEBUG
638 			enprintf(ENDBG_TRACE)("=>COR %x\n", *sp);
639 #endif
640 			return (0);
641 		}
642 		break;
643 	case opx(ENF_CNAND):
644 		if (*sp++ != arg) {
645 #ifdef	INNERDEBUG
646 			enprintf(ENDBG_TRACE)("=>CNAND %x\n", *sp);
647 #endif
648 			return (1);
649 		}
650 		break;
651 	}
652 	}
653 #ifdef	INNERDEBUG
654 	enprintf(ENDBG_TRACE)("=>%x\n", *sp);
655 #endif
656 	return (*sp);
657 }
658