1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * STREAMS Packet Filter Module 31 * 32 * This module applies a filter to messages arriving on its read 33 * queue, passing on messages that the filter accepts adn discarding 34 * the others. It supports ioctls for setting the filter. 35 * 36 * On the write side, the module simply passes everything through 37 * unchanged. 38 * 39 * Based on SunOS 4.x version. This version has minor changes: 40 * - general SVR4 porting stuff 41 * - change name and prefixes from "nit" buffer to streams buffer 42 * - multithreading assumes configured as D_MTQPAIR 43 */ 44 45 #include <sys/types.h> 46 #include <sys/sysmacros.h> 47 #include <sys/errno.h> 48 #include <sys/debug.h> 49 #include <sys/time.h> 50 #include <sys/stropts.h> 51 #include <sys/stream.h> 52 #include <sys/conf.h> 53 #include <sys/ddi.h> 54 #include <sys/sunddi.h> 55 #include <sys/kmem.h> 56 #include <sys/strsun.h> 57 #include <sys/pfmod.h> 58 #include <sys/modctl.h> 59 60 /* 61 * Expanded version of the Packetfilt structure that includes 62 * some additional fields that aid filter execution efficiency. 63 */ 64 struct epacketfilt { 65 struct Pf_ext_packetfilt pf; 66 #define pf_Priority pf.Pf_Priority 67 #define pf_FilterLen pf.Pf_FilterLen 68 #define pf_Filter pf.Pf_Filter 69 /* pointer to word immediately past end of filter */ 70 ushort_t *pf_FilterEnd; 71 /* length in bytes of packet prefix the filter examines */ 72 ushort_t pf_PByteLen; 73 }; 74 75 /* 76 * (Internal) packet descriptor for FilterPacket 77 */ 78 struct packdesc { 79 ushort_t *pd_hdr; /* header starting address */ 80 uint_t pd_hdrlen; /* header length in shorts */ 81 ushort_t *pd_body; /* body starting address */ 82 uint_t pd_bodylen; /* body length in shorts */ 83 }; 84 85 86 /* 87 * Function prototypes. 88 */ 89 static int pfopen(queue_t *, dev_t *, int, int, cred_t *); 90 static int pfclose(queue_t *); 91 static void pfioctl(queue_t *wq, mblk_t *mp); 92 static int FilterPacket(struct packdesc *, struct epacketfilt *); 93 /* 94 * To save instructions, since STREAMS ignores the return value 95 * from these functions, they are defined as void here. Kind of icky, but... 96 */ 97 static void pfwput(queue_t *, mblk_t *); 98 static void pfrput(queue_t *, mblk_t *); 99 100 static struct module_info pf_minfo = { 101 22, /* mi_idnum */ 102 "pfmod", /* mi_idname */ 103 0, /* mi_minpsz */ 104 INFPSZ, /* mi_maxpsz */ 105 0, /* mi_hiwat */ 106 0 /* mi_lowat */ 107 }; 108 109 static struct qinit pf_rinit = { 110 (int (*)())pfrput, /* qi_putp */ 111 NULL, 112 pfopen, /* qi_qopen */ 113 pfclose, /* qi_qclose */ 114 NULL, /* qi_qadmin */ 115 &pf_minfo, /* qi_minfo */ 116 NULL /* qi_mstat */ 117 }; 118 119 static struct qinit pf_winit = { 120 (int (*)())pfwput, /* qi_putp */ 121 NULL, /* qi_srvp */ 122 NULL, /* qi_qopen */ 123 NULL, /* qi_qclose */ 124 NULL, /* qi_qadmin */ 125 &pf_minfo, /* qi_minfo */ 126 NULL /* qi_mstat */ 127 }; 128 129 static struct streamtab pf_info = { 130 &pf_rinit, /* st_rdinit */ 131 &pf_winit, /* st_wrinit */ 132 NULL, /* st_muxrinit */ 133 NULL /* st_muxwinit */ 134 }; 135 136 static struct fmodsw fsw = { 137 "pfmod", 138 &pf_info, 139 D_MTQPAIR | D_MP 140 }; 141 142 static struct modlstrmod modlstrmod = { 143 &mod_strmodops, "streams packet filter module", &fsw 144 }; 145 146 static struct modlinkage modlinkage = { 147 MODREV_1, &modlstrmod, NULL 148 }; 149 150 int 151 _init(void) 152 { 153 return (mod_install(&modlinkage)); 154 } 155 156 int 157 _fini(void) 158 { 159 return (mod_remove(&modlinkage)); 160 } 161 162 int 163 _info(struct modinfo *modinfop) 164 { 165 return (mod_info(&modlinkage, modinfop)); 166 } 167 168 /*ARGSUSED*/ 169 static int 170 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 171 { 172 struct epacketfilt *pfp; 173 174 ASSERT(rq); 175 176 if (sflag != MODOPEN) 177 return (EINVAL); 178 179 if (rq->q_ptr) 180 return (0); 181 182 /* 183 * Allocate and initialize per-Stream structure. 184 */ 185 pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP); 186 rq->q_ptr = WR(rq)->q_ptr = (char *)pfp; 187 188 qprocson(rq); 189 190 return (0); 191 } 192 193 static int 194 pfclose(queue_t *rq) 195 { 196 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr; 197 198 ASSERT(pfp); 199 200 qprocsoff(rq); 201 202 kmem_free(pfp, sizeof (struct epacketfilt)); 203 rq->q_ptr = WR(rq)->q_ptr = NULL; 204 205 return (0); 206 } 207 208 /* 209 * Write-side put procedure. Its main task is to detect ioctls. 210 * Other message types are passed on through. 211 */ 212 static void 213 pfwput(queue_t *wq, mblk_t *mp) 214 { 215 switch (mp->b_datap->db_type) { 216 case M_IOCTL: 217 pfioctl(wq, mp); 218 break; 219 220 default: 221 putnext(wq, mp); 222 break; 223 } 224 } 225 226 /* 227 * Read-side put procedure. It's responsible for applying the 228 * packet filter and passing upstream message on or discarding it 229 * depending upon the results. 230 * 231 * Upstream messages can start with zero or more M_PROTO mblks 232 * which are skipped over before executing the packet filter 233 * on any remaining M_DATA mblks. 234 */ 235 static void 236 pfrput(queue_t *rq, mblk_t *mp) 237 { 238 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr; 239 mblk_t *mbp, *mpp; 240 struct packdesc pd; 241 int need; 242 243 ASSERT(pfp); 244 245 switch (DB_TYPE(mp)) { 246 case M_PROTO: 247 case M_DATA: 248 /* 249 * Skip over protocol information and find the start 250 * of the message body, saving the overall message 251 * start in mpp. 252 */ 253 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont) 254 ; 255 256 /* 257 * Null body (exclusive of M_PROTO blocks) ==> accept. 258 * Note that a null body is not the same as an empty body. 259 */ 260 if (mp == NULL) { 261 putnext(rq, mpp); 262 break; 263 } 264 265 /* 266 * Pull the packet up to the length required by 267 * the filter. Note that doing so destroys sharing 268 * relationships, which is unfortunate, since the 269 * results of pulling up here are likely to be useful 270 * for shared messages applied to a filter on a sibling 271 * stream. 272 * 273 * Most packet sources will provide the packet in two 274 * logical pieces: an initial header in a single mblk, 275 * and a body in a sequence of mblks hooked to the 276 * header. We're prepared to deal with variant forms, 277 * but in any case, the pullup applies only to the body 278 * part. 279 */ 280 mbp = mp->b_cont; 281 need = pfp->pf_PByteLen; 282 if (mbp && (MBLKL(mbp) < need)) { 283 int len = msgdsize(mbp); 284 285 /* XXX discard silently on pullupmsg failure */ 286 if (pullupmsg(mbp, MIN(need, len)) == 0) { 287 freemsg(mpp); 288 break; 289 } 290 } 291 292 /* 293 * Misalignment (not on short boundary) ==> reject. 294 */ 295 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) || 296 (mbp != NULL && 297 ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) { 298 freemsg(mpp); 299 break; 300 } 301 302 /* 303 * These assignments are distasteful, but necessary, 304 * since the packet filter wants to work in terms of 305 * shorts. Odd bytes at the end of header or data can't 306 * participate in the filtering operation. 307 */ 308 pd.pd_hdr = (ushort_t *)mp->b_rptr; 309 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t); 310 if (mbp) { 311 pd.pd_body = (ushort_t *)mbp->b_rptr; 312 pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) / 313 sizeof (ushort_t); 314 } else { 315 pd.pd_body = NULL; 316 pd.pd_bodylen = 0; 317 } 318 319 /* 320 * Apply the filter. 321 */ 322 if (FilterPacket(&pd, pfp)) 323 putnext(rq, mpp); 324 else 325 freemsg(mpp); 326 327 break; 328 329 default: 330 putnext(rq, mp); 331 break; 332 } 333 334 } 335 336 /* 337 * Handle write-side M_IOCTL messages. 338 */ 339 static void 340 pfioctl(queue_t *wq, mblk_t *mp) 341 { 342 struct epacketfilt *pfp = (struct epacketfilt *)wq->q_ptr; 343 struct Pf_ext_packetfilt *upfp; 344 struct packetfilt *opfp; 345 ushort_t *fwp; 346 int maxoff, arg; 347 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 348 int error; 349 350 switch (iocp->ioc_cmd) { 351 case PFIOCSETF: 352 /* 353 * Verify argument length. Since the size of packet filter 354 * got increased (ENMAXFILTERS was bumped up to 2047), to 355 * maintain backwards binary compatibility, we need to 356 * check for both possible sizes. 357 */ 358 switch (iocp->ioc_count) { 359 case sizeof (struct Pf_ext_packetfilt): 360 error = miocpullup(mp, 361 sizeof (struct Pf_ext_packetfilt)); 362 if (error != 0) { 363 miocnak(wq, mp, 0, error); 364 return; 365 } 366 upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr; 367 if (upfp->Pf_FilterLen > PF_MAXFILTERS) { 368 miocnak(wq, mp, 0, EINVAL); 369 return; 370 } 371 372 bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt)); 373 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen]; 374 break; 375 376 case sizeof (struct packetfilt): 377 error = miocpullup(mp, sizeof (struct packetfilt)); 378 if (error != 0) { 379 miocnak(wq, mp, 0, error); 380 return; 381 } 382 opfp = (struct packetfilt *)mp->b_cont->b_rptr; 383 /* this strange comparison keeps gcc from complaining */ 384 if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) { 385 miocnak(wq, mp, 0, EINVAL); 386 return; 387 } 388 389 pfp->pf.Pf_Priority = opfp->Pf_Priority; 390 pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen; 391 392 bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter, 393 sizeof (opfp->Pf_Filter)); 394 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen]; 395 break; 396 397 default: 398 miocnak(wq, mp, 0, EINVAL); 399 return; 400 } 401 402 /* 403 * Find and record maximum byte offset that the 404 * filter users. We use this when executing the 405 * filter to determine how much of the packet 406 * body to pull up. This code depends on the 407 * filter encoding. 408 */ 409 maxoff = 0; 410 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) { 411 arg = *fwp & ((1 << ENF_NBPA) - 1); 412 switch (arg) { 413 default: 414 if ((arg -= ENF_PUSHWORD) > maxoff) 415 maxoff = arg; 416 break; 417 418 case ENF_PUSHLIT: 419 /* Skip over the literal. */ 420 fwp++; 421 break; 422 423 case ENF_PUSHZERO: 424 case ENF_PUSHONE: 425 case ENF_PUSHFFFF: 426 case ENF_PUSHFF00: 427 case ENF_PUSH00FF: 428 case ENF_NOPUSH: 429 break; 430 } 431 } 432 433 /* 434 * Convert word offset to length in bytes. 435 */ 436 pfp->pf_PByteLen = (maxoff + 1) * sizeof (ushort_t); 437 438 miocack(wq, mp, 0, 0); 439 break; 440 441 default: 442 putnext(wq, mp); 443 break; 444 } 445 } 446 447 /* #define DEBUG 1 */ 448 /* #define INNERDEBUG 1 */ 449 450 #ifdef INNERDEBUG 451 #define enprintf(flags) if (enDebug & (flags)) printf 452 453 /* 454 * Symbolic definitions for enDebug flag bits 455 * ENDBG_TRACE should be 1 because it is the most common 456 * use in the code, and the compiler generates faster code 457 * for testing the low bit in a word. 458 */ 459 460 #define ENDBG_TRACE 1 /* trace most operations */ 461 #define ENDBG_DESQ 2 /* trace descriptor queues */ 462 #define ENDBG_INIT 4 /* initialization info */ 463 #define ENDBG_SCAV 8 /* scavenger operation */ 464 #define ENDBG_ABNORM 16 /* abnormal events */ 465 466 int enDebug = /* ENDBG_ABNORM | ENDBG_INIT | ENDBG_TRACE */ -1; 467 #endif /* INNERDEBUG */ 468 469 /* 470 * Apply the packet filter given by pfp to the packet given by 471 * pp. Return nonzero iff the filter accepts the packet. 472 * 473 * The packet comes in two pieces, a header and a body, since 474 * that's the most convenient form for our caller. The header 475 * is in contiguous memory, whereas the body is in a mbuf. 476 * Our caller will have adjusted the mbuf chain so that its first 477 * min(MLEN, length(body)) bytes are guaranteed contiguous. For 478 * the sake of efficiency (and some laziness) the filter is prepared 479 * to examine only these two contiguous pieces. Furthermore, it 480 * assumes that the header length is even, so that there's no need 481 * to glue the last byte of header to the first byte of data. 482 */ 483 484 #define opx(i) ((i) >> ENF_NBPA) 485 486 static int 487 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp) 488 { 489 int maxhdr = pp->pd_hdrlen; 490 int maxword = maxhdr + pp->pd_bodylen; 491 ushort_t *sp; 492 ushort_t *fp; 493 ushort_t *fpe; 494 unsigned op; 495 unsigned arg; 496 ushort_t stack[ENMAXFILTERS+1]; 497 498 fp = &pfp->pf_Filter[0]; 499 fpe = pfp->pf_FilterEnd; 500 501 #ifdef INNERDEBUG 502 enprintf(ENDBG_TRACE)("FilterPacket(%p, %p, %p, %p):\n", 503 pp, pfp, fp, fpe); 504 #endif 505 506 /* 507 * Push TRUE on stack to start. The stack size is chosen such 508 * that overflow can't occur -- each operation can push at most 509 * one item on the stack, and the stack size equals the maximum 510 * program length. 511 */ 512 sp = &stack[ENMAXFILTERS]; 513 *sp = 1; 514 515 while (fp < fpe) { 516 op = *fp >> ENF_NBPA; 517 arg = *fp & ((1 << ENF_NBPA) - 1); 518 fp++; 519 520 switch (arg) { 521 default: 522 arg -= ENF_PUSHWORD; 523 /* 524 * Since arg is unsigned, 525 * if it were less than ENF_PUSHWORD before, 526 * it would now be huge. 527 */ 528 if (arg < maxhdr) 529 *--sp = pp->pd_hdr[arg]; 530 else if (arg < maxword) 531 *--sp = pp->pd_body[arg - maxhdr]; 532 else { 533 #ifdef INNERDEBUG 534 enprintf(ENDBG_TRACE)("=>0(len)\n"); 535 #endif 536 return (0); 537 } 538 break; 539 case ENF_PUSHLIT: 540 *--sp = *fp++; 541 break; 542 case ENF_PUSHZERO: 543 *--sp = 0; 544 break; 545 case ENF_PUSHONE: 546 *--sp = 1; 547 break; 548 case ENF_PUSHFFFF: 549 *--sp = 0xffff; 550 break; 551 case ENF_PUSHFF00: 552 *--sp = 0xff00; 553 break; 554 case ENF_PUSH00FF: 555 *--sp = 0x00ff; 556 break; 557 case ENF_NOPUSH: 558 break; 559 } 560 561 if (sp < &stack[2]) { /* check stack overflow: small yellow zone */ 562 #ifdef INNERDEBUG 563 enprintf(ENDBG_TRACE)("=>0(--sp)\n"); 564 #endif 565 return (0); 566 } 567 568 if (op == ENF_NOP) 569 continue; 570 571 /* 572 * all non-NOP operators binary, must have at least two operands 573 * on stack to evaluate. 574 */ 575 if (sp > &stack[ENMAXFILTERS-2]) { 576 #ifdef INNERDEBUG 577 enprintf(ENDBG_TRACE)("=>0(sp++)\n"); 578 #endif 579 return (0); 580 } 581 582 arg = *sp++; 583 switch (op) { 584 default: 585 #ifdef INNERDEBUG 586 enprintf(ENDBG_TRACE)("=>0(def)\n"); 587 #endif 588 return (0); 589 case opx(ENF_AND): 590 *sp &= arg; 591 break; 592 case opx(ENF_OR): 593 *sp |= arg; 594 break; 595 case opx(ENF_XOR): 596 *sp ^= arg; 597 break; 598 case opx(ENF_EQ): 599 *sp = (*sp == arg); 600 break; 601 case opx(ENF_NEQ): 602 *sp = (*sp != arg); 603 break; 604 case opx(ENF_LT): 605 *sp = (*sp < arg); 606 break; 607 case opx(ENF_LE): 608 *sp = (*sp <= arg); 609 break; 610 case opx(ENF_GT): 611 *sp = (*sp > arg); 612 break; 613 case opx(ENF_GE): 614 *sp = (*sp >= arg); 615 break; 616 617 /* short-circuit operators */ 618 619 case opx(ENF_COR): 620 if (*sp++ == arg) { 621 #ifdef INNERDEBUG 622 enprintf(ENDBG_TRACE)("=>COR %x\n", *sp); 623 #endif 624 return (1); 625 } 626 break; 627 case opx(ENF_CAND): 628 if (*sp++ != arg) { 629 #ifdef INNERDEBUG 630 enprintf(ENDBG_TRACE)("=>CAND %x\n", *sp); 631 #endif 632 return (0); 633 } 634 break; 635 case opx(ENF_CNOR): 636 if (*sp++ == arg) { 637 #ifdef INNERDEBUG 638 enprintf(ENDBG_TRACE)("=>COR %x\n", *sp); 639 #endif 640 return (0); 641 } 642 break; 643 case opx(ENF_CNAND): 644 if (*sp++ != arg) { 645 #ifdef INNERDEBUG 646 enprintf(ENDBG_TRACE)("=>CNAND %x\n", *sp); 647 #endif 648 return (1); 649 } 650 break; 651 } 652 } 653 #ifdef INNERDEBUG 654 enprintf(ENDBG_TRACE)("=>%x\n", *sp); 655 #endif 656 return (*sp); 657 } 658