/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static const char rcsid[] _U_ =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.90.2.1 2008/01/02 04:22:16 guy Exp $ (LBL)";
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>

#include <errno.h>

#include "pcap-int.h"

#include "gencode.h"

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#ifdef BDEBUG
extern int dflag;
#endif

#if defined(MSDOS) && !defined(__DJGPP__)
extern int _w32_ffs (int mask);
#define ffs _w32_ffs
#endif

#if defined(WIN32) && defined (_MSC_VER)
int ffs(int mask);
#endif

/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location. A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented. This automatically makes each element unmarked.
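 * For example, a traversal calls unMarkAll() once and then Mark(b) on each
 * block it visits; isMarked(b) is then true only for blocks visited since
 * that unMarkAll(), with no per-block clearing pass required.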
92 */ 93 static int cur_mark; 94 #define isMarked(p) ((p)->mark == cur_mark) 95 #define unMarkAll() cur_mark += 1 96 #define Mark(p) ((p)->mark = cur_mark) 97 98 static void opt_init(struct block *); 99 static void opt_cleanup(void); 100 101 static void make_marks(struct block *); 102 static void mark_code(struct block *); 103 104 static void intern_blocks(struct block *); 105 106 static int eq_slist(struct slist *, struct slist *); 107 108 static void find_levels_r(struct block *); 109 110 static void find_levels(struct block *); 111 static void find_dom(struct block *); 112 static void propedom(struct edge *); 113 static void find_edom(struct block *); 114 static void find_closure(struct block *); 115 static int atomuse(struct stmt *); 116 static int atomdef(struct stmt *); 117 static void compute_local_ud(struct block *); 118 static void find_ud(struct block *); 119 static void init_val(void); 120 static int F(int, int, int); 121 static inline void vstore(struct stmt *, int *, int, int); 122 static void opt_blk(struct block *, int); 123 static int use_conflict(struct block *, struct block *); 124 static void opt_j(struct edge *); 125 static void or_pullup(struct block *); 126 static void and_pullup(struct block *); 127 static void opt_blks(struct block *, int); 128 static inline void link_inedge(struct edge *, struct block *); 129 static void find_inedges(struct block *); 130 static void opt_root(struct block **); 131 static void opt_loop(struct block *, int); 132 static void fold_op(struct stmt *, int, int); 133 static inline struct slist *this_op(struct slist *); 134 static void opt_not(struct block *); 135 static void opt_peep(struct block *); 136 static void opt_stmt(struct stmt *, int[], int); 137 static void deadstmt(struct stmt *, struct stmt *[]); 138 static void opt_deadstores(struct block *); 139 static struct block *fold_edge(struct block *, struct edge *); 140 static inline int eq_blk(struct block *, struct block *); 141 static int slength(struct slist *); 142 static int count_blocks(struct block *); 143 static void number_blks_r(struct block *); 144 static int count_stmts(struct block *); 145 static int convert_code_r(struct block *); 146 #ifdef BDEBUG 147 static void opt_dump(struct block *); 148 #endif 149 150 static int n_blocks; 151 struct block **blocks; 152 static int n_edges; 153 struct edge **edges; 154 155 /* 156 * A bit vector set representation of the dominators. 157 * We round up the set size to the next power of two. 158 */ 159 static int nodewords; 160 static int edgewords; 161 struct block **levels; 162 bpf_u_int32 *space; 163 #define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 164 /* 165 * True if a is in uset {p} 166 */ 167 #define SET_MEMBER(p, a) \ 168 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 169 170 /* 171 * Add 'a' to uset p. 172 */ 173 #define SET_INSERT(p, a) \ 174 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 175 176 /* 177 * Delete 'a' from uset p. 
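 * As with SET_INSERT, element 'a' is bit (a % BITS_PER_WORD) of word
 * (a / BITS_PER_WORD); deletion simply clears that one bit.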
178 */ 179 #define SET_DELETE(p, a) \ 180 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 181 182 /* 183 * a := a intersect b 184 */ 185 #define SET_INTERSECT(a, b, n)\ 186 {\ 187 register bpf_u_int32 *_x = a, *_y = b;\ 188 register int _n = n;\ 189 while (--_n >= 0) *_x++ &= *_y++;\ 190 } 191 192 /* 193 * a := a - b 194 */ 195 #define SET_SUBTRACT(a, b, n)\ 196 {\ 197 register bpf_u_int32 *_x = a, *_y = b;\ 198 register int _n = n;\ 199 while (--_n >= 0) *_x++ &=~ *_y++;\ 200 } 201 202 /* 203 * a := a union b 204 */ 205 #define SET_UNION(a, b, n)\ 206 {\ 207 register bpf_u_int32 *_x = a, *_y = b;\ 208 register int _n = n;\ 209 while (--_n >= 0) *_x++ |= *_y++;\ 210 } 211 212 static uset all_dom_sets; 213 static uset all_closure_sets; 214 static uset all_edge_sets; 215 216 #ifndef MAX 217 #define MAX(a,b) ((a)>(b)?(a):(b)) 218 #endif 219 220 static void 221 find_levels_r(b) 222 struct block *b; 223 { 224 int level; 225 226 if (isMarked(b)) 227 return; 228 229 Mark(b); 230 b->link = 0; 231 232 if (JT(b)) { 233 find_levels_r(JT(b)); 234 find_levels_r(JF(b)); 235 level = MAX(JT(b)->level, JF(b)->level) + 1; 236 } else 237 level = 0; 238 b->level = level; 239 b->link = levels[level]; 240 levels[level] = b; 241 } 242 243 /* 244 * Level graph. The levels go from 0 at the leaves to 245 * N_LEVELS at the root. The levels[] array points to the 246 * first node of the level list, whose elements are linked 247 * with the 'link' field of the struct block. 248 */ 249 static void 250 find_levels(root) 251 struct block *root; 252 { 253 memset((char *)levels, 0, n_blocks * sizeof(*levels)); 254 unMarkAll(); 255 find_levels_r(root); 256 } 257 258 /* 259 * Find dominator relationships. 260 * Assumes graph has been leveled. 261 */ 262 static void 263 find_dom(root) 264 struct block *root; 265 { 266 int i; 267 struct block *b; 268 bpf_u_int32 *x; 269 270 /* 271 * Initialize sets to contain all nodes. 272 */ 273 x = all_dom_sets; 274 i = n_blocks * nodewords; 275 while (--i >= 0) 276 *x++ = ~0; 277 /* Root starts off empty. */ 278 for (i = nodewords; --i >= 0;) 279 root->dom[i] = 0; 280 281 /* root->level is the highest level no found. */ 282 for (i = root->level; i >= 0; --i) { 283 for (b = levels[i]; b; b = b->link) { 284 SET_INSERT(b->dom, b->id); 285 if (JT(b) == 0) 286 continue; 287 SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 288 SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 289 } 290 } 291 } 292 293 static void 294 propedom(ep) 295 struct edge *ep; 296 { 297 SET_INSERT(ep->edom, ep->id); 298 if (ep->succ) { 299 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 300 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 301 } 302 } 303 304 /* 305 * Compute edge dominators. 306 * Assumes graph has been leveled and predecessors established. 307 */ 308 static void 309 find_edom(root) 310 struct block *root; 311 { 312 int i; 313 uset x; 314 struct block *b; 315 316 x = all_edge_sets; 317 for (i = n_edges * edgewords; --i >= 0; ) 318 x[i] = ~0; 319 320 /* root->level is the highest level no found. */ 321 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 322 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 323 for (i = root->level; i >= 0; --i) { 324 for (b = levels[i]; b != 0; b = b->link) { 325 propedom(&b->et); 326 propedom(&b->ef); 327 } 328 } 329 } 330 331 /* 332 * Find the backwards transitive closure of the flow graph. 
These sets 333 * are backwards in the sense that we find the set of nodes that reach 334 * a given node, not the set of nodes that can be reached by a node. 335 * 336 * Assumes graph has been leveled. 337 */ 338 static void 339 find_closure(root) 340 struct block *root; 341 { 342 int i; 343 struct block *b; 344 345 /* 346 * Initialize sets to contain no nodes. 347 */ 348 memset((char *)all_closure_sets, 0, 349 n_blocks * nodewords * sizeof(*all_closure_sets)); 350 351 /* root->level is the highest level no found. */ 352 for (i = root->level; i >= 0; --i) { 353 for (b = levels[i]; b; b = b->link) { 354 SET_INSERT(b->closure, b->id); 355 if (JT(b) == 0) 356 continue; 357 SET_UNION(JT(b)->closure, b->closure, nodewords); 358 SET_UNION(JF(b)->closure, b->closure, nodewords); 359 } 360 } 361 } 362 363 /* 364 * Return the register number that is used by s. If A and X are both 365 * used, return AX_ATOM. If no register is used, return -1. 366 * 367 * The implementation should probably change to an array access. 368 */ 369 static int 370 atomuse(s) 371 struct stmt *s; 372 { 373 register int c = s->code; 374 375 if (c == NOP) 376 return -1; 377 378 switch (BPF_CLASS(c)) { 379 380 case BPF_RET: 381 return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 382 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 383 384 case BPF_LD: 385 case BPF_LDX: 386 return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 387 (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 388 389 case BPF_ST: 390 return A_ATOM; 391 392 case BPF_STX: 393 return X_ATOM; 394 395 case BPF_JMP: 396 case BPF_ALU: 397 if (BPF_SRC(c) == BPF_X) 398 return AX_ATOM; 399 return A_ATOM; 400 401 case BPF_MISC: 402 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 403 } 404 abort(); 405 /* NOTREACHED */ 406 } 407 408 /* 409 * Return the register number that is defined by 's'. We assume that 410 * a single stmt cannot define more than one register. If no register 411 * is defined, return -1. 412 * 413 * The implementation should probably change to an array access. 414 */ 415 static int 416 atomdef(s) 417 struct stmt *s; 418 { 419 if (s->code == NOP) 420 return -1; 421 422 switch (BPF_CLASS(s->code)) { 423 424 case BPF_LD: 425 case BPF_ALU: 426 return A_ATOM; 427 428 case BPF_LDX: 429 return X_ATOM; 430 431 case BPF_ST: 432 case BPF_STX: 433 return s->k; 434 435 case BPF_MISC: 436 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 437 } 438 return -1; 439 } 440 441 /* 442 * Compute the sets of registers used, defined, and killed by 'b'. 443 * 444 * "Used" means that a statement in 'b' uses the register before any 445 * statement in 'b' defines it, i.e. it uses the value left in 446 * that register by a predecessor block of this block. 447 * "Defined" means that a statement in 'b' defines it. 448 * "Killed" means that a statement in 'b' defines it before any 449 * statement in 'b' uses it, i.e. it kills the value left in that 450 * register by a predecessor block of this block. 
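 *
 * For example, in a block whose statements are "ld [12]; st M[0]", the
 * accumulator and M[0] are both defined and killed (each is written before
 * any use within the block), while a block that begins with "tax" uses the
 * accumulator, since it reads A before anything in the block defines it.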
451 */ 452 static void 453 compute_local_ud(b) 454 struct block *b; 455 { 456 struct slist *s; 457 atomset def = 0, use = 0, kill = 0; 458 int atom; 459 460 for (s = b->stmts; s; s = s->next) { 461 if (s->s.code == NOP) 462 continue; 463 atom = atomuse(&s->s); 464 if (atom >= 0) { 465 if (atom == AX_ATOM) { 466 if (!ATOMELEM(def, X_ATOM)) 467 use |= ATOMMASK(X_ATOM); 468 if (!ATOMELEM(def, A_ATOM)) 469 use |= ATOMMASK(A_ATOM); 470 } 471 else if (atom < N_ATOMS) { 472 if (!ATOMELEM(def, atom)) 473 use |= ATOMMASK(atom); 474 } 475 else 476 abort(); 477 } 478 atom = atomdef(&s->s); 479 if (atom >= 0) { 480 if (!ATOMELEM(use, atom)) 481 kill |= ATOMMASK(atom); 482 def |= ATOMMASK(atom); 483 } 484 } 485 if (BPF_CLASS(b->s.code) == BPF_JMP) { 486 /* 487 * XXX - what about RET? 488 */ 489 atom = atomuse(&b->s); 490 if (atom >= 0) { 491 if (atom == AX_ATOM) { 492 if (!ATOMELEM(def, X_ATOM)) 493 use |= ATOMMASK(X_ATOM); 494 if (!ATOMELEM(def, A_ATOM)) 495 use |= ATOMMASK(A_ATOM); 496 } 497 else if (atom < N_ATOMS) { 498 if (!ATOMELEM(def, atom)) 499 use |= ATOMMASK(atom); 500 } 501 else 502 abort(); 503 } 504 } 505 506 b->def = def; 507 b->kill = kill; 508 b->in_use = use; 509 } 510 511 /* 512 * Assume graph is already leveled. 513 */ 514 static void 515 find_ud(root) 516 struct block *root; 517 { 518 int i, maxlevel; 519 struct block *p; 520 521 /* 522 * root->level is the highest level no found; 523 * count down from there. 524 */ 525 maxlevel = root->level; 526 for (i = maxlevel; i >= 0; --i) 527 for (p = levels[i]; p; p = p->link) { 528 compute_local_ud(p); 529 p->out_use = 0; 530 } 531 532 for (i = 1; i <= maxlevel; ++i) { 533 for (p = levels[i]; p; p = p->link) { 534 p->out_use |= JT(p)->in_use | JF(p)->in_use; 535 p->in_use |= p->out_use &~ p->kill; 536 } 537 } 538 } 539 540 /* 541 * These data structures are used in a Cocke and Shwarz style 542 * value numbering scheme. Since the flowgraph is acyclic, 543 * exit values can be propagated from a node's predecessors 544 * provided it is uniquely defined. 545 */ 546 struct valnode { 547 int code; 548 int v0, v1; 549 int val; 550 struct valnode *next; 551 }; 552 553 #define MODULUS 213 554 static struct valnode *hashtbl[MODULUS]; 555 static int curval; 556 static int maxval; 557 558 /* Integer constants mapped with the load immediate opcode. */ 559 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 560 561 struct vmapinfo { 562 int is_const; 563 bpf_int32 const_val; 564 }; 565 566 struct vmapinfo *vmap; 567 struct valnode *vnode_base; 568 struct valnode *next_vnode; 569 570 static void 571 init_val() 572 { 573 curval = 0; 574 next_vnode = vnode_base; 575 memset((char *)vmap, 0, maxval * sizeof(*vmap)); 576 memset((char *)hashtbl, 0, sizeof hashtbl); 577 } 578 579 /* Because we really don't have an IR, this stuff is a little messy. 
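 * F() below hands out the value numbers: it hashes the triple
 * (code, v0, v1), returns the existing number if that triple has been
 * seen before, and otherwise allocates a new one (recording it in vmap[]
 * as a known constant when the triple is a load immediate).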
*/ 580 static int 581 F(code, v0, v1) 582 int code; 583 int v0, v1; 584 { 585 u_int hash; 586 int val; 587 struct valnode *p; 588 589 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 590 hash %= MODULUS; 591 592 for (p = hashtbl[hash]; p; p = p->next) 593 if (p->code == code && p->v0 == v0 && p->v1 == v1) 594 return p->val; 595 596 val = ++curval; 597 if (BPF_MODE(code) == BPF_IMM && 598 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 599 vmap[val].const_val = v0; 600 vmap[val].is_const = 1; 601 } 602 p = next_vnode++; 603 p->val = val; 604 p->code = code; 605 p->v0 = v0; 606 p->v1 = v1; 607 p->next = hashtbl[hash]; 608 hashtbl[hash] = p; 609 610 return val; 611 } 612 613 static inline void 614 vstore(s, valp, newval, alter) 615 struct stmt *s; 616 int *valp; 617 int newval; 618 int alter; 619 { 620 if (alter && *valp == newval) 621 s->code = NOP; 622 else 623 *valp = newval; 624 } 625 626 static void 627 fold_op(s, v0, v1) 628 struct stmt *s; 629 int v0, v1; 630 { 631 bpf_u_int32 a, b; 632 633 a = vmap[v0].const_val; 634 b = vmap[v1].const_val; 635 636 switch (BPF_OP(s->code)) { 637 case BPF_ADD: 638 a += b; 639 break; 640 641 case BPF_SUB: 642 a -= b; 643 break; 644 645 case BPF_MUL: 646 a *= b; 647 break; 648 649 case BPF_DIV: 650 if (b == 0) 651 bpf_error("division by zero"); 652 a /= b; 653 break; 654 655 case BPF_AND: 656 a &= b; 657 break; 658 659 case BPF_OR: 660 a |= b; 661 break; 662 663 case BPF_LSH: 664 a <<= b; 665 break; 666 667 case BPF_RSH: 668 a >>= b; 669 break; 670 671 case BPF_NEG: 672 a = -a; 673 break; 674 675 default: 676 abort(); 677 } 678 s->k = a; 679 s->code = BPF_LD|BPF_IMM; 680 done = 0; 681 } 682 683 static inline struct slist * 684 this_op(s) 685 struct slist *s; 686 { 687 while (s != 0 && s->s.code == NOP) 688 s = s->next; 689 return s; 690 } 691 692 static void 693 opt_not(b) 694 struct block *b; 695 { 696 struct block *tmp = JT(b); 697 698 JT(b) = JF(b); 699 JF(b) = tmp; 700 } 701 702 static void 703 opt_peep(b) 704 struct block *b; 705 { 706 struct slist *s; 707 struct slist *next, *last; 708 int val; 709 710 s = b->stmts; 711 if (s == 0) 712 return; 713 714 last = s; 715 for (/*empty*/; /*empty*/; s = next) { 716 /* 717 * Skip over nops. 718 */ 719 s = this_op(s); 720 if (s == 0) 721 break; /* nothing left in the block */ 722 723 /* 724 * Find the next real instruction after that one 725 * (skipping nops). 726 */ 727 next = this_op(s->next); 728 if (next == 0) 729 break; /* no next instruction */ 730 last = next; 731 732 /* 733 * st M[k] --> st M[k] 734 * ldx M[k] tax 735 */ 736 if (s->s.code == BPF_ST && 737 next->s.code == (BPF_LDX|BPF_MEM) && 738 s->s.k == next->s.k) { 739 done = 0; 740 next->s.code = BPF_MISC|BPF_TAX; 741 } 742 /* 743 * ld #k --> ldx #k 744 * tax txa 745 */ 746 if (s->s.code == (BPF_LD|BPF_IMM) && 747 next->s.code == (BPF_MISC|BPF_TAX)) { 748 s->s.code = BPF_LDX|BPF_IMM; 749 next->s.code = BPF_MISC|BPF_TXA; 750 done = 0; 751 } 752 /* 753 * This is an ugly special case, but it happens 754 * when you say tcp[k] or udp[k] where k is a constant. 755 */ 756 if (s->s.code == (BPF_LD|BPF_IMM)) { 757 struct slist *add, *tax, *ild; 758 759 /* 760 * Check that X isn't used on exit from this 761 * block (which the optimizer might cause). 762 * We know the code generator won't generate 763 * any local dependencies. 
764 */ 765 if (ATOMELEM(b->out_use, X_ATOM)) 766 continue; 767 768 /* 769 * Check that the instruction following the ldi 770 * is an addx, or it's an ldxms with an addx 771 * following it (with 0 or more nops between the 772 * ldxms and addx). 773 */ 774 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 775 add = next; 776 else 777 add = this_op(next->next); 778 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 779 continue; 780 781 /* 782 * Check that a tax follows that (with 0 or more 783 * nops between them). 784 */ 785 tax = this_op(add->next); 786 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 787 continue; 788 789 /* 790 * Check that an ild follows that (with 0 or more 791 * nops between them). 792 */ 793 ild = this_op(tax->next); 794 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 795 BPF_MODE(ild->s.code) != BPF_IND) 796 continue; 797 /* 798 * We want to turn this sequence: 799 * 800 * (004) ldi #0x2 {s} 801 * (005) ldxms [14] {next} -- optional 802 * (006) addx {add} 803 * (007) tax {tax} 804 * (008) ild [x+0] {ild} 805 * 806 * into this sequence: 807 * 808 * (004) nop 809 * (005) ldxms [14] 810 * (006) nop 811 * (007) nop 812 * (008) ild [x+2] 813 * 814 * XXX We need to check that X is not 815 * subsequently used, because we want to change 816 * what'll be in it after this sequence. 817 * 818 * We know we can eliminate the accumulator 819 * modifications earlier in the sequence since 820 * it is defined by the last stmt of this sequence 821 * (i.e., the last statement of the sequence loads 822 * a value into the accumulator, so we can eliminate 823 * earlier operations on the accumulator). 824 */ 825 ild->s.k += s->s.k; 826 s->s.code = NOP; 827 add->s.code = NOP; 828 tax->s.code = NOP; 829 done = 0; 830 } 831 } 832 /* 833 * If the comparison at the end of a block is an equality 834 * comparison against a constant, and nobody uses the value 835 * we leave in the A register at the end of a block, and 836 * the operation preceding the comparison is an arithmetic 837 * operation, we can sometime optimize it away. 838 */ 839 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 840 !ATOMELEM(b->out_use, A_ATOM)) { 841 /* 842 * We can optimize away certain subtractions of the 843 * X register. 
844 */ 845 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 846 val = b->val[X_ATOM]; 847 if (vmap[val].is_const) { 848 /* 849 * If we have a subtract to do a comparison, 850 * and the X register is a known constant, 851 * we can merge this value into the 852 * comparison: 853 * 854 * sub x -> nop 855 * jeq #y jeq #(x+y) 856 */ 857 b->s.k += vmap[val].const_val; 858 last->s.code = NOP; 859 done = 0; 860 } else if (b->s.k == 0) { 861 /* 862 * If the X register isn't a constant, 863 * and the comparison in the test is 864 * against 0, we can compare with the 865 * X register, instead: 866 * 867 * sub x -> nop 868 * jeq #0 jeq x 869 */ 870 last->s.code = NOP; 871 b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 872 done = 0; 873 } 874 } 875 /* 876 * Likewise, a constant subtract can be simplified: 877 * 878 * sub #x -> nop 879 * jeq #y -> jeq #(x+y) 880 */ 881 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 882 last->s.code = NOP; 883 b->s.k += last->s.k; 884 done = 0; 885 } 886 /* 887 * And, similarly, a constant AND can be simplified 888 * if we're testing against 0, i.e.: 889 * 890 * and #k nop 891 * jeq #0 -> jset #k 892 */ 893 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 894 b->s.k == 0) { 895 b->s.k = last->s.k; 896 b->s.code = BPF_JMP|BPF_K|BPF_JSET; 897 last->s.code = NOP; 898 done = 0; 899 opt_not(b); 900 } 901 } 902 /* 903 * jset #0 -> never 904 * jset #ffffffff -> always 905 */ 906 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 907 if (b->s.k == 0) 908 JT(b) = JF(b); 909 if (b->s.k == 0xffffffff) 910 JF(b) = JT(b); 911 } 912 /* 913 * If we're comparing against the index register, and the index 914 * register is a known constant, we can just compare against that 915 * constant. 916 */ 917 val = b->val[X_ATOM]; 918 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 919 bpf_int32 v = vmap[val].const_val; 920 b->s.code &= ~BPF_X; 921 b->s.k = v; 922 } 923 /* 924 * If the accumulator is a known constant, we can compute the 925 * comparison result. 926 */ 927 val = b->val[A_ATOM]; 928 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 929 bpf_int32 v = vmap[val].const_val; 930 switch (BPF_OP(b->s.code)) { 931 932 case BPF_JEQ: 933 v = v == b->s.k; 934 break; 935 936 case BPF_JGT: 937 v = (unsigned)v > b->s.k; 938 break; 939 940 case BPF_JGE: 941 v = (unsigned)v >= b->s.k; 942 break; 943 944 case BPF_JSET: 945 v &= b->s.k; 946 break; 947 948 default: 949 abort(); 950 } 951 if (JF(b) != JT(b)) 952 done = 0; 953 if (v) 954 JF(b) = JT(b); 955 else 956 JT(b) = JF(b); 957 } 958 } 959 960 /* 961 * Compute the symbolic value of expression of 's', and update 962 * anything it defines in the value table 'val'. If 'alter' is true, 963 * do various optimizations. This code would be cleaner if symbolic 964 * evaluation and code transformations weren't folded together. 
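 *
 * For example, when 'alter' is set, "ld #4" followed by "add #2" leaves the
 * accumulator bound to the value number of the constant 6: fold_op()
 * rewrites the add as "ld #6", and the now-redundant "ld #4" is left for
 * opt_deadstores() to delete.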
965 */ 966 static void 967 opt_stmt(s, val, alter) 968 struct stmt *s; 969 int val[]; 970 int alter; 971 { 972 int op; 973 int v; 974 975 switch (s->code) { 976 977 case BPF_LD|BPF_ABS|BPF_W: 978 case BPF_LD|BPF_ABS|BPF_H: 979 case BPF_LD|BPF_ABS|BPF_B: 980 v = F(s->code, s->k, 0L); 981 vstore(s, &val[A_ATOM], v, alter); 982 break; 983 984 case BPF_LD|BPF_IND|BPF_W: 985 case BPF_LD|BPF_IND|BPF_H: 986 case BPF_LD|BPF_IND|BPF_B: 987 v = val[X_ATOM]; 988 if (alter && vmap[v].is_const) { 989 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 990 s->k += vmap[v].const_val; 991 v = F(s->code, s->k, 0L); 992 done = 0; 993 } 994 else 995 v = F(s->code, s->k, v); 996 vstore(s, &val[A_ATOM], v, alter); 997 break; 998 999 case BPF_LD|BPF_LEN: 1000 v = F(s->code, 0L, 0L); 1001 vstore(s, &val[A_ATOM], v, alter); 1002 break; 1003 1004 case BPF_LD|BPF_IMM: 1005 v = K(s->k); 1006 vstore(s, &val[A_ATOM], v, alter); 1007 break; 1008 1009 case BPF_LDX|BPF_IMM: 1010 v = K(s->k); 1011 vstore(s, &val[X_ATOM], v, alter); 1012 break; 1013 1014 case BPF_LDX|BPF_MSH|BPF_B: 1015 v = F(s->code, s->k, 0L); 1016 vstore(s, &val[X_ATOM], v, alter); 1017 break; 1018 1019 case BPF_ALU|BPF_NEG: 1020 if (alter && vmap[val[A_ATOM]].is_const) { 1021 s->code = BPF_LD|BPF_IMM; 1022 s->k = -vmap[val[A_ATOM]].const_val; 1023 val[A_ATOM] = K(s->k); 1024 } 1025 else 1026 val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 1027 break; 1028 1029 case BPF_ALU|BPF_ADD|BPF_K: 1030 case BPF_ALU|BPF_SUB|BPF_K: 1031 case BPF_ALU|BPF_MUL|BPF_K: 1032 case BPF_ALU|BPF_DIV|BPF_K: 1033 case BPF_ALU|BPF_AND|BPF_K: 1034 case BPF_ALU|BPF_OR|BPF_K: 1035 case BPF_ALU|BPF_LSH|BPF_K: 1036 case BPF_ALU|BPF_RSH|BPF_K: 1037 op = BPF_OP(s->code); 1038 if (alter) { 1039 if (s->k == 0) { 1040 /* don't optimize away "sub #0" 1041 * as it may be needed later to 1042 * fixup the generated math code */ 1043 if (op == BPF_ADD || 1044 op == BPF_LSH || op == BPF_RSH || 1045 op == BPF_OR) { 1046 s->code = NOP; 1047 break; 1048 } 1049 if (op == BPF_MUL || op == BPF_AND) { 1050 s->code = BPF_LD|BPF_IMM; 1051 val[A_ATOM] = K(s->k); 1052 break; 1053 } 1054 } 1055 if (vmap[val[A_ATOM]].is_const) { 1056 fold_op(s, val[A_ATOM], K(s->k)); 1057 val[A_ATOM] = K(s->k); 1058 break; 1059 } 1060 } 1061 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 1062 break; 1063 1064 case BPF_ALU|BPF_ADD|BPF_X: 1065 case BPF_ALU|BPF_SUB|BPF_X: 1066 case BPF_ALU|BPF_MUL|BPF_X: 1067 case BPF_ALU|BPF_DIV|BPF_X: 1068 case BPF_ALU|BPF_AND|BPF_X: 1069 case BPF_ALU|BPF_OR|BPF_X: 1070 case BPF_ALU|BPF_LSH|BPF_X: 1071 case BPF_ALU|BPF_RSH|BPF_X: 1072 op = BPF_OP(s->code); 1073 if (alter && vmap[val[X_ATOM]].is_const) { 1074 if (vmap[val[A_ATOM]].is_const) { 1075 fold_op(s, val[A_ATOM], val[X_ATOM]); 1076 val[A_ATOM] = K(s->k); 1077 } 1078 else { 1079 s->code = BPF_ALU|BPF_K|op; 1080 s->k = vmap[val[X_ATOM]].const_val; 1081 done = 0; 1082 val[A_ATOM] = 1083 F(s->code, val[A_ATOM], K(s->k)); 1084 } 1085 break; 1086 } 1087 /* 1088 * Check if we're doing something to an accumulator 1089 * that is 0, and simplify. This may not seem like 1090 * much of a simplification but it could open up further 1091 * optimizations. 1092 * XXX We could also check for mul by 1, etc. 
1093 */ 1094 if (alter && vmap[val[A_ATOM]].is_const 1095 && vmap[val[A_ATOM]].const_val == 0) { 1096 if (op == BPF_ADD || op == BPF_OR) { 1097 s->code = BPF_MISC|BPF_TXA; 1098 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1099 break; 1100 } 1101 else if (op == BPF_MUL || op == BPF_DIV || 1102 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 1103 s->code = BPF_LD|BPF_IMM; 1104 s->k = 0; 1105 vstore(s, &val[A_ATOM], K(s->k), alter); 1106 break; 1107 } 1108 else if (op == BPF_NEG) { 1109 s->code = NOP; 1110 break; 1111 } 1112 } 1113 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 1114 break; 1115 1116 case BPF_MISC|BPF_TXA: 1117 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1118 break; 1119 1120 case BPF_LD|BPF_MEM: 1121 v = val[s->k]; 1122 if (alter && vmap[v].is_const) { 1123 s->code = BPF_LD|BPF_IMM; 1124 s->k = vmap[v].const_val; 1125 done = 0; 1126 } 1127 vstore(s, &val[A_ATOM], v, alter); 1128 break; 1129 1130 case BPF_MISC|BPF_TAX: 1131 vstore(s, &val[X_ATOM], val[A_ATOM], alter); 1132 break; 1133 1134 case BPF_LDX|BPF_MEM: 1135 v = val[s->k]; 1136 if (alter && vmap[v].is_const) { 1137 s->code = BPF_LDX|BPF_IMM; 1138 s->k = vmap[v].const_val; 1139 done = 0; 1140 } 1141 vstore(s, &val[X_ATOM], v, alter); 1142 break; 1143 1144 case BPF_ST: 1145 vstore(s, &val[s->k], val[A_ATOM], alter); 1146 break; 1147 1148 case BPF_STX: 1149 vstore(s, &val[s->k], val[X_ATOM], alter); 1150 break; 1151 } 1152 } 1153 1154 static void 1155 deadstmt(s, last) 1156 register struct stmt *s; 1157 register struct stmt *last[]; 1158 { 1159 register int atom; 1160 1161 atom = atomuse(s); 1162 if (atom >= 0) { 1163 if (atom == AX_ATOM) { 1164 last[X_ATOM] = 0; 1165 last[A_ATOM] = 0; 1166 } 1167 else 1168 last[atom] = 0; 1169 } 1170 atom = atomdef(s); 1171 if (atom >= 0) { 1172 if (last[atom]) { 1173 done = 0; 1174 last[atom]->code = NOP; 1175 } 1176 last[atom] = s; 1177 } 1178 } 1179 1180 static void 1181 opt_deadstores(b) 1182 register struct block *b; 1183 { 1184 register struct slist *s; 1185 register int atom; 1186 struct stmt *last[N_ATOMS]; 1187 1188 memset((char *)last, 0, sizeof last); 1189 1190 for (s = b->stmts; s != 0; s = s->next) 1191 deadstmt(&s->s, last); 1192 deadstmt(&b->s, last); 1193 1194 for (atom = 0; atom < N_ATOMS; ++atom) 1195 if (last[atom] && !ATOMELEM(b->out_use, atom)) { 1196 last[atom]->code = NOP; 1197 done = 0; 1198 } 1199 } 1200 1201 static void 1202 opt_blk(b, do_stmts) 1203 struct block *b; 1204 int do_stmts; 1205 { 1206 struct slist *s; 1207 struct edge *p; 1208 int i; 1209 bpf_int32 aval, xval; 1210 1211 #if 0 1212 for (s = b->stmts; s && s->next; s = s->next) 1213 if (BPF_CLASS(s->s.code) == BPF_JMP) { 1214 do_stmts = 0; 1215 break; 1216 } 1217 #endif 1218 1219 /* 1220 * Initialize the atom values. 1221 */ 1222 p = b->in_edges; 1223 if (p == 0) { 1224 /* 1225 * We have no predecessors, so everything is undefined 1226 * upon entry to this block. 1227 */ 1228 memset((char *)b->val, 0, sizeof(b->val)); 1229 } else { 1230 /* 1231 * Inherit values from our predecessors. 1232 * 1233 * First, get the values from the predecessor along the 1234 * first edge leading to this node. 1235 */ 1236 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 1237 /* 1238 * Now look at all the other nodes leading to this node. 
1239 * If, for the predecessor along that edge, a register 1240 * has a different value from the one we have (i.e., 1241 * control paths are merging, and the merging paths 1242 * assign different values to that register), give the 1243 * register the undefined value of 0. 1244 */ 1245 while ((p = p->next) != NULL) { 1246 for (i = 0; i < N_ATOMS; ++i) 1247 if (b->val[i] != p->pred->val[i]) 1248 b->val[i] = 0; 1249 } 1250 } 1251 aval = b->val[A_ATOM]; 1252 xval = b->val[X_ATOM]; 1253 for (s = b->stmts; s; s = s->next) 1254 opt_stmt(&s->s, b->val, do_stmts); 1255 1256 /* 1257 * This is a special case: if we don't use anything from this 1258 * block, and we load the accumulator or index register with a 1259 * value that is already there, or if this block is a return, 1260 * eliminate all the statements. 1261 * 1262 * XXX - what if it does a store? 1263 * 1264 * XXX - why does it matter whether we use anything from this 1265 * block? If the accumulator or index register doesn't change 1266 * its value, isn't that OK even if we use that value? 1267 * 1268 * XXX - if we load the accumulator with a different value, 1269 * and the block ends with a conditional branch, we obviously 1270 * can't eliminate it, as the branch depends on that value. 1271 * For the index register, the conditional branch only depends 1272 * on the index register value if the test is against the index 1273 * register value rather than a constant; if nothing uses the 1274 * value we put into the index register, and we're not testing 1275 * against the index register's value, and there aren't any 1276 * other problems that would keep us from eliminating this 1277 * block, can we eliminate it? 1278 */ 1279 if (do_stmts && 1280 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval && 1281 xval != 0 && b->val[X_ATOM] == xval) || 1282 BPF_CLASS(b->s.code) == BPF_RET)) { 1283 if (b->stmts != 0) { 1284 b->stmts = 0; 1285 done = 0; 1286 } 1287 } else { 1288 opt_peep(b); 1289 opt_deadstores(b); 1290 } 1291 /* 1292 * Set up values for branch optimizer. 1293 */ 1294 if (BPF_SRC(b->s.code) == BPF_K) 1295 b->oval = K(b->s.k); 1296 else 1297 b->oval = b->val[X_ATOM]; 1298 b->et.code = b->s.code; 1299 b->ef.code = -b->s.code; 1300 } 1301 1302 /* 1303 * Return true if any register that is used on exit from 'succ', has 1304 * an exit value that is different from the corresponding exit value 1305 * from 'b'. 1306 */ 1307 static int 1308 use_conflict(b, succ) 1309 struct block *b, *succ; 1310 { 1311 int atom; 1312 atomset use = succ->out_use; 1313 1314 if (use == 0) 1315 return 0; 1316 1317 for (atom = 0; atom < N_ATOMS; ++atom) 1318 if (ATOMELEM(use, atom)) 1319 if (b->val[atom] != succ->val[atom]) 1320 return 1; 1321 return 0; 1322 } 1323 1324 static struct block * 1325 fold_edge(child, ep) 1326 struct block *child; 1327 struct edge *ep; 1328 { 1329 int sense; 1330 int aval0, aval1, oval0, oval1; 1331 int code = ep->code; 1332 1333 if (code < 0) { 1334 code = -code; 1335 sense = 0; 1336 } else 1337 sense = 1; 1338 1339 if (child->s.code != code) 1340 return 0; 1341 1342 aval0 = child->val[A_ATOM]; 1343 oval0 = child->oval; 1344 aval1 = ep->pred->val[A_ATOM]; 1345 oval1 = ep->pred->oval; 1346 1347 if (aval0 != aval1) 1348 return 0; 1349 1350 if (oval0 == oval1) 1351 /* 1352 * The operands of the branch instructions are 1353 * identical, so the result is true if a true 1354 * branch was taken to get here, otherwise false. 1355 */ 1356 return sense ? 
JT(child) : JF(child); 1357 1358 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 1359 /* 1360 * At this point, we only know the comparison if we 1361 * came down the true branch, and it was an equality 1362 * comparison with a constant. 1363 * 1364 * I.e., if we came down the true branch, and the branch 1365 * was an equality comparison with a constant, we know the 1366 * accumulator contains that constant. If we came down 1367 * the false branch, or the comparison wasn't with a 1368 * constant, we don't know what was in the accumulator. 1369 * 1370 * We rely on the fact that distinct constants have distinct 1371 * value numbers. 1372 */ 1373 return JF(child); 1374 1375 return 0; 1376 } 1377 1378 static void 1379 opt_j(ep) 1380 struct edge *ep; 1381 { 1382 register int i, k; 1383 register struct block *target; 1384 1385 if (JT(ep->succ) == 0) 1386 return; 1387 1388 if (JT(ep->succ) == JF(ep->succ)) { 1389 /* 1390 * Common branch targets can be eliminated, provided 1391 * there is no data dependency. 1392 */ 1393 if (!use_conflict(ep->pred, ep->succ->et.succ)) { 1394 done = 0; 1395 ep->succ = JT(ep->succ); 1396 } 1397 } 1398 /* 1399 * For each edge dominator that matches the successor of this 1400 * edge, promote the edge successor to the its grandchild. 1401 * 1402 * XXX We violate the set abstraction here in favor a reasonably 1403 * efficient loop. 1404 */ 1405 top: 1406 for (i = 0; i < edgewords; ++i) { 1407 register bpf_u_int32 x = ep->edom[i]; 1408 1409 while (x != 0) { 1410 k = ffs(x) - 1; 1411 x &=~ (1 << k); 1412 k += i * BITS_PER_WORD; 1413 1414 target = fold_edge(ep->succ, edges[k]); 1415 /* 1416 * Check that there is no data dependency between 1417 * nodes that will be violated if we move the edge. 1418 */ 1419 if (target != 0 && !use_conflict(ep->pred, target)) { 1420 done = 0; 1421 ep->succ = target; 1422 if (JT(target) != 0) 1423 /* 1424 * Start over unless we hit a leaf. 1425 */ 1426 goto top; 1427 return; 1428 } 1429 } 1430 } 1431 } 1432 1433 1434 static void 1435 or_pullup(b) 1436 struct block *b; 1437 { 1438 int val, at_top; 1439 struct block *pull; 1440 struct block **diffp, **samep; 1441 struct edge *ep; 1442 1443 ep = b->in_edges; 1444 if (ep == 0) 1445 return; 1446 1447 /* 1448 * Make sure each predecessor loads the same value. 1449 * XXX why? 1450 */ 1451 val = ep->pred->val[A_ATOM]; 1452 for (ep = ep->next; ep != 0; ep = ep->next) 1453 if (val != ep->pred->val[A_ATOM]) 1454 return; 1455 1456 if (JT(b->in_edges->pred) == b) 1457 diffp = &JT(b->in_edges->pred); 1458 else 1459 diffp = &JF(b->in_edges->pred); 1460 1461 at_top = 1; 1462 while (1) { 1463 if (*diffp == 0) 1464 return; 1465 1466 if (JT(*diffp) != JT(b)) 1467 return; 1468 1469 if (!SET_MEMBER((*diffp)->dom, b->id)) 1470 return; 1471 1472 if ((*diffp)->val[A_ATOM] != val) 1473 break; 1474 1475 diffp = &JF(*diffp); 1476 at_top = 0; 1477 } 1478 samep = &JF(*diffp); 1479 while (1) { 1480 if (*samep == 0) 1481 return; 1482 1483 if (JT(*samep) != JT(b)) 1484 return; 1485 1486 if (!SET_MEMBER((*samep)->dom, b->id)) 1487 return; 1488 1489 if ((*samep)->val[A_ATOM] == val) 1490 break; 1491 1492 /* XXX Need to check that there are no data dependencies 1493 between dp0 and dp1. Currently, the code generator 1494 will not produce such dependencies. */ 1495 samep = &JF(*samep); 1496 } 1497 #ifdef notdef 1498 /* XXX This doesn't cover everything. */ 1499 for (i = 0; i < N_ATOMS; ++i) 1500 if ((*samep)->val[i] != pred->val[i]) 1501 return; 1502 #endif 1503 /* Pull up the node. 
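 * 'pull' (the first block further down the false-edge chain that computes
 * the same A value as b's predecessors) is spliced out of its current
 * position and re-linked in front of *diffp, the first block whose A
 * value differs.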
*/ 1504 pull = *samep; 1505 *samep = JF(pull); 1506 JF(pull) = *diffp; 1507 1508 /* 1509 * At the top of the chain, each predecessor needs to point at the 1510 * pulled up node. Inside the chain, there is only one predecessor 1511 * to worry about. 1512 */ 1513 if (at_top) { 1514 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1515 if (JT(ep->pred) == b) 1516 JT(ep->pred) = pull; 1517 else 1518 JF(ep->pred) = pull; 1519 } 1520 } 1521 else 1522 *diffp = pull; 1523 1524 done = 0; 1525 } 1526 1527 static void 1528 and_pullup(b) 1529 struct block *b; 1530 { 1531 int val, at_top; 1532 struct block *pull; 1533 struct block **diffp, **samep; 1534 struct edge *ep; 1535 1536 ep = b->in_edges; 1537 if (ep == 0) 1538 return; 1539 1540 /* 1541 * Make sure each predecessor loads the same value. 1542 */ 1543 val = ep->pred->val[A_ATOM]; 1544 for (ep = ep->next; ep != 0; ep = ep->next) 1545 if (val != ep->pred->val[A_ATOM]) 1546 return; 1547 1548 if (JT(b->in_edges->pred) == b) 1549 diffp = &JT(b->in_edges->pred); 1550 else 1551 diffp = &JF(b->in_edges->pred); 1552 1553 at_top = 1; 1554 while (1) { 1555 if (*diffp == 0) 1556 return; 1557 1558 if (JF(*diffp) != JF(b)) 1559 return; 1560 1561 if (!SET_MEMBER((*diffp)->dom, b->id)) 1562 return; 1563 1564 if ((*diffp)->val[A_ATOM] != val) 1565 break; 1566 1567 diffp = &JT(*diffp); 1568 at_top = 0; 1569 } 1570 samep = &JT(*diffp); 1571 while (1) { 1572 if (*samep == 0) 1573 return; 1574 1575 if (JF(*samep) != JF(b)) 1576 return; 1577 1578 if (!SET_MEMBER((*samep)->dom, b->id)) 1579 return; 1580 1581 if ((*samep)->val[A_ATOM] == val) 1582 break; 1583 1584 /* XXX Need to check that there are no data dependencies 1585 between diffp and samep. Currently, the code generator 1586 will not produce such dependencies. */ 1587 samep = &JT(*samep); 1588 } 1589 #ifdef notdef 1590 /* XXX This doesn't cover everything. */ 1591 for (i = 0; i < N_ATOMS; ++i) 1592 if ((*samep)->val[i] != pred->val[i]) 1593 return; 1594 #endif 1595 /* Pull up the node. */ 1596 pull = *samep; 1597 *samep = JT(pull); 1598 JT(pull) = *diffp; 1599 1600 /* 1601 * At the top of the chain, each predecessor needs to point at the 1602 * pulled up node. Inside the chain, there is only one predecessor 1603 * to worry about. 1604 */ 1605 if (at_top) { 1606 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1607 if (JT(ep->pred) == b) 1608 JT(ep->pred) = pull; 1609 else 1610 JF(ep->pred) = pull; 1611 } 1612 } 1613 else 1614 *diffp = pull; 1615 1616 done = 0; 1617 } 1618 1619 static void 1620 opt_blks(root, do_stmts) 1621 struct block *root; 1622 int do_stmts; 1623 { 1624 int i, maxlevel; 1625 struct block *p; 1626 1627 init_val(); 1628 maxlevel = root->level; 1629 1630 find_inedges(root); 1631 for (i = maxlevel; i >= 0; --i) 1632 for (p = levels[i]; p; p = p->link) 1633 opt_blk(p, do_stmts); 1634 1635 if (do_stmts) 1636 /* 1637 * No point trying to move branches; it can't possibly 1638 * make a difference at this point. 
1639 */ 1640 return; 1641 1642 for (i = 1; i <= maxlevel; ++i) { 1643 for (p = levels[i]; p; p = p->link) { 1644 opt_j(&p->et); 1645 opt_j(&p->ef); 1646 } 1647 } 1648 1649 find_inedges(root); 1650 for (i = 1; i <= maxlevel; ++i) { 1651 for (p = levels[i]; p; p = p->link) { 1652 or_pullup(p); 1653 and_pullup(p); 1654 } 1655 } 1656 } 1657 1658 static inline void 1659 link_inedge(parent, child) 1660 struct edge *parent; 1661 struct block *child; 1662 { 1663 parent->next = child->in_edges; 1664 child->in_edges = parent; 1665 } 1666 1667 static void 1668 find_inedges(root) 1669 struct block *root; 1670 { 1671 int i; 1672 struct block *b; 1673 1674 for (i = 0; i < n_blocks; ++i) 1675 blocks[i]->in_edges = 0; 1676 1677 /* 1678 * Traverse the graph, adding each edge to the predecessor 1679 * list of its successors. Skip the leaves (i.e. level 0). 1680 */ 1681 for (i = root->level; i > 0; --i) { 1682 for (b = levels[i]; b != 0; b = b->link) { 1683 link_inedge(&b->et, JT(b)); 1684 link_inedge(&b->ef, JF(b)); 1685 } 1686 } 1687 } 1688 1689 static void 1690 opt_root(b) 1691 struct block **b; 1692 { 1693 struct slist *tmp, *s; 1694 1695 s = (*b)->stmts; 1696 (*b)->stmts = 0; 1697 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 1698 *b = JT(*b); 1699 1700 tmp = (*b)->stmts; 1701 if (tmp != 0) 1702 sappend(s, tmp); 1703 (*b)->stmts = s; 1704 1705 /* 1706 * If the root node is a return, then there is no 1707 * point executing any statements (since the bpf machine 1708 * has no side effects). 1709 */ 1710 if (BPF_CLASS((*b)->s.code) == BPF_RET) 1711 (*b)->stmts = 0; 1712 } 1713 1714 static void 1715 opt_loop(root, do_stmts) 1716 struct block *root; 1717 int do_stmts; 1718 { 1719 1720 #ifdef BDEBUG 1721 if (dflag > 1) { 1722 printf("opt_loop(root, %d) begin\n", do_stmts); 1723 opt_dump(root); 1724 } 1725 #endif 1726 do { 1727 done = 1; 1728 find_levels(root); 1729 find_dom(root); 1730 find_closure(root); 1731 find_ud(root); 1732 find_edom(root); 1733 opt_blks(root, do_stmts); 1734 #ifdef BDEBUG 1735 if (dflag > 1) { 1736 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done); 1737 opt_dump(root); 1738 } 1739 #endif 1740 } while (!done); 1741 } 1742 1743 /* 1744 * Optimize the filter code in its dag representation. 1745 */ 1746 void 1747 bpf_optimize(rootp) 1748 struct block **rootp; 1749 { 1750 struct block *root; 1751 1752 root = *rootp; 1753 1754 opt_init(root); 1755 opt_loop(root, 0); 1756 opt_loop(root, 1); 1757 intern_blocks(root); 1758 #ifdef BDEBUG 1759 if (dflag > 1) { 1760 printf("after intern_blocks()\n"); 1761 opt_dump(root); 1762 } 1763 #endif 1764 opt_root(rootp); 1765 #ifdef BDEBUG 1766 if (dflag > 1) { 1767 printf("after opt_root()\n"); 1768 opt_dump(root); 1769 } 1770 #endif 1771 opt_cleanup(); 1772 } 1773 1774 static void 1775 make_marks(p) 1776 struct block *p; 1777 { 1778 if (!isMarked(p)) { 1779 Mark(p); 1780 if (BPF_CLASS(p->s.code) != BPF_RET) { 1781 make_marks(JT(p)); 1782 make_marks(JF(p)); 1783 } 1784 } 1785 } 1786 1787 /* 1788 * Mark code array such that isMarked(i) is true 1789 * only for nodes that are alive. 1790 */ 1791 static void 1792 mark_code(p) 1793 struct block *p; 1794 { 1795 cur_mark += 1; 1796 make_marks(p); 1797 } 1798 1799 /* 1800 * True iff the two stmt lists load the same value from the packet into 1801 * the accumulator. 
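 * NOPs are skipped on both sides, so two lists compare equal exactly when
 * their remaining (code, k) pairs match one for one.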
1802 */ 1803 static int 1804 eq_slist(x, y) 1805 struct slist *x, *y; 1806 { 1807 while (1) { 1808 while (x && x->s.code == NOP) 1809 x = x->next; 1810 while (y && y->s.code == NOP) 1811 y = y->next; 1812 if (x == 0) 1813 return y == 0; 1814 if (y == 0) 1815 return x == 0; 1816 if (x->s.code != y->s.code || x->s.k != y->s.k) 1817 return 0; 1818 x = x->next; 1819 y = y->next; 1820 } 1821 } 1822 1823 static inline int 1824 eq_blk(b0, b1) 1825 struct block *b0, *b1; 1826 { 1827 if (b0->s.code == b1->s.code && 1828 b0->s.k == b1->s.k && 1829 b0->et.succ == b1->et.succ && 1830 b0->ef.succ == b1->ef.succ) 1831 return eq_slist(b0->stmts, b1->stmts); 1832 return 0; 1833 } 1834 1835 static void 1836 intern_blocks(root) 1837 struct block *root; 1838 { 1839 struct block *p; 1840 int i, j; 1841 int done1; /* don't shadow global */ 1842 top: 1843 done1 = 1; 1844 for (i = 0; i < n_blocks; ++i) 1845 blocks[i]->link = 0; 1846 1847 mark_code(root); 1848 1849 for (i = n_blocks - 1; --i >= 0; ) { 1850 if (!isMarked(blocks[i])) 1851 continue; 1852 for (j = i + 1; j < n_blocks; ++j) { 1853 if (!isMarked(blocks[j])) 1854 continue; 1855 if (eq_blk(blocks[i], blocks[j])) { 1856 blocks[i]->link = blocks[j]->link ? 1857 blocks[j]->link : blocks[j]; 1858 break; 1859 } 1860 } 1861 } 1862 for (i = 0; i < n_blocks; ++i) { 1863 p = blocks[i]; 1864 if (JT(p) == 0) 1865 continue; 1866 if (JT(p)->link) { 1867 done1 = 0; 1868 JT(p) = JT(p)->link; 1869 } 1870 if (JF(p)->link) { 1871 done1 = 0; 1872 JF(p) = JF(p)->link; 1873 } 1874 } 1875 if (!done1) 1876 goto top; 1877 } 1878 1879 static void 1880 opt_cleanup() 1881 { 1882 free((void *)vnode_base); 1883 free((void *)vmap); 1884 free((void *)edges); 1885 free((void *)space); 1886 free((void *)levels); 1887 free((void *)blocks); 1888 } 1889 1890 /* 1891 * Return the number of stmts in 's'. 1892 */ 1893 static int 1894 slength(s) 1895 struct slist *s; 1896 { 1897 int n = 0; 1898 1899 for (; s; s = s->next) 1900 if (s->s.code != NOP) 1901 ++n; 1902 return n; 1903 } 1904 1905 /* 1906 * Return the number of nodes reachable by 'p'. 1907 * All nodes should be initially unmarked. 1908 */ 1909 static int 1910 count_blocks(p) 1911 struct block *p; 1912 { 1913 if (p == 0 || isMarked(p)) 1914 return 0; 1915 Mark(p); 1916 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 1917 } 1918 1919 /* 1920 * Do a depth first search on the flow graph, numbering the 1921 * the basic blocks, and entering them into the 'blocks' array.` 1922 */ 1923 static void 1924 number_blks_r(p) 1925 struct block *p; 1926 { 1927 int n; 1928 1929 if (p == 0 || isMarked(p)) 1930 return; 1931 1932 Mark(p); 1933 n = n_blocks++; 1934 p->id = n; 1935 blocks[n] = p; 1936 1937 number_blks_r(JT(p)); 1938 number_blks_r(JF(p)); 1939 } 1940 1941 /* 1942 * Return the number of stmts in the flowgraph reachable by 'p'. 1943 * The nodes should be unmarked before calling. 1944 * 1945 * Note that "stmts" means "instructions", and that this includes 1946 * 1947 * side-effect statements in 'p' (slength(p->stmts)); 1948 * 1949 * statements in the true branch from 'p' (count_stmts(JT(p))); 1950 * 1951 * statements in the false branch from 'p' (count_stmts(JF(p))); 1952 * 1953 * the conditional jump itself (1); 1954 * 1955 * an extra long jump if the true branch requires it (p->longjt); 1956 * 1957 * an extra long jump if the false branch requires it (p->longjf). 
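 *
 * i.e. count_stmts(p) = slength(p->stmts) + count_stmts(JT(p)) +
 * count_stmts(JF(p)) + 1 + p->longjt + p->longjf, with nodes that have
 * already been visited contributing 0.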
1958 */ 1959 static int 1960 count_stmts(p) 1961 struct block *p; 1962 { 1963 int n; 1964 1965 if (p == 0 || isMarked(p)) 1966 return 0; 1967 Mark(p); 1968 n = count_stmts(JT(p)) + count_stmts(JF(p)); 1969 return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 1970 } 1971 1972 /* 1973 * Allocate memory. All allocation is done before optimization 1974 * is begun. A linear bound on the size of all data structures is computed 1975 * from the total number of blocks and/or statements. 1976 */ 1977 static void 1978 opt_init(root) 1979 struct block *root; 1980 { 1981 bpf_u_int32 *p; 1982 int i, n, max_stmts; 1983 1984 /* 1985 * First, count the blocks, so we can malloc an array to map 1986 * block number to block. Then, put the blocks into the array. 1987 */ 1988 unMarkAll(); 1989 n = count_blocks(root); 1990 blocks = (struct block **)calloc(n, sizeof(*blocks)); 1991 if (blocks == NULL) 1992 bpf_error("malloc"); 1993 unMarkAll(); 1994 n_blocks = 0; 1995 number_blks_r(root); 1996 1997 n_edges = 2 * n_blocks; 1998 edges = (struct edge **)calloc(n_edges, sizeof(*edges)); 1999 if (edges == NULL) 2000 bpf_error("malloc"); 2001 2002 /* 2003 * The number of levels is bounded by the number of nodes. 2004 */ 2005 levels = (struct block **)calloc(n_blocks, sizeof(*levels)); 2006 if (levels == NULL) 2007 bpf_error("malloc"); 2008 2009 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 2010 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 2011 2012 /* XXX */ 2013 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 2014 + n_edges * edgewords * sizeof(*space)); 2015 if (space == NULL) 2016 bpf_error("malloc"); 2017 p = space; 2018 all_dom_sets = p; 2019 for (i = 0; i < n; ++i) { 2020 blocks[i]->dom = p; 2021 p += nodewords; 2022 } 2023 all_closure_sets = p; 2024 for (i = 0; i < n; ++i) { 2025 blocks[i]->closure = p; 2026 p += nodewords; 2027 } 2028 all_edge_sets = p; 2029 for (i = 0; i < n; ++i) { 2030 register struct block *b = blocks[i]; 2031 2032 b->et.edom = p; 2033 p += edgewords; 2034 b->ef.edom = p; 2035 p += edgewords; 2036 b->et.id = i; 2037 edges[i] = &b->et; 2038 b->ef.id = n_blocks + i; 2039 edges[n_blocks + i] = &b->ef; 2040 b->et.pred = b; 2041 b->ef.pred = b; 2042 } 2043 max_stmts = 0; 2044 for (i = 0; i < n; ++i) 2045 max_stmts += slength(blocks[i]->stmts) + 1; 2046 /* 2047 * We allocate at most 3 value numbers per statement, 2048 * so this is an upper bound on the number of valnodes 2049 * we'll need. 2050 */ 2051 maxval = 3 * max_stmts; 2052 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap)); 2053 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base)); 2054 if (vmap == NULL || vnode_base == NULL) 2055 bpf_error("malloc"); 2056 } 2057 2058 /* 2059 * Some pointers used to convert the basic block form of the code, 2060 * into the array form that BPF requires. 'fstart' will point to 2061 * the malloc'd array while 'ftail' is used during the recursive traversal. 2062 */ 2063 static struct bpf_insn *fstart; 2064 static struct bpf_insn *ftail; 2065 2066 #ifdef BDEBUG 2067 int bids[1000]; 2068 #endif 2069 2070 /* 2071 * Returns true if successful. Returns false if a branch has 2072 * an offset that is too large. If so, we have marked that 2073 * branch so that on a subsequent iteration, it will be treated 2074 * properly. 
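 * "Marked" here means p->longjt or p->longjf was bumped, so the next pass
 * lays out an extra unconditional BPF_JA after the conditional branch and
 * points the branch at that jump instead of at the out-of-range block.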
2075 */ 2076 static int 2077 convert_code_r(p) 2078 struct block *p; 2079 { 2080 struct bpf_insn *dst; 2081 struct slist *src; 2082 int slen; 2083 u_int off; 2084 int extrajmps; /* number of extra jumps inserted */ 2085 struct slist **offset = NULL; 2086 2087 if (p == 0 || isMarked(p)) 2088 return (1); 2089 Mark(p); 2090 2091 if (convert_code_r(JF(p)) == 0) 2092 return (0); 2093 if (convert_code_r(JT(p)) == 0) 2094 return (0); 2095 2096 slen = slength(p->stmts); 2097 dst = ftail -= (slen + 1 + p->longjt + p->longjf); 2098 /* inflate length by any extra jumps */ 2099 2100 p->offset = dst - fstart; 2101 2102 /* generate offset[] for convenience */ 2103 if (slen) { 2104 offset = (struct slist **)calloc(slen, sizeof(struct slist *)); 2105 if (!offset) { 2106 bpf_error("not enough core"); 2107 /*NOTREACHED*/ 2108 } 2109 } 2110 src = p->stmts; 2111 for (off = 0; off < slen && src; off++) { 2112 #if 0 2113 printf("off=%d src=%x\n", off, src); 2114 #endif 2115 offset[off] = src; 2116 src = src->next; 2117 } 2118 2119 off = 0; 2120 for (src = p->stmts; src; src = src->next) { 2121 if (src->s.code == NOP) 2122 continue; 2123 dst->code = (u_short)src->s.code; 2124 dst->k = src->s.k; 2125 2126 /* fill block-local relative jump */ 2127 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) { 2128 #if 0 2129 if (src->s.jt || src->s.jf) { 2130 bpf_error("illegal jmp destination"); 2131 /*NOTREACHED*/ 2132 } 2133 #endif 2134 goto filled; 2135 } 2136 if (off == slen - 2) /*???*/ 2137 goto filled; 2138 2139 { 2140 int i; 2141 int jt, jf; 2142 const char *ljerr = "%s for block-local relative jump: off=%d"; 2143 2144 #if 0 2145 printf("code=%x off=%d %x %x\n", src->s.code, 2146 off, src->s.jt, src->s.jf); 2147 #endif 2148 2149 if (!src->s.jt || !src->s.jf) { 2150 bpf_error(ljerr, "no jmp destination", off); 2151 /*NOTREACHED*/ 2152 } 2153 2154 jt = jf = 0; 2155 for (i = 0; i < slen; i++) { 2156 if (offset[i] == src->s.jt) { 2157 if (jt) { 2158 bpf_error(ljerr, "multiple matches", off); 2159 /*NOTREACHED*/ 2160 } 2161 2162 dst->jt = i - off - 1; 2163 jt++; 2164 } 2165 if (offset[i] == src->s.jf) { 2166 if (jf) { 2167 bpf_error(ljerr, "multiple matches", off); 2168 /*NOTREACHED*/ 2169 } 2170 dst->jf = i - off - 1; 2171 jf++; 2172 } 2173 } 2174 if (!jt || !jf) { 2175 bpf_error(ljerr, "no destination found", off); 2176 /*NOTREACHED*/ 2177 } 2178 } 2179 filled: 2180 ++dst; 2181 ++off; 2182 } 2183 if (offset) 2184 free(offset); 2185 2186 #ifdef BDEBUG 2187 bids[dst - fstart] = p->id + 1; 2188 #endif 2189 dst->code = (u_short)p->s.code; 2190 dst->k = p->s.k; 2191 if (JT(p)) { 2192 extrajmps = 0; 2193 off = JT(p)->offset - (p->offset + slen) - 1; 2194 if (off >= 256) { 2195 /* offset too large for branch, must add a jump */ 2196 if (p->longjt == 0) { 2197 /* mark this instruction and retry */ 2198 p->longjt++; 2199 return(0); 2200 } 2201 /* branch if T to following jump */ 2202 dst->jt = extrajmps; 2203 extrajmps++; 2204 dst[extrajmps].code = BPF_JMP|BPF_JA; 2205 dst[extrajmps].k = off - extrajmps; 2206 } 2207 else 2208 dst->jt = off; 2209 off = JF(p)->offset - (p->offset + slen) - 1; 2210 if (off >= 256) { 2211 /* offset too large for branch, must add a jump */ 2212 if (p->longjf == 0) { 2213 /* mark this instruction and retry */ 2214 p->longjf++; 2215 return(0); 2216 } 2217 /* branch if F to following jump */ 2218 /* if two jumps are inserted, F goes to second one */ 2219 dst->jf = extrajmps; 2220 extrajmps++; 2221 dst[extrajmps].code = BPF_JMP|BPF_JA; 2222 dst[extrajmps].k = off - extrajmps; 
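			/*
			 * The long jump sits 'extrajmps' slots after the
			 * conditional branch, so its offset is 'off' minus
			 * that distance to reach the same target block.
			 */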
2223 } 2224 else 2225 dst->jf = off; 2226 } 2227 return (1); 2228 } 2229 2230 2231 /* 2232 * Convert flowgraph intermediate representation to the 2233 * BPF array representation. Set *lenp to the number of instructions. 2234 * 2235 * This routine does *NOT* leak the memory pointed to by fp. It *must 2236 * not* do free(fp) before returning fp; doing so would make no sense, 2237 * as the BPF array pointed to by the return value of icode_to_fcode() 2238 * must be valid - it's being returned for use in a bpf_program structure. 2239 * 2240 * If it appears that icode_to_fcode() is leaking, the problem is that 2241 * the program using pcap_compile() is failing to free the memory in 2242 * the BPF program when it's done - the leak is in the program, not in 2243 * the routine that happens to be allocating the memory. (By analogy, if 2244 * a program calls fopen() without ever calling fclose() on the FILE *, 2245 * it will leak the FILE structure; the leak is not in fopen(), it's in 2246 * the program.) Change the program to use pcap_freecode() when it's 2247 * done with the filter program. See the pcap man page. 2248 */ 2249 struct bpf_insn * 2250 icode_to_fcode(root, lenp) 2251 struct block *root; 2252 int *lenp; 2253 { 2254 int n; 2255 struct bpf_insn *fp; 2256 2257 /* 2258 * Loop doing convert_code_r() until no branches remain 2259 * with too-large offsets. 2260 */ 2261 while (1) { 2262 unMarkAll(); 2263 n = *lenp = count_stmts(root); 2264 2265 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 2266 if (fp == NULL) 2267 bpf_error("malloc"); 2268 memset((char *)fp, 0, sizeof(*fp) * n); 2269 fstart = fp; 2270 ftail = fp + n; 2271 2272 unMarkAll(); 2273 if (convert_code_r(root)) 2274 break; 2275 free(fp); 2276 } 2277 2278 return fp; 2279 } 2280 2281 /* 2282 * Make a copy of a BPF program and put it in the "fcode" member of 2283 * a "pcap_t". 2284 * 2285 * If we fail to allocate memory for the copy, fill in the "errbuf" 2286 * member of the "pcap_t" with an error message, and return -1; 2287 * otherwise, return 0. 2288 */ 2289 int 2290 install_bpf_program(pcap_t *p, struct bpf_program *fp) 2291 { 2292 size_t prog_size; 2293 2294 /* 2295 * Validate the program. 2296 */ 2297 if (!bpf_validate(fp->bf_insns, fp->bf_len)) { 2298 snprintf(p->errbuf, sizeof(p->errbuf), 2299 "BPF program is not valid"); 2300 return (-1); 2301 } 2302 2303 /* 2304 * Free up any already installed program. 2305 */ 2306 pcap_freecode(&p->fcode); 2307 2308 prog_size = sizeof(*fp->bf_insns) * fp->bf_len; 2309 p->fcode.bf_len = fp->bf_len; 2310 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); 2311 if (p->fcode.bf_insns == NULL) { 2312 snprintf(p->errbuf, sizeof(p->errbuf), 2313 "malloc: %s", pcap_strerror(errno)); 2314 return (-1); 2315 } 2316 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); 2317 return (0); 2318 } 2319 2320 #ifdef BDEBUG 2321 static void 2322 opt_dump(root) 2323 struct block *root; 2324 { 2325 struct bpf_program f; 2326 2327 memset(bids, 0, sizeof bids); 2328 f.bf_insns = icode_to_fcode(root, &f.bf_len); 2329 bpf_dump(&f, 1); 2330 putchar('\n'); 2331 free((char *)f.bf_insns); 2332 } 2333 #endif 2334
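
/*
 * Usage sketch (hypothetical caller; in libpcap this module is driven by
 * the code generator and pcap_compile()):
 *
 *	struct block *root;			DAG built by the code generator
 *	struct bpf_insn *insns;
 *	int len;
 *
 *	bpf_optimize(&root);			optimize in place; may replace the root
 *	insns = icode_to_fcode(root, &len);	malloc'd array of 'len' instructions
 *
 * The caller owns 'insns' and is expected to free it, e.g. via
 * pcap_freecode() on the bpf_program it was stored in.
 */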