1 /* 2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that: (1) source code distributions 7 * retain the above copyright notice and this paragraph in its entirety, (2) 8 * distributions including binary code include the above copyright notice and 9 * this paragraph in its entirety in the documentation or other materials 10 * provided with the distribution, and (3) all advertising materials mentioning 11 * features or use of this software display the following acknowledgement: 12 * ``This product includes software developed by the University of California, 13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 14 * the University nor the names of its contributors may be used to endorse 15 * or promote products derived from this software without specific prior 16 * written permission. 17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 20 * 21 * Optimization module for tcpdump intermediate representation. 22 */ 23 #ifndef lint 24 static const char rcsid[] _U_ = 25 "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp $ (LBL)"; 26 #endif 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #ifdef WIN32 33 #include <pcap-stdinc.h> 34 #else /* WIN32 */ 35 #if HAVE_INTTYPES_H 36 #include <inttypes.h> 37 #elif HAVE_STDINT_H 38 #include <stdint.h> 39 #endif 40 #ifdef HAVE_SYS_BITYPES_H 41 #include <sys/bitypes.h> 42 #endif 43 #include <sys/types.h> 44 #endif /* WIN32 */ 45 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <memory.h> 49 #include <string.h> 50 51 #include <errno.h> 52 53 #include "pcap-int.h" 54 55 #include "gencode.h" 56 57 #ifdef HAVE_OS_PROTO_H 58 #include "os-proto.h" 59 #endif 60 61 #ifdef BDEBUG 62 extern int dflag; 63 #endif 64 65 #if defined(MSDOS) && !defined(__DJGPP__) 66 extern int _w32_ffs (int mask); 67 #define ffs _w32_ffs 68 #endif 69 70 #if defined(WIN32) && defined (_MSC_VER) 71 int ffs(int mask); 72 #endif 73 74 /* 75 * Represents a deleted instruction. 76 */ 77 #define NOP -1 78 79 /* 80 * Register numbers for use-def values. 81 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory 82 * location. A_ATOM is the accumulator and X_ATOM is the index 83 * register. 84 */ 85 #define A_ATOM BPF_MEMWORDS 86 #define X_ATOM (BPF_MEMWORDS+1) 87 88 /* 89 * This define is used to represent *both* the accumulator and 90 * x register in use-def computations. 91 * Currently, the use-def code assumes only one definition per instruction. 92 */ 93 #define AX_ATOM N_ATOMS 94 95 /* 96 * A flag to indicate that further optimization is needed. 97 * Iterative passes are continued until a given pass yields no 98 * branch movement. 99 */ 100 static int done; 101 102 /* 103 * A block is marked if only if its mark equals the current mark. 104 * Rather than traverse the code array, marking each item, 'cur_mark' is 105 * incremented. This automatically makes each element unmarked. 106 */ 107 static int cur_mark; 108 #define isMarked(p) ((p)->mark == cur_mark) 109 #define unMarkAll() cur_mark += 1 110 #define Mark(p) ((p)->mark = cur_mark) 111 112 static void opt_init(struct block *); 113 static void opt_cleanup(void); 114 115 static void make_marks(struct block *); 116 static void mark_code(struct block *); 117 118 static void intern_blocks(struct block *); 119 120 static int eq_slist(struct slist *, struct slist *); 121 122 static void find_levels_r(struct block *); 123 124 static void find_levels(struct block *); 125 static void find_dom(struct block *); 126 static void propedom(struct edge *); 127 static void find_edom(struct block *); 128 static void find_closure(struct block *); 129 static int atomuse(struct stmt *); 130 static int atomdef(struct stmt *); 131 static void compute_local_ud(struct block *); 132 static void find_ud(struct block *); 133 static void init_val(void); 134 static int F(int, int, int); 135 static inline void vstore(struct stmt *, int *, int, int); 136 static void opt_blk(struct block *, int); 137 static int use_conflict(struct block *, struct block *); 138 static void opt_j(struct edge *); 139 static void or_pullup(struct block *); 140 static void and_pullup(struct block *); 141 static void opt_blks(struct block *, int); 142 static inline void link_inedge(struct edge *, struct block *); 143 static void find_inedges(struct block *); 144 static void opt_root(struct block **); 145 static void opt_loop(struct block *, int); 146 static void fold_op(struct stmt *, int, int); 147 static inline struct slist *this_op(struct slist *); 148 static void opt_not(struct block *); 149 static void opt_peep(struct block *); 150 static void opt_stmt(struct stmt *, int[], int); 151 static void deadstmt(struct stmt *, struct stmt *[]); 152 static void opt_deadstores(struct block *); 153 static struct block *fold_edge(struct block *, struct edge *); 154 static inline int eq_blk(struct block *, struct block *); 155 static int slength(struct slist *); 156 static int count_blocks(struct block *); 157 static void number_blks_r(struct block *); 158 static int count_stmts(struct block *); 159 static int convert_code_r(struct block *); 160 #ifdef BDEBUG 161 static void opt_dump(struct block *); 162 #endif 163 164 static int n_blocks; 165 struct block **blocks; 166 static int n_edges; 167 struct edge **edges; 168 169 /* 170 * A bit vector set representation of the dominators. 171 * We round up the set size to the next power of two. 172 */ 173 static int nodewords; 174 static int edgewords; 175 struct block **levels; 176 bpf_u_int32 *space; 177 #define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 178 /* 179 * True if a is in uset {p} 180 */ 181 #define SET_MEMBER(p, a) \ 182 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 183 184 /* 185 * Add 'a' to uset p. 186 */ 187 #define SET_INSERT(p, a) \ 188 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 189 190 /* 191 * Delete 'a' from uset p. 192 */ 193 #define SET_DELETE(p, a) \ 194 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 195 196 /* 197 * a := a intersect b 198 */ 199 #define SET_INTERSECT(a, b, n)\ 200 {\ 201 register bpf_u_int32 *_x = a, *_y = b;\ 202 register int _n = n;\ 203 while (--_n >= 0) *_x++ &= *_y++;\ 204 } 205 206 /* 207 * a := a - b 208 */ 209 #define SET_SUBTRACT(a, b, n)\ 210 {\ 211 register bpf_u_int32 *_x = a, *_y = b;\ 212 register int _n = n;\ 213 while (--_n >= 0) *_x++ &=~ *_y++;\ 214 } 215 216 /* 217 * a := a union b 218 */ 219 #define SET_UNION(a, b, n)\ 220 {\ 221 register bpf_u_int32 *_x = a, *_y = b;\ 222 register int _n = n;\ 223 while (--_n >= 0) *_x++ |= *_y++;\ 224 } 225 226 static uset all_dom_sets; 227 static uset all_closure_sets; 228 static uset all_edge_sets; 229 230 #ifndef MAX 231 #define MAX(a,b) ((a)>(b)?(a):(b)) 232 #endif 233 234 static void 235 find_levels_r(b) 236 struct block *b; 237 { 238 int level; 239 240 if (isMarked(b)) 241 return; 242 243 Mark(b); 244 b->link = 0; 245 246 if (JT(b)) { 247 find_levels_r(JT(b)); 248 find_levels_r(JF(b)); 249 level = MAX(JT(b)->level, JF(b)->level) + 1; 250 } else 251 level = 0; 252 b->level = level; 253 b->link = levels[level]; 254 levels[level] = b; 255 } 256 257 /* 258 * Level graph. The levels go from 0 at the leaves to 259 * N_LEVELS at the root. The levels[] array points to the 260 * first node of the level list, whose elements are linked 261 * with the 'link' field of the struct block. 262 */ 263 static void 264 find_levels(root) 265 struct block *root; 266 { 267 memset((char *)levels, 0, n_blocks * sizeof(*levels)); 268 unMarkAll(); 269 find_levels_r(root); 270 } 271 272 /* 273 * Find dominator relationships. 274 * Assumes graph has been leveled. 275 */ 276 static void 277 find_dom(root) 278 struct block *root; 279 { 280 int i; 281 struct block *b; 282 bpf_u_int32 *x; 283 284 /* 285 * Initialize sets to contain all nodes. 286 */ 287 x = all_dom_sets; 288 i = n_blocks * nodewords; 289 while (--i >= 0) 290 *x++ = ~0; 291 /* Root starts off empty. */ 292 for (i = nodewords; --i >= 0;) 293 root->dom[i] = 0; 294 295 /* root->level is the highest level no found. */ 296 for (i = root->level; i >= 0; --i) { 297 for (b = levels[i]; b; b = b->link) { 298 SET_INSERT(b->dom, b->id); 299 if (JT(b) == 0) 300 continue; 301 SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 302 SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 303 } 304 } 305 } 306 307 static void 308 propedom(ep) 309 struct edge *ep; 310 { 311 SET_INSERT(ep->edom, ep->id); 312 if (ep->succ) { 313 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 314 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 315 } 316 } 317 318 /* 319 * Compute edge dominators. 320 * Assumes graph has been leveled and predecessors established. 321 */ 322 static void 323 find_edom(root) 324 struct block *root; 325 { 326 int i; 327 uset x; 328 struct block *b; 329 330 x = all_edge_sets; 331 for (i = n_edges * edgewords; --i >= 0; ) 332 x[i] = ~0; 333 334 /* root->level is the highest level no found. */ 335 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 336 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 337 for (i = root->level; i >= 0; --i) { 338 for (b = levels[i]; b != 0; b = b->link) { 339 propedom(&b->et); 340 propedom(&b->ef); 341 } 342 } 343 } 344 345 /* 346 * Find the backwards transitive closure of the flow graph. These sets 347 * are backwards in the sense that we find the set of nodes that reach 348 * a given node, not the set of nodes that can be reached by a node. 349 * 350 * Assumes graph has been leveled. 351 */ 352 static void 353 find_closure(root) 354 struct block *root; 355 { 356 int i; 357 struct block *b; 358 359 /* 360 * Initialize sets to contain no nodes. 361 */ 362 memset((char *)all_closure_sets, 0, 363 n_blocks * nodewords * sizeof(*all_closure_sets)); 364 365 /* root->level is the highest level no found. */ 366 for (i = root->level; i >= 0; --i) { 367 for (b = levels[i]; b; b = b->link) { 368 SET_INSERT(b->closure, b->id); 369 if (JT(b) == 0) 370 continue; 371 SET_UNION(JT(b)->closure, b->closure, nodewords); 372 SET_UNION(JF(b)->closure, b->closure, nodewords); 373 } 374 } 375 } 376 377 /* 378 * Return the register number that is used by s. If A and X are both 379 * used, return AX_ATOM. If no register is used, return -1. 380 * 381 * The implementation should probably change to an array access. 382 */ 383 static int 384 atomuse(s) 385 struct stmt *s; 386 { 387 register int c = s->code; 388 389 if (c == NOP) 390 return -1; 391 392 switch (BPF_CLASS(c)) { 393 394 case BPF_RET: 395 return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 396 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 397 398 case BPF_LD: 399 case BPF_LDX: 400 return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 401 (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 402 403 case BPF_ST: 404 return A_ATOM; 405 406 case BPF_STX: 407 return X_ATOM; 408 409 case BPF_JMP: 410 case BPF_ALU: 411 if (BPF_SRC(c) == BPF_X) 412 return AX_ATOM; 413 return A_ATOM; 414 415 case BPF_MISC: 416 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 417 } 418 abort(); 419 /* NOTREACHED */ 420 } 421 422 /* 423 * Return the register number that is defined by 's'. We assume that 424 * a single stmt cannot define more than one register. If no register 425 * is defined, return -1. 426 * 427 * The implementation should probably change to an array access. 428 */ 429 static int 430 atomdef(s) 431 struct stmt *s; 432 { 433 if (s->code == NOP) 434 return -1; 435 436 switch (BPF_CLASS(s->code)) { 437 438 case BPF_LD: 439 case BPF_ALU: 440 return A_ATOM; 441 442 case BPF_LDX: 443 return X_ATOM; 444 445 case BPF_ST: 446 case BPF_STX: 447 return s->k; 448 449 case BPF_MISC: 450 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 451 } 452 return -1; 453 } 454 455 /* 456 * Compute the sets of registers used, defined, and killed by 'b'. 457 * 458 * "Used" means that a statement in 'b' uses the register before any 459 * statement in 'b' defines it, i.e. it uses the value left in 460 * that register by a predecessor block of this block. 461 * "Defined" means that a statement in 'b' defines it. 462 * "Killed" means that a statement in 'b' defines it before any 463 * statement in 'b' uses it, i.e. it kills the value left in that 464 * register by a predecessor block of this block. 465 */ 466 static void 467 compute_local_ud(b) 468 struct block *b; 469 { 470 struct slist *s; 471 atomset def = 0, use = 0, kill = 0; 472 int atom; 473 474 for (s = b->stmts; s; s = s->next) { 475 if (s->s.code == NOP) 476 continue; 477 atom = atomuse(&s->s); 478 if (atom >= 0) { 479 if (atom == AX_ATOM) { 480 if (!ATOMELEM(def, X_ATOM)) 481 use |= ATOMMASK(X_ATOM); 482 if (!ATOMELEM(def, A_ATOM)) 483 use |= ATOMMASK(A_ATOM); 484 } 485 else if (atom < N_ATOMS) { 486 if (!ATOMELEM(def, atom)) 487 use |= ATOMMASK(atom); 488 } 489 else 490 abort(); 491 } 492 atom = atomdef(&s->s); 493 if (atom >= 0) { 494 if (!ATOMELEM(use, atom)) 495 kill |= ATOMMASK(atom); 496 def |= ATOMMASK(atom); 497 } 498 } 499 if (BPF_CLASS(b->s.code) == BPF_JMP) { 500 /* 501 * XXX - what about RET? 502 */ 503 atom = atomuse(&b->s); 504 if (atom >= 0) { 505 if (atom == AX_ATOM) { 506 if (!ATOMELEM(def, X_ATOM)) 507 use |= ATOMMASK(X_ATOM); 508 if (!ATOMELEM(def, A_ATOM)) 509 use |= ATOMMASK(A_ATOM); 510 } 511 else if (atom < N_ATOMS) { 512 if (!ATOMELEM(def, atom)) 513 use |= ATOMMASK(atom); 514 } 515 else 516 abort(); 517 } 518 } 519 520 b->def = def; 521 b->kill = kill; 522 b->in_use = use; 523 } 524 525 /* 526 * Assume graph is already leveled. 527 */ 528 static void 529 find_ud(root) 530 struct block *root; 531 { 532 int i, maxlevel; 533 struct block *p; 534 535 /* 536 * root->level is the highest level no found; 537 * count down from there. 538 */ 539 maxlevel = root->level; 540 for (i = maxlevel; i >= 0; --i) 541 for (p = levels[i]; p; p = p->link) { 542 compute_local_ud(p); 543 p->out_use = 0; 544 } 545 546 for (i = 1; i <= maxlevel; ++i) { 547 for (p = levels[i]; p; p = p->link) { 548 p->out_use |= JT(p)->in_use | JF(p)->in_use; 549 p->in_use |= p->out_use &~ p->kill; 550 } 551 } 552 } 553 554 /* 555 * These data structures are used in a Cocke and Shwarz style 556 * value numbering scheme. Since the flowgraph is acyclic, 557 * exit values can be propagated from a node's predecessors 558 * provided it is uniquely defined. 559 */ 560 struct valnode { 561 int code; 562 int v0, v1; 563 int val; 564 struct valnode *next; 565 }; 566 567 #define MODULUS 213 568 static struct valnode *hashtbl[MODULUS]; 569 static int curval; 570 static int maxval; 571 572 /* Integer constants mapped with the load immediate opcode. */ 573 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 574 575 struct vmapinfo { 576 int is_const; 577 bpf_int32 const_val; 578 }; 579 580 struct vmapinfo *vmap; 581 struct valnode *vnode_base; 582 struct valnode *next_vnode; 583 584 static void 585 init_val() 586 { 587 curval = 0; 588 next_vnode = vnode_base; 589 memset((char *)vmap, 0, maxval * sizeof(*vmap)); 590 memset((char *)hashtbl, 0, sizeof hashtbl); 591 } 592 593 /* Because we really don't have an IR, this stuff is a little messy. */ 594 static int 595 F(code, v0, v1) 596 int code; 597 int v0, v1; 598 { 599 u_int hash; 600 int val; 601 struct valnode *p; 602 603 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 604 hash %= MODULUS; 605 606 for (p = hashtbl[hash]; p; p = p->next) 607 if (p->code == code && p->v0 == v0 && p->v1 == v1) 608 return p->val; 609 610 val = ++curval; 611 if (BPF_MODE(code) == BPF_IMM && 612 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 613 vmap[val].const_val = v0; 614 vmap[val].is_const = 1; 615 } 616 p = next_vnode++; 617 p->val = val; 618 p->code = code; 619 p->v0 = v0; 620 p->v1 = v1; 621 p->next = hashtbl[hash]; 622 hashtbl[hash] = p; 623 624 return val; 625 } 626 627 static inline void 628 vstore(s, valp, newval, alter) 629 struct stmt *s; 630 int *valp; 631 int newval; 632 int alter; 633 { 634 if (alter && *valp == newval) 635 s->code = NOP; 636 else 637 *valp = newval; 638 } 639 640 static void 641 fold_op(s, v0, v1) 642 struct stmt *s; 643 int v0, v1; 644 { 645 bpf_u_int32 a, b; 646 647 a = vmap[v0].const_val; 648 b = vmap[v1].const_val; 649 650 switch (BPF_OP(s->code)) { 651 case BPF_ADD: 652 a += b; 653 break; 654 655 case BPF_SUB: 656 a -= b; 657 break; 658 659 case BPF_MUL: 660 a *= b; 661 break; 662 663 case BPF_DIV: 664 if (b == 0) 665 bpf_error("division by zero"); 666 a /= b; 667 break; 668 669 case BPF_AND: 670 a &= b; 671 break; 672 673 case BPF_OR: 674 a |= b; 675 break; 676 677 case BPF_LSH: 678 a <<= b; 679 break; 680 681 case BPF_RSH: 682 a >>= b; 683 break; 684 685 case BPF_NEG: 686 a = -a; 687 break; 688 689 default: 690 abort(); 691 } 692 s->k = a; 693 s->code = BPF_LD|BPF_IMM; 694 done = 0; 695 } 696 697 static inline struct slist * 698 this_op(s) 699 struct slist *s; 700 { 701 while (s != 0 && s->s.code == NOP) 702 s = s->next; 703 return s; 704 } 705 706 static void 707 opt_not(b) 708 struct block *b; 709 { 710 struct block *tmp = JT(b); 711 712 JT(b) = JF(b); 713 JF(b) = tmp; 714 } 715 716 static void 717 opt_peep(b) 718 struct block *b; 719 { 720 struct slist *s; 721 struct slist *next, *last; 722 int val; 723 724 s = b->stmts; 725 if (s == 0) 726 return; 727 728 last = s; 729 for (/*empty*/; /*empty*/; s = next) { 730 /* 731 * Skip over nops. 732 */ 733 s = this_op(s); 734 if (s == 0) 735 break; /* nothing left in the block */ 736 737 /* 738 * Find the next real instruction after that one 739 * (skipping nops). 740 */ 741 next = this_op(s->next); 742 if (next == 0) 743 break; /* no next instruction */ 744 last = next; 745 746 /* 747 * st M[k] --> st M[k] 748 * ldx M[k] tax 749 */ 750 if (s->s.code == BPF_ST && 751 next->s.code == (BPF_LDX|BPF_MEM) && 752 s->s.k == next->s.k) { 753 done = 0; 754 next->s.code = BPF_MISC|BPF_TAX; 755 } 756 /* 757 * ld #k --> ldx #k 758 * tax txa 759 */ 760 if (s->s.code == (BPF_LD|BPF_IMM) && 761 next->s.code == (BPF_MISC|BPF_TAX)) { 762 s->s.code = BPF_LDX|BPF_IMM; 763 next->s.code = BPF_MISC|BPF_TXA; 764 done = 0; 765 } 766 /* 767 * This is an ugly special case, but it happens 768 * when you say tcp[k] or udp[k] where k is a constant. 769 */ 770 if (s->s.code == (BPF_LD|BPF_IMM)) { 771 struct slist *add, *tax, *ild; 772 773 /* 774 * Check that X isn't used on exit from this 775 * block (which the optimizer might cause). 776 * We know the code generator won't generate 777 * any local dependencies. 778 */ 779 if (ATOMELEM(b->out_use, X_ATOM)) 780 continue; 781 782 /* 783 * Check that the instruction following the ldi 784 * is an addx, or it's an ldxms with an addx 785 * following it (with 0 or more nops between the 786 * ldxms and addx). 787 */ 788 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 789 add = next; 790 else 791 add = this_op(next->next); 792 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 793 continue; 794 795 /* 796 * Check that a tax follows that (with 0 or more 797 * nops between them). 798 */ 799 tax = this_op(add->next); 800 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 801 continue; 802 803 /* 804 * Check that an ild follows that (with 0 or more 805 * nops between them). 806 */ 807 ild = this_op(tax->next); 808 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 809 BPF_MODE(ild->s.code) != BPF_IND) 810 continue; 811 /* 812 * We want to turn this sequence: 813 * 814 * (004) ldi #0x2 {s} 815 * (005) ldxms [14] {next} -- optional 816 * (006) addx {add} 817 * (007) tax {tax} 818 * (008) ild [x+0] {ild} 819 * 820 * into this sequence: 821 * 822 * (004) nop 823 * (005) ldxms [14] 824 * (006) nop 825 * (007) nop 826 * (008) ild [x+2] 827 * 828 * XXX We need to check that X is not 829 * subsequently used, because we want to change 830 * what'll be in it after this sequence. 831 * 832 * We know we can eliminate the accumulator 833 * modifications earlier in the sequence since 834 * it is defined by the last stmt of this sequence 835 * (i.e., the last statement of the sequence loads 836 * a value into the accumulator, so we can eliminate 837 * earlier operations on the accumulator). 838 */ 839 ild->s.k += s->s.k; 840 s->s.code = NOP; 841 add->s.code = NOP; 842 tax->s.code = NOP; 843 done = 0; 844 } 845 } 846 /* 847 * If the comparison at the end of a block is an equality 848 * comparison against a constant, and nobody uses the value 849 * we leave in the A register at the end of a block, and 850 * the operation preceding the comparison is an arithmetic 851 * operation, we can sometime optimize it away. 852 */ 853 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 854 !ATOMELEM(b->out_use, A_ATOM)) { 855 /* 856 * We can optimize away certain subtractions of the 857 * X register. 858 */ 859 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 860 val = b->val[X_ATOM]; 861 if (vmap[val].is_const) { 862 /* 863 * If we have a subtract to do a comparison, 864 * and the X register is a known constant, 865 * we can merge this value into the 866 * comparison: 867 * 868 * sub x -> nop 869 * jeq #y jeq #(x+y) 870 */ 871 b->s.k += vmap[val].const_val; 872 last->s.code = NOP; 873 done = 0; 874 } else if (b->s.k == 0) { 875 /* 876 * If the X register isn't a constant, 877 * and the comparison in the test is 878 * against 0, we can compare with the 879 * X register, instead: 880 * 881 * sub x -> nop 882 * jeq #0 jeq x 883 */ 884 last->s.code = NOP; 885 b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 886 done = 0; 887 } 888 } 889 /* 890 * Likewise, a constant subtract can be simplified: 891 * 892 * sub #x -> nop 893 * jeq #y -> jeq #(x+y) 894 */ 895 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 896 last->s.code = NOP; 897 b->s.k += last->s.k; 898 done = 0; 899 } 900 /* 901 * And, similarly, a constant AND can be simplified 902 * if we're testing against 0, i.e.: 903 * 904 * and #k nop 905 * jeq #0 -> jset #k 906 */ 907 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 908 b->s.k == 0) { 909 b->s.k = last->s.k; 910 b->s.code = BPF_JMP|BPF_K|BPF_JSET; 911 last->s.code = NOP; 912 done = 0; 913 opt_not(b); 914 } 915 } 916 /* 917 * jset #0 -> never 918 * jset #ffffffff -> always 919 */ 920 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 921 if (b->s.k == 0) 922 JT(b) = JF(b); 923 if (b->s.k == 0xffffffff) 924 JF(b) = JT(b); 925 } 926 /* 927 * If we're comparing against the index register, and the index 928 * register is a known constant, we can just compare against that 929 * constant. 930 */ 931 val = b->val[X_ATOM]; 932 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 933 bpf_int32 v = vmap[val].const_val; 934 b->s.code &= ~BPF_X; 935 b->s.k = v; 936 } 937 /* 938 * If the accumulator is a known constant, we can compute the 939 * comparison result. 940 */ 941 val = b->val[A_ATOM]; 942 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 943 bpf_int32 v = vmap[val].const_val; 944 switch (BPF_OP(b->s.code)) { 945 946 case BPF_JEQ: 947 v = v == b->s.k; 948 break; 949 950 case BPF_JGT: 951 v = (unsigned)v > b->s.k; 952 break; 953 954 case BPF_JGE: 955 v = (unsigned)v >= b->s.k; 956 break; 957 958 case BPF_JSET: 959 v &= b->s.k; 960 break; 961 962 default: 963 abort(); 964 } 965 if (JF(b) != JT(b)) 966 done = 0; 967 if (v) 968 JF(b) = JT(b); 969 else 970 JT(b) = JF(b); 971 } 972 } 973 974 /* 975 * Compute the symbolic value of expression of 's', and update 976 * anything it defines in the value table 'val'. If 'alter' is true, 977 * do various optimizations. This code would be cleaner if symbolic 978 * evaluation and code transformations weren't folded together. 979 */ 980 static void 981 opt_stmt(s, val, alter) 982 struct stmt *s; 983 int val[]; 984 int alter; 985 { 986 int op; 987 int v; 988 989 switch (s->code) { 990 991 case BPF_LD|BPF_ABS|BPF_W: 992 case BPF_LD|BPF_ABS|BPF_H: 993 case BPF_LD|BPF_ABS|BPF_B: 994 v = F(s->code, s->k, 0L); 995 vstore(s, &val[A_ATOM], v, alter); 996 break; 997 998 case BPF_LD|BPF_IND|BPF_W: 999 case BPF_LD|BPF_IND|BPF_H: 1000 case BPF_LD|BPF_IND|BPF_B: 1001 v = val[X_ATOM]; 1002 if (alter && vmap[v].is_const) { 1003 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 1004 s->k += vmap[v].const_val; 1005 v = F(s->code, s->k, 0L); 1006 done = 0; 1007 } 1008 else 1009 v = F(s->code, s->k, v); 1010 vstore(s, &val[A_ATOM], v, alter); 1011 break; 1012 1013 case BPF_LD|BPF_LEN: 1014 v = F(s->code, 0L, 0L); 1015 vstore(s, &val[A_ATOM], v, alter); 1016 break; 1017 1018 case BPF_LD|BPF_IMM: 1019 v = K(s->k); 1020 vstore(s, &val[A_ATOM], v, alter); 1021 break; 1022 1023 case BPF_LDX|BPF_IMM: 1024 v = K(s->k); 1025 vstore(s, &val[X_ATOM], v, alter); 1026 break; 1027 1028 case BPF_LDX|BPF_MSH|BPF_B: 1029 v = F(s->code, s->k, 0L); 1030 vstore(s, &val[X_ATOM], v, alter); 1031 break; 1032 1033 case BPF_ALU|BPF_NEG: 1034 if (alter && vmap[val[A_ATOM]].is_const) { 1035 s->code = BPF_LD|BPF_IMM; 1036 s->k = -vmap[val[A_ATOM]].const_val; 1037 val[A_ATOM] = K(s->k); 1038 } 1039 else 1040 val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 1041 break; 1042 1043 case BPF_ALU|BPF_ADD|BPF_K: 1044 case BPF_ALU|BPF_SUB|BPF_K: 1045 case BPF_ALU|BPF_MUL|BPF_K: 1046 case BPF_ALU|BPF_DIV|BPF_K: 1047 case BPF_ALU|BPF_AND|BPF_K: 1048 case BPF_ALU|BPF_OR|BPF_K: 1049 case BPF_ALU|BPF_LSH|BPF_K: 1050 case BPF_ALU|BPF_RSH|BPF_K: 1051 op = BPF_OP(s->code); 1052 if (alter) { 1053 if (s->k == 0) { 1054 /* don't optimize away "sub #0" 1055 * as it may be needed later to 1056 * fixup the generated math code */ 1057 if (op == BPF_ADD || 1058 op == BPF_LSH || op == BPF_RSH || 1059 op == BPF_OR) { 1060 s->code = NOP; 1061 break; 1062 } 1063 if (op == BPF_MUL || op == BPF_AND) { 1064 s->code = BPF_LD|BPF_IMM; 1065 val[A_ATOM] = K(s->k); 1066 break; 1067 } 1068 } 1069 if (vmap[val[A_ATOM]].is_const) { 1070 fold_op(s, val[A_ATOM], K(s->k)); 1071 val[A_ATOM] = K(s->k); 1072 break; 1073 } 1074 } 1075 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 1076 break; 1077 1078 case BPF_ALU|BPF_ADD|BPF_X: 1079 case BPF_ALU|BPF_SUB|BPF_X: 1080 case BPF_ALU|BPF_MUL|BPF_X: 1081 case BPF_ALU|BPF_DIV|BPF_X: 1082 case BPF_ALU|BPF_AND|BPF_X: 1083 case BPF_ALU|BPF_OR|BPF_X: 1084 case BPF_ALU|BPF_LSH|BPF_X: 1085 case BPF_ALU|BPF_RSH|BPF_X: 1086 op = BPF_OP(s->code); 1087 if (alter && vmap[val[X_ATOM]].is_const) { 1088 if (vmap[val[A_ATOM]].is_const) { 1089 fold_op(s, val[A_ATOM], val[X_ATOM]); 1090 val[A_ATOM] = K(s->k); 1091 } 1092 else { 1093 s->code = BPF_ALU|BPF_K|op; 1094 s->k = vmap[val[X_ATOM]].const_val; 1095 done = 0; 1096 val[A_ATOM] = 1097 F(s->code, val[A_ATOM], K(s->k)); 1098 } 1099 break; 1100 } 1101 /* 1102 * Check if we're doing something to an accumulator 1103 * that is 0, and simplify. This may not seem like 1104 * much of a simplification but it could open up further 1105 * optimizations. 1106 * XXX We could also check for mul by 1, etc. 1107 */ 1108 if (alter && vmap[val[A_ATOM]].is_const 1109 && vmap[val[A_ATOM]].const_val == 0) { 1110 if (op == BPF_ADD || op == BPF_OR) { 1111 s->code = BPF_MISC|BPF_TXA; 1112 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1113 break; 1114 } 1115 else if (op == BPF_MUL || op == BPF_DIV || 1116 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 1117 s->code = BPF_LD|BPF_IMM; 1118 s->k = 0; 1119 vstore(s, &val[A_ATOM], K(s->k), alter); 1120 break; 1121 } 1122 else if (op == BPF_NEG) { 1123 s->code = NOP; 1124 break; 1125 } 1126 } 1127 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 1128 break; 1129 1130 case BPF_MISC|BPF_TXA: 1131 vstore(s, &val[A_ATOM], val[X_ATOM], alter); 1132 break; 1133 1134 case BPF_LD|BPF_MEM: 1135 v = val[s->k]; 1136 if (alter && vmap[v].is_const) { 1137 s->code = BPF_LD|BPF_IMM; 1138 s->k = vmap[v].const_val; 1139 done = 0; 1140 } 1141 vstore(s, &val[A_ATOM], v, alter); 1142 break; 1143 1144 case BPF_MISC|BPF_TAX: 1145 vstore(s, &val[X_ATOM], val[A_ATOM], alter); 1146 break; 1147 1148 case BPF_LDX|BPF_MEM: 1149 v = val[s->k]; 1150 if (alter && vmap[v].is_const) { 1151 s->code = BPF_LDX|BPF_IMM; 1152 s->k = vmap[v].const_val; 1153 done = 0; 1154 } 1155 vstore(s, &val[X_ATOM], v, alter); 1156 break; 1157 1158 case BPF_ST: 1159 vstore(s, &val[s->k], val[A_ATOM], alter); 1160 break; 1161 1162 case BPF_STX: 1163 vstore(s, &val[s->k], val[X_ATOM], alter); 1164 break; 1165 } 1166 } 1167 1168 static void 1169 deadstmt(s, last) 1170 register struct stmt *s; 1171 register struct stmt *last[]; 1172 { 1173 register int atom; 1174 1175 atom = atomuse(s); 1176 if (atom >= 0) { 1177 if (atom == AX_ATOM) { 1178 last[X_ATOM] = 0; 1179 last[A_ATOM] = 0; 1180 } 1181 else 1182 last[atom] = 0; 1183 } 1184 atom = atomdef(s); 1185 if (atom >= 0) { 1186 if (last[atom]) { 1187 done = 0; 1188 last[atom]->code = NOP; 1189 } 1190 last[atom] = s; 1191 } 1192 } 1193 1194 static void 1195 opt_deadstores(b) 1196 register struct block *b; 1197 { 1198 register struct slist *s; 1199 register int atom; 1200 struct stmt *last[N_ATOMS]; 1201 1202 memset((char *)last, 0, sizeof last); 1203 1204 for (s = b->stmts; s != 0; s = s->next) 1205 deadstmt(&s->s, last); 1206 deadstmt(&b->s, last); 1207 1208 for (atom = 0; atom < N_ATOMS; ++atom) 1209 if (last[atom] && !ATOMELEM(b->out_use, atom)) { 1210 last[atom]->code = NOP; 1211 done = 0; 1212 } 1213 } 1214 1215 static void 1216 opt_blk(b, do_stmts) 1217 struct block *b; 1218 int do_stmts; 1219 { 1220 struct slist *s; 1221 struct edge *p; 1222 int i; 1223 bpf_int32 aval, xval; 1224 1225 #if 0 1226 for (s = b->stmts; s && s->next; s = s->next) 1227 if (BPF_CLASS(s->s.code) == BPF_JMP) { 1228 do_stmts = 0; 1229 break; 1230 } 1231 #endif 1232 1233 /* 1234 * Initialize the atom values. 1235 */ 1236 p = b->in_edges; 1237 if (p == 0) { 1238 /* 1239 * We have no predecessors, so everything is undefined 1240 * upon entry to this block. 1241 */ 1242 memset((char *)b->val, 0, sizeof(b->val)); 1243 } else { 1244 /* 1245 * Inherit values from our predecessors. 1246 * 1247 * First, get the values from the predecessor along the 1248 * first edge leading to this node. 1249 */ 1250 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 1251 /* 1252 * Now look at all the other nodes leading to this node. 1253 * If, for the predecessor along that edge, a register 1254 * has a different value from the one we have (i.e., 1255 * control paths are merging, and the merging paths 1256 * assign different values to that register), give the 1257 * register the undefined value of 0. 1258 */ 1259 while ((p = p->next) != NULL) { 1260 for (i = 0; i < N_ATOMS; ++i) 1261 if (b->val[i] != p->pred->val[i]) 1262 b->val[i] = 0; 1263 } 1264 } 1265 aval = b->val[A_ATOM]; 1266 xval = b->val[X_ATOM]; 1267 for (s = b->stmts; s; s = s->next) 1268 opt_stmt(&s->s, b->val, do_stmts); 1269 1270 /* 1271 * This is a special case: if we don't use anything from this 1272 * block, and we load the accumulator or index register with a 1273 * value that is already there, or if this block is a return, 1274 * eliminate all the statements. 1275 * 1276 * XXX - what if it does a store? 1277 * 1278 * XXX - why does it matter whether we use anything from this 1279 * block? If the accumulator or index register doesn't change 1280 * its value, isn't that OK even if we use that value? 1281 * 1282 * XXX - if we load the accumulator with a different value, 1283 * and the block ends with a conditional branch, we obviously 1284 * can't eliminate it, as the branch depends on that value. 1285 * For the index register, the conditional branch only depends 1286 * on the index register value if the test is against the index 1287 * register value rather than a constant; if nothing uses the 1288 * value we put into the index register, and we're not testing 1289 * against the index register's value, and there aren't any 1290 * other problems that would keep us from eliminating this 1291 * block, can we eliminate it? 1292 */ 1293 if (do_stmts && 1294 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval && 1295 xval != 0 && b->val[X_ATOM] == xval) || 1296 BPF_CLASS(b->s.code) == BPF_RET)) { 1297 if (b->stmts != 0) { 1298 b->stmts = 0; 1299 done = 0; 1300 } 1301 } else { 1302 opt_peep(b); 1303 opt_deadstores(b); 1304 } 1305 /* 1306 * Set up values for branch optimizer. 1307 */ 1308 if (BPF_SRC(b->s.code) == BPF_K) 1309 b->oval = K(b->s.k); 1310 else 1311 b->oval = b->val[X_ATOM]; 1312 b->et.code = b->s.code; 1313 b->ef.code = -b->s.code; 1314 } 1315 1316 /* 1317 * Return true if any register that is used on exit from 'succ', has 1318 * an exit value that is different from the corresponding exit value 1319 * from 'b'. 1320 */ 1321 static int 1322 use_conflict(b, succ) 1323 struct block *b, *succ; 1324 { 1325 int atom; 1326 atomset use = succ->out_use; 1327 1328 if (use == 0) 1329 return 0; 1330 1331 for (atom = 0; atom < N_ATOMS; ++atom) 1332 if (ATOMELEM(use, atom)) 1333 if (b->val[atom] != succ->val[atom]) 1334 return 1; 1335 return 0; 1336 } 1337 1338 static struct block * 1339 fold_edge(child, ep) 1340 struct block *child; 1341 struct edge *ep; 1342 { 1343 int sense; 1344 int aval0, aval1, oval0, oval1; 1345 int code = ep->code; 1346 1347 if (code < 0) { 1348 code = -code; 1349 sense = 0; 1350 } else 1351 sense = 1; 1352 1353 if (child->s.code != code) 1354 return 0; 1355 1356 aval0 = child->val[A_ATOM]; 1357 oval0 = child->oval; 1358 aval1 = ep->pred->val[A_ATOM]; 1359 oval1 = ep->pred->oval; 1360 1361 if (aval0 != aval1) 1362 return 0; 1363 1364 if (oval0 == oval1) 1365 /* 1366 * The operands of the branch instructions are 1367 * identical, so the result is true if a true 1368 * branch was taken to get here, otherwise false. 1369 */ 1370 return sense ? JT(child) : JF(child); 1371 1372 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 1373 /* 1374 * At this point, we only know the comparison if we 1375 * came down the true branch, and it was an equality 1376 * comparison with a constant. 1377 * 1378 * I.e., if we came down the true branch, and the branch 1379 * was an equality comparison with a constant, we know the 1380 * accumulator contains that constant. If we came down 1381 * the false branch, or the comparison wasn't with a 1382 * constant, we don't know what was in the accumulator. 1383 * 1384 * We rely on the fact that distinct constants have distinct 1385 * value numbers. 1386 */ 1387 return JF(child); 1388 1389 return 0; 1390 } 1391 1392 static void 1393 opt_j(ep) 1394 struct edge *ep; 1395 { 1396 register int i, k; 1397 register struct block *target; 1398 1399 if (JT(ep->succ) == 0) 1400 return; 1401 1402 if (JT(ep->succ) == JF(ep->succ)) { 1403 /* 1404 * Common branch targets can be eliminated, provided 1405 * there is no data dependency. 1406 */ 1407 if (!use_conflict(ep->pred, ep->succ->et.succ)) { 1408 done = 0; 1409 ep->succ = JT(ep->succ); 1410 } 1411 } 1412 /* 1413 * For each edge dominator that matches the successor of this 1414 * edge, promote the edge successor to the its grandchild. 1415 * 1416 * XXX We violate the set abstraction here in favor a reasonably 1417 * efficient loop. 1418 */ 1419 top: 1420 for (i = 0; i < edgewords; ++i) { 1421 register bpf_u_int32 x = ep->edom[i]; 1422 1423 while (x != 0) { 1424 k = ffs(x) - 1; 1425 x &=~ (1 << k); 1426 k += i * BITS_PER_WORD; 1427 1428 target = fold_edge(ep->succ, edges[k]); 1429 /* 1430 * Check that there is no data dependency between 1431 * nodes that will be violated if we move the edge. 1432 */ 1433 if (target != 0 && !use_conflict(ep->pred, target)) { 1434 done = 0; 1435 ep->succ = target; 1436 if (JT(target) != 0) 1437 /* 1438 * Start over unless we hit a leaf. 1439 */ 1440 goto top; 1441 return; 1442 } 1443 } 1444 } 1445 } 1446 1447 1448 static void 1449 or_pullup(b) 1450 struct block *b; 1451 { 1452 int val, at_top; 1453 struct block *pull; 1454 struct block **diffp, **samep; 1455 struct edge *ep; 1456 1457 ep = b->in_edges; 1458 if (ep == 0) 1459 return; 1460 1461 /* 1462 * Make sure each predecessor loads the same value. 1463 * XXX why? 1464 */ 1465 val = ep->pred->val[A_ATOM]; 1466 for (ep = ep->next; ep != 0; ep = ep->next) 1467 if (val != ep->pred->val[A_ATOM]) 1468 return; 1469 1470 if (JT(b->in_edges->pred) == b) 1471 diffp = &JT(b->in_edges->pred); 1472 else 1473 diffp = &JF(b->in_edges->pred); 1474 1475 at_top = 1; 1476 while (1) { 1477 if (*diffp == 0) 1478 return; 1479 1480 if (JT(*diffp) != JT(b)) 1481 return; 1482 1483 if (!SET_MEMBER((*diffp)->dom, b->id)) 1484 return; 1485 1486 if ((*diffp)->val[A_ATOM] != val) 1487 break; 1488 1489 diffp = &JF(*diffp); 1490 at_top = 0; 1491 } 1492 samep = &JF(*diffp); 1493 while (1) { 1494 if (*samep == 0) 1495 return; 1496 1497 if (JT(*samep) != JT(b)) 1498 return; 1499 1500 if (!SET_MEMBER((*samep)->dom, b->id)) 1501 return; 1502 1503 if ((*samep)->val[A_ATOM] == val) 1504 break; 1505 1506 /* XXX Need to check that there are no data dependencies 1507 between dp0 and dp1. Currently, the code generator 1508 will not produce such dependencies. */ 1509 samep = &JF(*samep); 1510 } 1511 #ifdef notdef 1512 /* XXX This doesn't cover everything. */ 1513 for (i = 0; i < N_ATOMS; ++i) 1514 if ((*samep)->val[i] != pred->val[i]) 1515 return; 1516 #endif 1517 /* Pull up the node. */ 1518 pull = *samep; 1519 *samep = JF(pull); 1520 JF(pull) = *diffp; 1521 1522 /* 1523 * At the top of the chain, each predecessor needs to point at the 1524 * pulled up node. Inside the chain, there is only one predecessor 1525 * to worry about. 1526 */ 1527 if (at_top) { 1528 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1529 if (JT(ep->pred) == b) 1530 JT(ep->pred) = pull; 1531 else 1532 JF(ep->pred) = pull; 1533 } 1534 } 1535 else 1536 *diffp = pull; 1537 1538 done = 0; 1539 } 1540 1541 static void 1542 and_pullup(b) 1543 struct block *b; 1544 { 1545 int val, at_top; 1546 struct block *pull; 1547 struct block **diffp, **samep; 1548 struct edge *ep; 1549 1550 ep = b->in_edges; 1551 if (ep == 0) 1552 return; 1553 1554 /* 1555 * Make sure each predecessor loads the same value. 1556 */ 1557 val = ep->pred->val[A_ATOM]; 1558 for (ep = ep->next; ep != 0; ep = ep->next) 1559 if (val != ep->pred->val[A_ATOM]) 1560 return; 1561 1562 if (JT(b->in_edges->pred) == b) 1563 diffp = &JT(b->in_edges->pred); 1564 else 1565 diffp = &JF(b->in_edges->pred); 1566 1567 at_top = 1; 1568 while (1) { 1569 if (*diffp == 0) 1570 return; 1571 1572 if (JF(*diffp) != JF(b)) 1573 return; 1574 1575 if (!SET_MEMBER((*diffp)->dom, b->id)) 1576 return; 1577 1578 if ((*diffp)->val[A_ATOM] != val) 1579 break; 1580 1581 diffp = &JT(*diffp); 1582 at_top = 0; 1583 } 1584 samep = &JT(*diffp); 1585 while (1) { 1586 if (*samep == 0) 1587 return; 1588 1589 if (JF(*samep) != JF(b)) 1590 return; 1591 1592 if (!SET_MEMBER((*samep)->dom, b->id)) 1593 return; 1594 1595 if ((*samep)->val[A_ATOM] == val) 1596 break; 1597 1598 /* XXX Need to check that there are no data dependencies 1599 between diffp and samep. Currently, the code generator 1600 will not produce such dependencies. */ 1601 samep = &JT(*samep); 1602 } 1603 #ifdef notdef 1604 /* XXX This doesn't cover everything. */ 1605 for (i = 0; i < N_ATOMS; ++i) 1606 if ((*samep)->val[i] != pred->val[i]) 1607 return; 1608 #endif 1609 /* Pull up the node. */ 1610 pull = *samep; 1611 *samep = JT(pull); 1612 JT(pull) = *diffp; 1613 1614 /* 1615 * At the top of the chain, each predecessor needs to point at the 1616 * pulled up node. Inside the chain, there is only one predecessor 1617 * to worry about. 1618 */ 1619 if (at_top) { 1620 for (ep = b->in_edges; ep != 0; ep = ep->next) { 1621 if (JT(ep->pred) == b) 1622 JT(ep->pred) = pull; 1623 else 1624 JF(ep->pred) = pull; 1625 } 1626 } 1627 else 1628 *diffp = pull; 1629 1630 done = 0; 1631 } 1632 1633 static void 1634 opt_blks(root, do_stmts) 1635 struct block *root; 1636 int do_stmts; 1637 { 1638 int i, maxlevel; 1639 struct block *p; 1640 1641 init_val(); 1642 maxlevel = root->level; 1643 1644 find_inedges(root); 1645 for (i = maxlevel; i >= 0; --i) 1646 for (p = levels[i]; p; p = p->link) 1647 opt_blk(p, do_stmts); 1648 1649 if (do_stmts) 1650 /* 1651 * No point trying to move branches; it can't possibly 1652 * make a difference at this point. 1653 */ 1654 return; 1655 1656 for (i = 1; i <= maxlevel; ++i) { 1657 for (p = levels[i]; p; p = p->link) { 1658 opt_j(&p->et); 1659 opt_j(&p->ef); 1660 } 1661 } 1662 1663 find_inedges(root); 1664 for (i = 1; i <= maxlevel; ++i) { 1665 for (p = levels[i]; p; p = p->link) { 1666 or_pullup(p); 1667 and_pullup(p); 1668 } 1669 } 1670 } 1671 1672 static inline void 1673 link_inedge(parent, child) 1674 struct edge *parent; 1675 struct block *child; 1676 { 1677 parent->next = child->in_edges; 1678 child->in_edges = parent; 1679 } 1680 1681 static void 1682 find_inedges(root) 1683 struct block *root; 1684 { 1685 int i; 1686 struct block *b; 1687 1688 for (i = 0; i < n_blocks; ++i) 1689 blocks[i]->in_edges = 0; 1690 1691 /* 1692 * Traverse the graph, adding each edge to the predecessor 1693 * list of its successors. Skip the leaves (i.e. level 0). 1694 */ 1695 for (i = root->level; i > 0; --i) { 1696 for (b = levels[i]; b != 0; b = b->link) { 1697 link_inedge(&b->et, JT(b)); 1698 link_inedge(&b->ef, JF(b)); 1699 } 1700 } 1701 } 1702 1703 static void 1704 opt_root(b) 1705 struct block **b; 1706 { 1707 struct slist *tmp, *s; 1708 1709 s = (*b)->stmts; 1710 (*b)->stmts = 0; 1711 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 1712 *b = JT(*b); 1713 1714 tmp = (*b)->stmts; 1715 if (tmp != 0) 1716 sappend(s, tmp); 1717 (*b)->stmts = s; 1718 1719 /* 1720 * If the root node is a return, then there is no 1721 * point executing any statements (since the bpf machine 1722 * has no side effects). 1723 */ 1724 if (BPF_CLASS((*b)->s.code) == BPF_RET) 1725 (*b)->stmts = 0; 1726 } 1727 1728 static void 1729 opt_loop(root, do_stmts) 1730 struct block *root; 1731 int do_stmts; 1732 { 1733 1734 #ifdef BDEBUG 1735 if (dflag > 1) { 1736 printf("opt_loop(root, %d) begin\n", do_stmts); 1737 opt_dump(root); 1738 } 1739 #endif 1740 do { 1741 done = 1; 1742 find_levels(root); 1743 find_dom(root); 1744 find_closure(root); 1745 find_ud(root); 1746 find_edom(root); 1747 opt_blks(root, do_stmts); 1748 #ifdef BDEBUG 1749 if (dflag > 1) { 1750 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done); 1751 opt_dump(root); 1752 } 1753 #endif 1754 } while (!done); 1755 } 1756 1757 /* 1758 * Optimize the filter code in its dag representation. 1759 */ 1760 void 1761 bpf_optimize(rootp) 1762 struct block **rootp; 1763 { 1764 struct block *root; 1765 1766 root = *rootp; 1767 1768 opt_init(root); 1769 opt_loop(root, 0); 1770 opt_loop(root, 1); 1771 intern_blocks(root); 1772 #ifdef BDEBUG 1773 if (dflag > 1) { 1774 printf("after intern_blocks()\n"); 1775 opt_dump(root); 1776 } 1777 #endif 1778 opt_root(rootp); 1779 #ifdef BDEBUG 1780 if (dflag > 1) { 1781 printf("after opt_root()\n"); 1782 opt_dump(root); 1783 } 1784 #endif 1785 opt_cleanup(); 1786 } 1787 1788 static void 1789 make_marks(p) 1790 struct block *p; 1791 { 1792 if (!isMarked(p)) { 1793 Mark(p); 1794 if (BPF_CLASS(p->s.code) != BPF_RET) { 1795 make_marks(JT(p)); 1796 make_marks(JF(p)); 1797 } 1798 } 1799 } 1800 1801 /* 1802 * Mark code array such that isMarked(i) is true 1803 * only for nodes that are alive. 1804 */ 1805 static void 1806 mark_code(p) 1807 struct block *p; 1808 { 1809 cur_mark += 1; 1810 make_marks(p); 1811 } 1812 1813 /* 1814 * True iff the two stmt lists load the same value from the packet into 1815 * the accumulator. 1816 */ 1817 static int 1818 eq_slist(x, y) 1819 struct slist *x, *y; 1820 { 1821 while (1) { 1822 while (x && x->s.code == NOP) 1823 x = x->next; 1824 while (y && y->s.code == NOP) 1825 y = y->next; 1826 if (x == 0) 1827 return y == 0; 1828 if (y == 0) 1829 return x == 0; 1830 if (x->s.code != y->s.code || x->s.k != y->s.k) 1831 return 0; 1832 x = x->next; 1833 y = y->next; 1834 } 1835 } 1836 1837 static inline int 1838 eq_blk(b0, b1) 1839 struct block *b0, *b1; 1840 { 1841 if (b0->s.code == b1->s.code && 1842 b0->s.k == b1->s.k && 1843 b0->et.succ == b1->et.succ && 1844 b0->ef.succ == b1->ef.succ) 1845 return eq_slist(b0->stmts, b1->stmts); 1846 return 0; 1847 } 1848 1849 static void 1850 intern_blocks(root) 1851 struct block *root; 1852 { 1853 struct block *p; 1854 int i, j; 1855 int done1; /* don't shadow global */ 1856 top: 1857 done1 = 1; 1858 for (i = 0; i < n_blocks; ++i) 1859 blocks[i]->link = 0; 1860 1861 mark_code(root); 1862 1863 for (i = n_blocks - 1; --i >= 0; ) { 1864 if (!isMarked(blocks[i])) 1865 continue; 1866 for (j = i + 1; j < n_blocks; ++j) { 1867 if (!isMarked(blocks[j])) 1868 continue; 1869 if (eq_blk(blocks[i], blocks[j])) { 1870 blocks[i]->link = blocks[j]->link ? 1871 blocks[j]->link : blocks[j]; 1872 break; 1873 } 1874 } 1875 } 1876 for (i = 0; i < n_blocks; ++i) { 1877 p = blocks[i]; 1878 if (JT(p) == 0) 1879 continue; 1880 if (JT(p)->link) { 1881 done1 = 0; 1882 JT(p) = JT(p)->link; 1883 } 1884 if (JF(p)->link) { 1885 done1 = 0; 1886 JF(p) = JF(p)->link; 1887 } 1888 } 1889 if (!done1) 1890 goto top; 1891 } 1892 1893 static void 1894 opt_cleanup() 1895 { 1896 free((void *)vnode_base); 1897 free((void *)vmap); 1898 free((void *)edges); 1899 free((void *)space); 1900 free((void *)levels); 1901 free((void *)blocks); 1902 } 1903 1904 /* 1905 * Return the number of stmts in 's'. 1906 */ 1907 static int 1908 slength(s) 1909 struct slist *s; 1910 { 1911 int n = 0; 1912 1913 for (; s; s = s->next) 1914 if (s->s.code != NOP) 1915 ++n; 1916 return n; 1917 } 1918 1919 /* 1920 * Return the number of nodes reachable by 'p'. 1921 * All nodes should be initially unmarked. 1922 */ 1923 static int 1924 count_blocks(p) 1925 struct block *p; 1926 { 1927 if (p == 0 || isMarked(p)) 1928 return 0; 1929 Mark(p); 1930 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 1931 } 1932 1933 /* 1934 * Do a depth first search on the flow graph, numbering the 1935 * the basic blocks, and entering them into the 'blocks' array.` 1936 */ 1937 static void 1938 number_blks_r(p) 1939 struct block *p; 1940 { 1941 int n; 1942 1943 if (p == 0 || isMarked(p)) 1944 return; 1945 1946 Mark(p); 1947 n = n_blocks++; 1948 p->id = n; 1949 blocks[n] = p; 1950 1951 number_blks_r(JT(p)); 1952 number_blks_r(JF(p)); 1953 } 1954 1955 /* 1956 * Return the number of stmts in the flowgraph reachable by 'p'. 1957 * The nodes should be unmarked before calling. 1958 * 1959 * Note that "stmts" means "instructions", and that this includes 1960 * 1961 * side-effect statements in 'p' (slength(p->stmts)); 1962 * 1963 * statements in the true branch from 'p' (count_stmts(JT(p))); 1964 * 1965 * statements in the false branch from 'p' (count_stmts(JF(p))); 1966 * 1967 * the conditional jump itself (1); 1968 * 1969 * an extra long jump if the true branch requires it (p->longjt); 1970 * 1971 * an extra long jump if the false branch requires it (p->longjf). 1972 */ 1973 static int 1974 count_stmts(p) 1975 struct block *p; 1976 { 1977 int n; 1978 1979 if (p == 0 || isMarked(p)) 1980 return 0; 1981 Mark(p); 1982 n = count_stmts(JT(p)) + count_stmts(JF(p)); 1983 return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 1984 } 1985 1986 /* 1987 * Allocate memory. All allocation is done before optimization 1988 * is begun. A linear bound on the size of all data structures is computed 1989 * from the total number of blocks and/or statements. 1990 */ 1991 static void 1992 opt_init(root) 1993 struct block *root; 1994 { 1995 bpf_u_int32 *p; 1996 int i, n, max_stmts; 1997 1998 /* 1999 * First, count the blocks, so we can malloc an array to map 2000 * block number to block. Then, put the blocks into the array. 2001 */ 2002 unMarkAll(); 2003 n = count_blocks(root); 2004 blocks = (struct block **)calloc(n, sizeof(*blocks)); 2005 if (blocks == NULL) 2006 bpf_error("malloc"); 2007 unMarkAll(); 2008 n_blocks = 0; 2009 number_blks_r(root); 2010 2011 n_edges = 2 * n_blocks; 2012 edges = (struct edge **)calloc(n_edges, sizeof(*edges)); 2013 if (edges == NULL) 2014 bpf_error("malloc"); 2015 2016 /* 2017 * The number of levels is bounded by the number of nodes. 2018 */ 2019 levels = (struct block **)calloc(n_blocks, sizeof(*levels)); 2020 if (levels == NULL) 2021 bpf_error("malloc"); 2022 2023 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 2024 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 2025 2026 /* XXX */ 2027 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 2028 + n_edges * edgewords * sizeof(*space)); 2029 if (space == NULL) 2030 bpf_error("malloc"); 2031 p = space; 2032 all_dom_sets = p; 2033 for (i = 0; i < n; ++i) { 2034 blocks[i]->dom = p; 2035 p += nodewords; 2036 } 2037 all_closure_sets = p; 2038 for (i = 0; i < n; ++i) { 2039 blocks[i]->closure = p; 2040 p += nodewords; 2041 } 2042 all_edge_sets = p; 2043 for (i = 0; i < n; ++i) { 2044 register struct block *b = blocks[i]; 2045 2046 b->et.edom = p; 2047 p += edgewords; 2048 b->ef.edom = p; 2049 p += edgewords; 2050 b->et.id = i; 2051 edges[i] = &b->et; 2052 b->ef.id = n_blocks + i; 2053 edges[n_blocks + i] = &b->ef; 2054 b->et.pred = b; 2055 b->ef.pred = b; 2056 } 2057 max_stmts = 0; 2058 for (i = 0; i < n; ++i) 2059 max_stmts += slength(blocks[i]->stmts) + 1; 2060 /* 2061 * We allocate at most 3 value numbers per statement, 2062 * so this is an upper bound on the number of valnodes 2063 * we'll need. 2064 */ 2065 maxval = 3 * max_stmts; 2066 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap)); 2067 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base)); 2068 if (vmap == NULL || vnode_base == NULL) 2069 bpf_error("malloc"); 2070 } 2071 2072 /* 2073 * Some pointers used to convert the basic block form of the code, 2074 * into the array form that BPF requires. 'fstart' will point to 2075 * the malloc'd array while 'ftail' is used during the recursive traversal. 2076 */ 2077 static struct bpf_insn *fstart; 2078 static struct bpf_insn *ftail; 2079 2080 #ifdef BDEBUG 2081 int bids[1000]; 2082 #endif 2083 2084 /* 2085 * Returns true if successful. Returns false if a branch has 2086 * an offset that is too large. If so, we have marked that 2087 * branch so that on a subsequent iteration, it will be treated 2088 * properly. 2089 */ 2090 static int 2091 convert_code_r(p) 2092 struct block *p; 2093 { 2094 struct bpf_insn *dst; 2095 struct slist *src; 2096 int slen; 2097 u_int off; 2098 int extrajmps; /* number of extra jumps inserted */ 2099 struct slist **offset = NULL; 2100 2101 if (p == 0 || isMarked(p)) 2102 return (1); 2103 Mark(p); 2104 2105 if (convert_code_r(JF(p)) == 0) 2106 return (0); 2107 if (convert_code_r(JT(p)) == 0) 2108 return (0); 2109 2110 slen = slength(p->stmts); 2111 dst = ftail -= (slen + 1 + p->longjt + p->longjf); 2112 /* inflate length by any extra jumps */ 2113 2114 p->offset = dst - fstart; 2115 2116 /* generate offset[] for convenience */ 2117 if (slen) { 2118 offset = (struct slist **)calloc(slen, sizeof(struct slist *)); 2119 if (!offset) { 2120 bpf_error("not enough core"); 2121 /*NOTREACHED*/ 2122 } 2123 } 2124 src = p->stmts; 2125 for (off = 0; off < slen && src; off++) { 2126 #if 0 2127 printf("off=%d src=%x\n", off, src); 2128 #endif 2129 offset[off] = src; 2130 src = src->next; 2131 } 2132 2133 off = 0; 2134 for (src = p->stmts; src; src = src->next) { 2135 if (src->s.code == NOP) 2136 continue; 2137 dst->code = (u_short)src->s.code; 2138 dst->k = src->s.k; 2139 2140 /* fill block-local relative jump */ 2141 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) { 2142 #if 0 2143 if (src->s.jt || src->s.jf) { 2144 bpf_error("illegal jmp destination"); 2145 /*NOTREACHED*/ 2146 } 2147 #endif 2148 goto filled; 2149 } 2150 if (off == slen - 2) /*???*/ 2151 goto filled; 2152 2153 { 2154 int i; 2155 int jt, jf; 2156 const char *ljerr = "%s for block-local relative jump: off=%d"; 2157 2158 #if 0 2159 printf("code=%x off=%d %x %x\n", src->s.code, 2160 off, src->s.jt, src->s.jf); 2161 #endif 2162 2163 if (!src->s.jt || !src->s.jf) { 2164 bpf_error(ljerr, "no jmp destination", off); 2165 /*NOTREACHED*/ 2166 } 2167 2168 jt = jf = 0; 2169 for (i = 0; i < slen; i++) { 2170 if (offset[i] == src->s.jt) { 2171 if (jt) { 2172 bpf_error(ljerr, "multiple matches", off); 2173 /*NOTREACHED*/ 2174 } 2175 2176 dst->jt = i - off - 1; 2177 jt++; 2178 } 2179 if (offset[i] == src->s.jf) { 2180 if (jf) { 2181 bpf_error(ljerr, "multiple matches", off); 2182 /*NOTREACHED*/ 2183 } 2184 dst->jf = i - off - 1; 2185 jf++; 2186 } 2187 } 2188 if (!jt || !jf) { 2189 bpf_error(ljerr, "no destination found", off); 2190 /*NOTREACHED*/ 2191 } 2192 } 2193 filled: 2194 ++dst; 2195 ++off; 2196 } 2197 if (offset) 2198 free(offset); 2199 2200 #ifdef BDEBUG 2201 bids[dst - fstart] = p->id + 1; 2202 #endif 2203 dst->code = (u_short)p->s.code; 2204 dst->k = p->s.k; 2205 if (JT(p)) { 2206 extrajmps = 0; 2207 off = JT(p)->offset - (p->offset + slen) - 1; 2208 if (off >= 256) { 2209 /* offset too large for branch, must add a jump */ 2210 if (p->longjt == 0) { 2211 /* mark this instruction and retry */ 2212 p->longjt++; 2213 return(0); 2214 } 2215 /* branch if T to following jump */ 2216 dst->jt = extrajmps; 2217 extrajmps++; 2218 dst[extrajmps].code = BPF_JMP|BPF_JA; 2219 dst[extrajmps].k = off - extrajmps; 2220 } 2221 else 2222 dst->jt = off; 2223 off = JF(p)->offset - (p->offset + slen) - 1; 2224 if (off >= 256) { 2225 /* offset too large for branch, must add a jump */ 2226 if (p->longjf == 0) { 2227 /* mark this instruction and retry */ 2228 p->longjf++; 2229 return(0); 2230 } 2231 /* branch if F to following jump */ 2232 /* if two jumps are inserted, F goes to second one */ 2233 dst->jf = extrajmps; 2234 extrajmps++; 2235 dst[extrajmps].code = BPF_JMP|BPF_JA; 2236 dst[extrajmps].k = off - extrajmps; 2237 } 2238 else 2239 dst->jf = off; 2240 } 2241 return (1); 2242 } 2243 2244 2245 /* 2246 * Convert flowgraph intermediate representation to the 2247 * BPF array representation. Set *lenp to the number of instructions. 2248 * 2249 * This routine does *NOT* leak the memory pointed to by fp. It *must 2250 * not* do free(fp) before returning fp; doing so would make no sense, 2251 * as the BPF array pointed to by the return value of icode_to_fcode() 2252 * must be valid - it's being returned for use in a bpf_program structure. 2253 * 2254 * If it appears that icode_to_fcode() is leaking, the problem is that 2255 * the program using pcap_compile() is failing to free the memory in 2256 * the BPF program when it's done - the leak is in the program, not in 2257 * the routine that happens to be allocating the memory. (By analogy, if 2258 * a program calls fopen() without ever calling fclose() on the FILE *, 2259 * it will leak the FILE structure; the leak is not in fopen(), it's in 2260 * the program.) Change the program to use pcap_freecode() when it's 2261 * done with the filter program. See the pcap man page. 2262 */ 2263 struct bpf_insn * 2264 icode_to_fcode(root, lenp) 2265 struct block *root; 2266 int *lenp; 2267 { 2268 int n; 2269 struct bpf_insn *fp; 2270 2271 /* 2272 * Loop doing convert_code_r() until no branches remain 2273 * with too-large offsets. 2274 */ 2275 while (1) { 2276 unMarkAll(); 2277 n = *lenp = count_stmts(root); 2278 2279 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 2280 if (fp == NULL) 2281 bpf_error("malloc"); 2282 memset((char *)fp, 0, sizeof(*fp) * n); 2283 fstart = fp; 2284 ftail = fp + n; 2285 2286 unMarkAll(); 2287 if (convert_code_r(root)) 2288 break; 2289 free(fp); 2290 } 2291 2292 return fp; 2293 } 2294 2295 /* 2296 * Make a copy of a BPF program and put it in the "fcode" member of 2297 * a "pcap_t". 2298 * 2299 * If we fail to allocate memory for the copy, fill in the "errbuf" 2300 * member of the "pcap_t" with an error message, and return -1; 2301 * otherwise, return 0. 2302 */ 2303 int 2304 install_bpf_program(pcap_t *p, struct bpf_program *fp) 2305 { 2306 size_t prog_size; 2307 2308 /* 2309 * Validate the program. 2310 */ 2311 if (!bpf_validate(fp->bf_insns, fp->bf_len)) { 2312 snprintf(p->errbuf, sizeof(p->errbuf), 2313 "BPF program is not valid"); 2314 return (-1); 2315 } 2316 2317 /* 2318 * Free up any already installed program. 2319 */ 2320 pcap_freecode(&p->fcode); 2321 2322 prog_size = sizeof(*fp->bf_insns) * fp->bf_len; 2323 p->fcode.bf_len = fp->bf_len; 2324 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); 2325 if (p->fcode.bf_insns == NULL) { 2326 snprintf(p->errbuf, sizeof(p->errbuf), 2327 "malloc: %s", pcap_strerror(errno)); 2328 return (-1); 2329 } 2330 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); 2331 return (0); 2332 } 2333 2334 #ifdef BDEBUG 2335 static void 2336 opt_dump(root) 2337 struct block *root; 2338 { 2339 struct bpf_program f; 2340 2341 memset(bids, 0, sizeof bids); 2342 f.bf_insns = icode_to_fcode(root, &f.bf_len); 2343 bpf_dump(&f, 1); 2344 putchar('\n'); 2345 free((char *)f.bf_insns); 2346 } 2347 #endif 2348