1#!/usr/bin/env python 2 3from __future__ import print_function 4 5""" 6This script parses each "meta" file and extracts the 7information needed to deduce build and src dependencies. 8 9It works much the same as the original shell script, but is 10*much* more efficient. 11 12The parsing work is handled by the class MetaFile. 13We only pay attention to a subset of the information in the 14"meta" files. Specifically: 15 16'CWD' to initialize our notion. 17 18'C' to track chdir(2) on a per process basis 19 20'R' files read are what we really care about. 21 directories read, provide a clue to resolving 22 subsequent relative paths. That is if we cannot find 23 them relative to 'cwd', we check relative to the last 24 dir read. 25 26'W' files opened for write or read-write, 27 for filemon V3 and earlier. 28 29'E' files executed. 30 31'L' files linked 32 33'V' the filemon version, this record is used as a clue 34 that we have reached the interesting bit. 35 36""" 37 38""" 39RCSid: 40 $Id: meta2deps.py,v 1.30 2020/06/08 23:05:00 sjg Exp $ 41 42 Copyright (c) 2011-2019, Simon J. Gerraty 43 Copyright (c) 2011-2017, Juniper Networks, Inc. 44 All rights reserved. 45 46 Redistribution and use in source and binary forms, with or without 47 modification, are permitted provided that the following conditions 48 are met: 49 1. Redistributions of source code must retain the above copyright 50 notice, this list of conditions and the following disclaimer. 51 2. Redistributions in binary form must reproduce the above copyright 52 notice, this list of conditions and the following disclaimer in the 53 documentation and/or other materials provided with the distribution. 54 55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 67""" 68 69import os, re, sys 70 71def getv(dict, key, d=None): 72 """Lookup key in dict and return value or the supplied default.""" 73 if key in dict: 74 return dict[key] 75 return d 76 77def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 78 """ 79 Return an absolute path, resolving via cwd or last_dir if needed. 80 """ 81 if path.endswith('/.'): 82 path = path[0:-2] 83 if len(path) > 0 and path[0] == '/': 84 if os.path.exists(path): 85 return path 86 if debug > 2: 87 print("skipping non-existent:", path, file=debug_out) 88 return None 89 if path == '.': 90 return cwd 91 if path.startswith('./'): 92 return cwd + path[1:] 93 if last_dir == cwd: 94 last_dir = None 95 for d in [last_dir, cwd]: 96 if not d: 97 continue 98 if path == '..': 99 dw = d.split('/') 100 p = '/'.join(dw[:-1]) 101 if not p: 102 p = '/' 103 return p 104 p = '/'.join([d,path]) 105 if debug > 2: 106 print("looking for:", p, end=' ', file=debug_out) 107 if not os.path.exists(p): 108 if debug > 2: 109 print("nope", file=debug_out) 110 p = None 111 continue 112 if debug > 2: 113 print("found:", p, file=debug_out) 114 return p 115 return None 116 117def cleanpath(path): 118 """cleanup path without using realpath(3)""" 119 if path.startswith('/'): 120 r = '/' 121 else: 122 r = '' 123 p = [] 124 w = path.split('/') 125 for d in w: 126 if not d or d == '.': 127 continue 128 if d == '..': 129 try: 130 p.pop() 131 continue 132 except: 133 break 134 p.append(d) 135 136 return r + '/'.join(p) 137 138def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 139 """ 140 Return an absolute path, resolving via cwd or last_dir if needed. 141 this gets called a lot, so we try to avoid calling realpath. 142 """ 143 rpath = resolve(path, cwd, last_dir, debug, debug_out) 144 if rpath: 145 path = rpath 146 elif len(path) > 0 and path[0] == '/': 147 return None 148 if (path.find('/') < 0 or 149 path.find('./') > 0 or 150 path.endswith('/..')): 151 path = cleanpath(path) 152 return path 153 154def sort_unique(list, cmp=None, key=None, reverse=False): 155 list.sort(cmp, key, reverse) 156 nl = [] 157 le = None 158 for e in list: 159 if e == le: 160 continue 161 le = e 162 nl.append(e) 163 return nl 164 165def add_trims(x): 166 return ['/' + x + '/', 167 '/' + x, 168 x + '/', 169 x] 170 171class MetaFile: 172 """class to parse meta files generated by bmake.""" 173 174 conf = None 175 dirdep_re = None 176 host_target = None 177 srctops = [] 178 objroots = [] 179 excludes = [] 180 seen = {} 181 obj_deps = [] 182 src_deps = [] 183 file_deps = [] 184 185 def __init__(self, name, conf={}): 186 """if name is set we will parse it now. 187 conf can have the follwing keys: 188 189 SRCTOPS list of tops of the src tree(s). 190 191 CURDIR the src directory 'bmake' was run from. 192 193 RELDIR the relative path from SRCTOP to CURDIR 194 195 MACHINE the machine we built for. 196 set to 'none' if we are not cross-building. 197 More specifically if machine cannot be deduced from objdirs. 198 199 TARGET_SPEC 200 Sometimes MACHINE isn't enough. 201 202 HOST_TARGET 203 when we build for the pseudo machine 'host' 204 the object tree uses HOST_TARGET rather than MACHINE. 205 206 OBJROOTS a list of the common prefix for all obj dirs it might 207 end in '/' or '-'. 208 209 DPDEPS names an optional file to which per file dependencies 210 will be appended. 211 For example if 'some/path/foo.h' is read from SRCTOP 212 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 213 This can allow 'bmake' to learn all the dirs within 214 the tree that depend on 'foo.h' 215 216 EXCLUDES 217 A list of paths to ignore. 218 ccache(1) can otherwise be trouble. 219 220 debug desired debug level 221 222 debug_out open file to send debug output to (sys.stderr) 223 224 """ 225 226 self.name = name 227 self.debug = getv(conf, 'debug', 0) 228 self.debug_out = getv(conf, 'debug_out', sys.stderr) 229 230 self.machine = getv(conf, 'MACHINE', '') 231 self.machine_arch = getv(conf, 'MACHINE_ARCH', '') 232 self.target_spec = getv(conf, 'TARGET_SPEC', '') 233 self.curdir = getv(conf, 'CURDIR') 234 self.reldir = getv(conf, 'RELDIR') 235 self.dpdeps = getv(conf, 'DPDEPS') 236 self.line = 0 237 238 if not self.conf: 239 # some of the steps below we want to do only once 240 self.conf = conf 241 self.host_target = getv(conf, 'HOST_TARGET') 242 for srctop in getv(conf, 'SRCTOPS', []): 243 if srctop[-1] != '/': 244 srctop += '/' 245 if not srctop in self.srctops: 246 self.srctops.append(srctop) 247 _srctop = os.path.realpath(srctop) 248 if _srctop[-1] != '/': 249 _srctop += '/' 250 if not _srctop in self.srctops: 251 self.srctops.append(_srctop) 252 253 trim_list = add_trims(self.machine) 254 if self.machine == 'host': 255 trim_list += add_trims(self.host_target) 256 if self.target_spec: 257 trim_list += add_trims(self.target_spec) 258 259 for objroot in getv(conf, 'OBJROOTS', []): 260 for e in trim_list: 261 if objroot.endswith(e): 262 # this is not what we want - fix it 263 objroot = objroot[0:-len(e)] 264 265 if objroot[-1] != '/': 266 objroot += '/' 267 if not objroot in self.objroots: 268 self.objroots.append(objroot) 269 _objroot = os.path.realpath(objroot) 270 if objroot[-1] == '/': 271 _objroot += '/' 272 if not _objroot in self.objroots: 273 self.objroots.append(_objroot) 274 275 # we want the longest match 276 self.srctops.sort(reverse=True) 277 self.objroots.sort(reverse=True) 278 279 self.excludes = getv(conf, 'EXCLUDES', []) 280 281 if self.debug: 282 print("host_target=", self.host_target, file=self.debug_out) 283 print("srctops=", self.srctops, file=self.debug_out) 284 print("objroots=", self.objroots, file=self.debug_out) 285 print("excludes=", self.excludes, file=self.debug_out) 286 287 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 288 289 if self.dpdeps and not self.reldir: 290 if self.debug: 291 print("need reldir:", end=' ', file=self.debug_out) 292 if self.curdir: 293 srctop = self.find_top(self.curdir, self.srctops) 294 if srctop: 295 self.reldir = self.curdir.replace(srctop,'') 296 if self.debug: 297 print(self.reldir, file=self.debug_out) 298 if not self.reldir: 299 self.dpdeps = None # we cannot do it? 300 301 self.cwd = os.getcwd() # make sure this is initialized 302 self.last_dir = self.cwd 303 304 if name: 305 self.try_parse() 306 307 def reset(self): 308 """reset state if we are being passed meta files from multiple directories.""" 309 self.seen = {} 310 self.obj_deps = [] 311 self.src_deps = [] 312 self.file_deps = [] 313 314 def dirdeps(self, sep='\n'): 315 """return DIRDEPS""" 316 return sep.strip() + sep.join(self.obj_deps) 317 318 def src_dirdeps(self, sep='\n'): 319 """return SRC_DIRDEPS""" 320 return sep.strip() + sep.join(self.src_deps) 321 322 def file_depends(self, out=None): 323 """Append DPDEPS_${file} += ${RELDIR} 324 for each file we saw, to the output file.""" 325 if not self.reldir: 326 return None 327 for f in sort_unique(self.file_deps): 328 print('DPDEPS_%s += %s' % (f, self.reldir), file=out) 329 # these entries provide for reverse DIRDEPS lookup 330 for f in self.obj_deps: 331 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out) 332 333 def seenit(self, dir): 334 """rememer that we have seen dir.""" 335 self.seen[dir] = 1 336 337 def add(self, list, data, clue=''): 338 """add data to list if it isn't already there.""" 339 if data not in list: 340 list.append(data) 341 if self.debug: 342 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out) 343 344 def find_top(self, path, list): 345 """the logical tree may be split across multiple trees""" 346 for top in list: 347 if path.startswith(top): 348 if self.debug > 2: 349 print("found in", top, file=self.debug_out) 350 return top 351 return None 352 353 def find_obj(self, objroot, dir, path, input): 354 """return path within objroot, taking care of .dirdep files""" 355 ddep = None 356 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 357 if not ddep and os.path.exists(ddepf): 358 ddep = open(ddepf, 'r').readline().strip('# \n') 359 if self.debug > 1: 360 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out) 361 if ddep.endswith(self.machine): 362 ddep = ddep[0:-(1+len(self.machine))] 363 elif self.target_spec and ddep.endswith(self.target_spec): 364 ddep = ddep[0:-(1+len(self.target_spec))] 365 366 if not ddep: 367 # no .dirdeps, so remember that we've seen the raw input 368 self.seenit(input) 369 self.seenit(dir) 370 if self.machine == 'none': 371 if dir.startswith(objroot): 372 return dir.replace(objroot,'') 373 return None 374 m = self.dirdep_re.match(dir.replace(objroot,'')) 375 if m: 376 ddep = m.group(2) 377 dmachine = m.group(1) 378 if dmachine != self.machine: 379 if not (self.machine == 'host' and 380 dmachine == self.host_target): 381 if self.debug > 2: 382 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out) 383 ddep += '.' + dmachine 384 385 return ddep 386 387 def try_parse(self, name=None, file=None): 388 """give file and line number causing exception""" 389 try: 390 self.parse(name, file) 391 except: 392 # give a useful clue 393 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr) 394 raise 395 396 def parse(self, name=None, file=None): 397 """A meta file looks like: 398 399 # Meta data file "path" 400 CMD "command-line" 401 CWD "cwd" 402 TARGET "target" 403 -- command output -- 404 -- filemon acquired metadata -- 405 # buildmon version 3 406 V 3 407 C "pid" "cwd" 408 E "pid" "path" 409 F "pid" "child" 410 R "pid" "path" 411 W "pid" "path" 412 X "pid" "status" 413 D "pid" "path" 414 L "pid" "src" "target" 415 M "pid" "old" "new" 416 S "pid" "path" 417 # Bye bye 418 419 We go to some effort to avoid processing a dependency more than once. 420 Of the above record types only C,E,F,L,R,V and W are of interest. 421 """ 422 423 version = 0 # unknown 424 if name: 425 self.name = name; 426 if file: 427 f = file 428 cwd = self.last_dir = self.cwd 429 else: 430 f = open(self.name, 'r') 431 skip = True 432 pid_cwd = {} 433 pid_last_dir = {} 434 last_pid = 0 435 436 self.line = 0 437 if self.curdir: 438 self.seenit(self.curdir) # we ignore this 439 440 interesting = 'CEFLRV' 441 for line in f: 442 self.line += 1 443 # ignore anything we don't care about 444 if not line[0] in interesting: 445 continue 446 if self.debug > 2: 447 print("input:", line, end=' ', file=self.debug_out) 448 w = line.split() 449 450 if skip: 451 if w[0] == 'V': 452 skip = False 453 version = int(w[1]) 454 """ 455 if version < 4: 456 # we cannot ignore 'W' records 457 # as they may be 'rw' 458 interesting += 'W' 459 """ 460 elif w[0] == 'CWD': 461 self.cwd = cwd = self.last_dir = w[1] 462 self.seenit(cwd) # ignore this 463 if self.debug: 464 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out) 465 continue 466 467 pid = int(w[1]) 468 if pid != last_pid: 469 if last_pid: 470 pid_last_dir[last_pid] = self.last_dir 471 cwd = getv(pid_cwd, pid, self.cwd) 472 self.last_dir = getv(pid_last_dir, pid, self.cwd) 473 last_pid = pid 474 475 # process operations 476 if w[0] == 'F': 477 npid = int(w[2]) 478 pid_cwd[npid] = cwd 479 pid_last_dir[npid] = cwd 480 last_pid = npid 481 continue 482 elif w[0] == 'C': 483 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 484 if not cwd: 485 cwd = w[2] 486 if self.debug > 1: 487 print("missing cwd=", cwd, file=self.debug_out) 488 if cwd.endswith('/.'): 489 cwd = cwd[0:-2] 490 self.last_dir = pid_last_dir[pid] = cwd 491 pid_cwd[pid] = cwd 492 if self.debug > 1: 493 print("cwd=", cwd, file=self.debug_out) 494 continue 495 496 if w[2] in self.seen: 497 if self.debug > 2: 498 print("seen:", w[2], file=self.debug_out) 499 continue 500 # file operations 501 if w[0] in 'ML': 502 # these are special, tread src as read and 503 # target as write 504 self.parse_path(w[2].strip("'"), cwd, 'R', w) 505 self.parse_path(w[3].strip("'"), cwd, 'W', w) 506 continue 507 elif w[0] in 'ERWS': 508 path = w[2] 509 self.parse_path(path, cwd, w[0], w) 510 511 if not file: 512 f.close() 513 514 def is_src(self, base, dir, rdir): 515 """is base in srctop""" 516 for dir in [dir,rdir]: 517 if not dir: 518 continue 519 path = '/'.join([dir,base]) 520 srctop = self.find_top(path, self.srctops) 521 if srctop: 522 if self.dpdeps: 523 self.add(self.file_deps, path.replace(srctop,''), 'file') 524 self.add(self.src_deps, dir.replace(srctop,''), 'src') 525 self.seenit(dir) 526 return True 527 return False 528 529 def parse_path(self, path, cwd, op=None, w=[]): 530 """look at a path for the op specified""" 531 532 if not op: 533 op = w[0] 534 535 # we are never interested in .dirdep files as dependencies 536 if path.endswith('.dirdep'): 537 return 538 for p in self.excludes: 539 if p and path.startswith(p): 540 if self.debug > 2: 541 print("exclude:", p, path, file=self.debug_out) 542 return 543 # we don't want to resolve the last component if it is 544 # a symlink 545 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out) 546 if not path: 547 return 548 dir,base = os.path.split(path) 549 if dir in self.seen: 550 if self.debug > 2: 551 print("seen:", dir, file=self.debug_out) 552 return 553 # we can have a path in an objdir which is a link 554 # to the src dir, we may need to add dependencies for each 555 rdir = dir 556 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out) 557 rdir = os.path.realpath(dir) 558 if rdir == dir: 559 rdir = None 560 # now put path back together 561 path = '/'.join([dir,base]) 562 if self.debug > 1: 563 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out) 564 if op in 'RWS': 565 if path in [self.last_dir, cwd, self.cwd, self.curdir]: 566 if self.debug > 1: 567 print("skipping:", path, file=self.debug_out) 568 return 569 if os.path.isdir(path): 570 if op in 'RW': 571 self.last_dir = path; 572 if self.debug > 1: 573 print("ldir=", self.last_dir, file=self.debug_out) 574 return 575 576 if op in 'ER': 577 # finally, we get down to it 578 if dir == self.cwd or dir == self.curdir: 579 return 580 if self.is_src(base, dir, rdir): 581 self.seenit(w[2]) 582 if not rdir: 583 return 584 585 objroot = None 586 for dir in [dir,rdir]: 587 if not dir: 588 continue 589 objroot = self.find_top(dir, self.objroots) 590 if objroot: 591 break 592 if objroot: 593 ddep = self.find_obj(objroot, dir, path, w[2]) 594 if ddep: 595 self.add(self.obj_deps, ddep, 'obj') 596 if self.dpdeps and objroot.endswith('/stage/'): 597 sp = '/'.join(path.replace(objroot,'').split('/')[1:]) 598 self.add(self.file_deps, sp, 'file') 599 else: 600 # don't waste time looking again 601 self.seenit(w[2]) 602 self.seenit(dir) 603 604 605def main(argv, klass=MetaFile, xopts='', xoptf=None): 606 """Simple driver for class MetaFile. 607 608 Usage: 609 script [options] [key=value ...] "meta" ... 610 611 Options and key=value pairs contribute to the 612 dictionary passed to MetaFile. 613 614 -S "SRCTOP" 615 add "SRCTOP" to the "SRCTOPS" list. 616 617 -C "CURDIR" 618 619 -O "OBJROOT" 620 add "OBJROOT" to the "OBJROOTS" list. 621 622 -m "MACHINE" 623 624 -a "MACHINE_ARCH" 625 626 -H "HOST_TARGET" 627 628 -D "DPDEPS" 629 630 -d bumps debug level 631 632 """ 633 import getopt 634 635 # import Psyco if we can 636 # it can speed things up quite a bit 637 have_psyco = 0 638 try: 639 import psyco 640 psyco.full() 641 have_psyco = 1 642 except: 643 pass 644 645 conf = { 646 'SRCTOPS': [], 647 'OBJROOTS': [], 648 'EXCLUDES': [], 649 } 650 651 try: 652 machine = os.environ['MACHINE'] 653 if machine: 654 conf['MACHINE'] = machine 655 machine_arch = os.environ['MACHINE_ARCH'] 656 if machine_arch: 657 conf['MACHINE_ARCH'] = machine_arch 658 srctop = os.environ['SB_SRC'] 659 if srctop: 660 conf['SRCTOPS'].append(srctop) 661 objroot = os.environ['SB_OBJROOT'] 662 if objroot: 663 conf['OBJROOTS'].append(objroot) 664 except: 665 pass 666 667 debug = 0 668 output = True 669 670 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts) 671 for o, a in opts: 672 if o == '-a': 673 conf['MACHINE_ARCH'] = a 674 elif o == '-d': 675 debug += 1 676 elif o == '-q': 677 output = False 678 elif o == '-H': 679 conf['HOST_TARGET'] = a 680 elif o == '-S': 681 if a not in conf['SRCTOPS']: 682 conf['SRCTOPS'].append(a) 683 elif o == '-C': 684 conf['CURDIR'] = a 685 elif o == '-O': 686 if a not in conf['OBJROOTS']: 687 conf['OBJROOTS'].append(a) 688 elif o == '-R': 689 conf['RELDIR'] = a 690 elif o == '-D': 691 conf['DPDEPS'] = a 692 elif o == '-m': 693 conf['MACHINE'] = a 694 elif o == '-T': 695 conf['TARGET_SPEC'] = a 696 elif o == '-X': 697 if a not in conf['EXCLUDES']: 698 conf['EXCLUDES'].append(a) 699 elif xoptf: 700 xoptf(o, a, conf) 701 702 conf['debug'] = debug 703 704 # get any var=val assignments 705 eaten = [] 706 for a in args: 707 if a.find('=') > 0: 708 k,v = a.split('=') 709 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 710 if k == 'SRCTOP': 711 k = 'SRCTOPS' 712 elif k == 'OBJROOT': 713 k = 'OBJROOTS' 714 if v not in conf[k]: 715 conf[k].append(v) 716 else: 717 conf[k] = v 718 eaten.append(a) 719 continue 720 break 721 722 for a in eaten: 723 args.remove(a) 724 725 debug_out = getv(conf, 'debug_out', sys.stderr) 726 727 if debug: 728 print("config:", file=debug_out) 729 print("psyco=", have_psyco, file=debug_out) 730 for k,v in list(conf.items()): 731 print("%s=%s" % (k,v), file=debug_out) 732 733 m = None 734 for a in args: 735 if a.endswith('.meta'): 736 if not os.path.exists(a): 737 continue 738 m = klass(a, conf) 739 elif a.startswith('@'): 740 # there can actually multiple files per line 741 for line in open(a[1:]): 742 for f in line.strip().split(): 743 if not os.path.exists(f): 744 continue 745 m = klass(f, conf) 746 747 if output and m: 748 print(m.dirdeps()) 749 750 print(m.src_dirdeps('\nsrc:')) 751 752 dpdeps = getv(conf, 'DPDEPS') 753 if dpdeps: 754 m.file_depends(open(dpdeps, 'wb')) 755 756 return m 757 758if __name__ == '__main__': 759 try: 760 main(sys.argv) 761 except: 762 # yes, this goes to stdout 763 print("ERROR: ", sys.exc_info()[1]) 764 raise 765 766