#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion of the current working directory.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

from __future__ import print_function

"""
RCSid:
        $Id: meta2deps.py,v 1.38 2021/06/17 05:20:08 sjg Exp $

        Copyright (c) 2011-2020, Simon J. Gerraty
        Copyright (c) 2011-2017, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path        the path to resolve; a trailing '/.' is dropped first.
    cwd         directory that relative paths are resolved against.
    last_dir    optional second directory to try; it is tried before
                cwd and ignored when it equals cwd.
    debug       messages are written to debug_out when this is > 2.

    Returns None when an absolute path does not exist, or when a
    relative path exists under neither last_dir nor cwd.
    '.' and './...' are mapped onto cwd without checking existence.
    """
    if path.endswith('/.'):
        # '/.' is the root directory, do not reduce it to ''
        path = path[0:-2] or '/'
    if len(path) > 0 and path[0] == '/':
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory, no existence check
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            p = None
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and each '..' removes the
    preceding component.  A '..' with nothing left to remove stops
    processing; the components collected so far are returned.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    w = path.split('/')
    for d in w:
        if not d or d == '.':
            continue
        if d == '..':
            try:
                p.pop()
                continue
            except IndexError:
                # nothing left to pop - give up on the rest
                break
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    Returns None when path is absolute and resolve() cannot find it.
    An unresolvable relative path is returned as given (possibly
    passed through cleanpath()).
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    elif len(path) > 0 and path[0] == '/':
        return None
    # only pay for cleanpath() when the path looks like it needs it
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    cmp is only honoured by Python 2; key and reverse are passed to
    list.sort() on both major versions.
    """
    if sys.version_info[0] == 2:
        list.sort(cmp, key, reverse)
    else:
        list.sort(key=key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last kept element rather than a None
        # sentinel so that a leading None is not dropped
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the suffixes built from x that we trim from an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: the containers below are class attributes and instances
    # mutate them in place (append/item assignment), so results
    # accumulate across every MetaFile created in this process until
    # reset() rebinds them on an instance.  main() relies on this.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = conf.get('debug', 0)
        self.debug_out = conf.get('debug_out', sys.stderr)

        self.machine = conf.get('MACHINE', '')
        self.machine_arch = conf.get('MACHINE_ARCH', '')
        self.target_spec = conf.get('TARGET_SPEC', '')
        self.curdir = conf.get('CURDIR')
        self.reldir = conf.get('RELDIR')
        self.dpdeps = conf.get('DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = conf.get('HOST_TARGET')
            for srctop in conf.get('SRCTOPS', []):
                if not srctop:
                    continue    # nothing useful in an empty entry
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                    _srctop = os.path.realpath(srctop)
                    if _srctop[-1] != '/':
                        _srctop += '/'
                    if not _srctop in self.srctops:
                        self.srctops.append(_srctop)

            # An empty MACHINE/HOST_TARGET/TARGET_SPEC must not add
            # trims: the empty suffix matches everything and
            # objroot[0:-0] would wipe out the whole objroot.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in conf.get('OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue    # nothing left after trimming

                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = conf.get('EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The entries of obj_deps joined by sep, prefixed by sep.strip().
        """
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The entries of src_deps joined by sep, prefixed by sep.strip().
        """
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Nothing is written (and None is returned) when reldir is unset.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, else None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the objroot that dir was found under.
        dir     directory of the file being considered.
        path    full path of the file being considered.
        input   the raw path from the meta file; it is marked as seen
                when no .dirdep file supplies the answer.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fp:
                    ddep = fp.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing '.MACHINE' / '.TARGET_SPEC';
                # an empty machine must not match (it would chop
                # the last character of ddep)
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # what is left after objroot is 'machine/reldir'
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # deliberately broad: give a useful clue, then re-raise
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    if set, an iterable of lines to read instead of opening
                self.name; it is not closed here.

        Raises AssertionError if no 'V' record was found.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        cwd = self.cwd
        if file:
            f = file
            self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        # Only records starting with one of these letters are looked at.
        # 'W' is not included; the upstream logic that added it for
        # filemon versions < 4 (where 'W' may mean 'rw') is disabled.
        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # still in the header / command output
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the per process view of cwd and last_dir
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

        # raised explicitly so the check survives 'python -O'
        if version <= 0:
            raise AssertionError('no filemon version (V) record found')

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (which may be empty) are tried in that order.
        On a match the src (and, if DPDEPS is wanted, file)
        dependency is recorded and True is returned.
        """
        for d in [dir, rdir]:
            if not d:
                continue
            path = '/'.join([d, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, d.replace(srctop,''), 'src')
                self.seenit(d)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path    the path taken from the record.
        cwd     current directory of the process that used it.
        op      the operation; defaults to w[0].
        w       the split record; w[2] is the raw path that gets
                remembered as seen.
        """
        if w is None:
            w = []

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if dir:
            rdir = os.path.realpath(dir)
        else:
            dir = rdir
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            # odir is deliberately used after the loop: it is the
            # directory that matched, or the last candidate (rdir,
            # possibly None) when nothing matched.
            odir = dir
            for odir in [dir, rdir]:
                if not odir:
                    continue
                objroot = self.find_top(odir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, odir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(odir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -T "TARGET_SPEC"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q          do not print the results

    -d          bumps debug level

    xopts is appended to the getopt option string; options that are
    not handled here are passed to xoptf(option, value, conf).

    Arguments ending in '.meta' are parsed; '@file' names a file
    containing whitespace separated meta file names.
    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is looked up on its own, so that one being
    # unset does not cause the others to be ignored
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') > 0:
            # split on the first '=' only, the value may contain more
            k, v = a.split('=', 1)
            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten += 1
            continue
        break

    del args[:eaten]

    debug_out = conf.get('debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as fp:
                for line in fp:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = conf.get('DPDEPS')
        if dpdeps:
            # close the file so the output is flushed before we return
            with open(dpdeps, 'w') as fp:
                m.file_depends(fp)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise