1#!/usr/bin/env python 2 3from __future__ import print_function 4 5""" 6This script parses each "meta" file and extracts the 7information needed to deduce build and src dependencies. 8 9It works much the same as the original shell script, but is 10*much* more efficient. 11 12The parsing work is handled by the class MetaFile. 13We only pay attention to a subset of the information in the 14"meta" files. Specifically: 15 16'CWD' to initialize our notion. 17 18'C' to track chdir(2) on a per process basis 19 20'R' files read are what we really care about. 21 directories read, provide a clue to resolving 22 subsequent relative paths. That is if we cannot find 23 them relative to 'cwd', we check relative to the last 24 dir read. 25 26'W' files opened for write or read-write, 27 for filemon V3 and earlier. 28 29'E' files executed. 30 31'L' files linked 32 33'V' the filemon version, this record is used as a clue 34 that we have reached the interesting bit. 35 36""" 37 38""" 39RCSid: 40 $Id: meta2deps.py,v 1.40 2021/12/13 19:32:46 sjg Exp $ 41 42 Copyright (c) 2011-2020, Simon J. Gerraty 43 Copyright (c) 2011-2017, Juniper Networks, Inc. 44 All rights reserved. 45 46 Redistribution and use in source and binary forms, with or without 47 modification, are permitted provided that the following conditions 48 are met: 49 1. Redistributions of source code must retain the above copyright 50 notice, this list of conditions and the following disclaimer. 51 2. Redistributions in binary form must reproduce the above copyright 52 notice, this list of conditions and the following disclaimer in the 53 documentation and/or other materials provided with the distribution. 54 55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 67""" 68 69import os, re, sys 70 71def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 72 """ 73 Return an absolute path, resolving via cwd or last_dir if needed. 74 """ 75 if path.endswith('/.'): 76 path = path[0:-2] 77 if len(path) > 0 and path[0] == '/': 78 if os.path.exists(path): 79 return path 80 if debug > 2: 81 print("skipping non-existent:", path, file=debug_out) 82 return None 83 if path == '.': 84 return cwd 85 if path.startswith('./'): 86 return cwd + path[1:] 87 if last_dir == cwd: 88 last_dir = None 89 for d in [last_dir, cwd]: 90 if not d: 91 continue 92 if path == '..': 93 dw = d.split('/') 94 p = '/'.join(dw[:-1]) 95 if not p: 96 p = '/' 97 return p 98 p = '/'.join([d,path]) 99 if debug > 2: 100 print("looking for:", p, end=' ', file=debug_out) 101 if not os.path.exists(p): 102 if debug > 2: 103 print("nope", file=debug_out) 104 p = None 105 continue 106 if debug > 2: 107 print("found:", p, file=debug_out) 108 return p 109 return None 110 111def cleanpath(path): 112 """cleanup path without using realpath(3)""" 113 if path.startswith('/'): 114 r = '/' 115 else: 116 r = '' 117 p = [] 118 w = path.split('/') 119 for d in w: 120 if not d or d == '.': 121 continue 122 if d == '..': 123 try: 124 p.pop() 125 continue 126 except: 127 break 128 p.append(d) 129 130 return r + '/'.join(p) 131 132def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 133 """ 134 Return an absolute path, resolving via cwd or last_dir if needed. 135 this gets called a lot, so we try to avoid calling realpath. 136 """ 137 rpath = resolve(path, cwd, last_dir, debug, debug_out) 138 if rpath: 139 path = rpath 140 elif len(path) > 0 and path[0] == '/': 141 return None 142 if (path.find('/') < 0 or 143 path.find('./') > 0 or 144 path.endswith('/..')): 145 path = cleanpath(path) 146 return path 147 148def sort_unique(list, cmp=None, key=None, reverse=False): 149 if sys.version_info[0] == 2: 150 list.sort(cmp, key, reverse) 151 else: 152 list.sort(reverse=reverse) 153 nl = [] 154 le = None 155 for e in list: 156 if e == le: 157 continue 158 le = e 159 nl.append(e) 160 return nl 161 162def add_trims(x): 163 return ['/' + x + '/', 164 '/' + x, 165 x + '/', 166 x] 167 168def target_spec_exts(target_spec): 169 """return a list of dirdep extensions that could match target_spec""" 170 171 if target_spec.find(',') < 0: 172 return ['.'+target_spec] 173 w = target_spec.split(',') 174 n = len(w) 175 e = [] 176 while n > 0: 177 e.append('.'+','.join(w[0:n])) 178 n -= 1 179 return e 180 181class MetaFile: 182 """class to parse meta files generated by bmake.""" 183 184 conf = None 185 dirdep_re = None 186 host_target = None 187 srctops = [] 188 objroots = [] 189 excludes = [] 190 seen = {} 191 obj_deps = [] 192 src_deps = [] 193 file_deps = [] 194 195 def __init__(self, name, conf={}): 196 """if name is set we will parse it now. 197 conf can have the follwing keys: 198 199 SRCTOPS list of tops of the src tree(s). 200 201 CURDIR the src directory 'bmake' was run from. 202 203 RELDIR the relative path from SRCTOP to CURDIR 204 205 MACHINE the machine we built for. 206 set to 'none' if we are not cross-building. 207 More specifically if machine cannot be deduced from objdirs. 208 209 TARGET_SPEC 210 Sometimes MACHINE isn't enough. 211 212 HOST_TARGET 213 when we build for the pseudo machine 'host' 214 the object tree uses HOST_TARGET rather than MACHINE. 215 216 OBJROOTS a list of the common prefix for all obj dirs it might 217 end in '/' or '-'. 218 219 DPDEPS names an optional file to which per file dependencies 220 will be appended. 221 For example if 'some/path/foo.h' is read from SRCTOP 222 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 223 This can allow 'bmake' to learn all the dirs within 224 the tree that depend on 'foo.h' 225 226 EXCLUDES 227 A list of paths to ignore. 228 ccache(1) can otherwise be trouble. 229 230 debug desired debug level 231 232 debug_out open file to send debug output to (sys.stderr) 233 234 """ 235 236 self.name = name 237 self.debug = conf.get('debug', 0) 238 self.debug_out = conf.get('debug_out', sys.stderr) 239 240 self.machine = conf.get('MACHINE', '') 241 self.machine_arch = conf.get('MACHINE_ARCH', '') 242 self.target_spec = conf.get('TARGET_SPEC', self.machine) 243 self.exts = target_spec_exts(self.target_spec) 244 self.curdir = conf.get('CURDIR') 245 self.reldir = conf.get('RELDIR') 246 self.dpdeps = conf.get('DPDEPS') 247 self.line = 0 248 249 if not self.conf: 250 # some of the steps below we want to do only once 251 self.conf = conf 252 self.host_target = conf.get('HOST_TARGET') 253 for srctop in conf.get('SRCTOPS', []): 254 if srctop[-1] != '/': 255 srctop += '/' 256 if not srctop in self.srctops: 257 self.srctops.append(srctop) 258 _srctop = os.path.realpath(srctop) 259 if _srctop[-1] != '/': 260 _srctop += '/' 261 if not _srctop in self.srctops: 262 self.srctops.append(_srctop) 263 264 trim_list = add_trims(self.machine) 265 if self.machine == 'host': 266 trim_list += add_trims(self.host_target) 267 if self.target_spec != self.machine: 268 trim_list += add_trims(self.target_spec) 269 270 for objroot in conf.get('OBJROOTS', []): 271 for e in trim_list: 272 if objroot.endswith(e): 273 # this is not what we want - fix it 274 objroot = objroot[0:-len(e)] 275 276 if objroot[-1] != '/': 277 objroot += '/' 278 if not objroot in self.objroots: 279 self.objroots.append(objroot) 280 _objroot = os.path.realpath(objroot) 281 if objroot[-1] == '/': 282 _objroot += '/' 283 if not _objroot in self.objroots: 284 self.objroots.append(_objroot) 285 286 # we want the longest match 287 self.srctops.sort(reverse=True) 288 self.objroots.sort(reverse=True) 289 290 self.excludes = conf.get('EXCLUDES', []) 291 292 if self.debug: 293 print("host_target=", self.host_target, file=self.debug_out) 294 print("srctops=", self.srctops, file=self.debug_out) 295 print("objroots=", self.objroots, file=self.debug_out) 296 print("excludes=", self.excludes, file=self.debug_out) 297 print("ext_list=", self.exts, file=self.debug_out) 298 299 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 300 301 if self.dpdeps and not self.reldir: 302 if self.debug: 303 print("need reldir:", end=' ', file=self.debug_out) 304 if self.curdir: 305 srctop = self.find_top(self.curdir, self.srctops) 306 if srctop: 307 self.reldir = self.curdir.replace(srctop,'') 308 if self.debug: 309 print(self.reldir, file=self.debug_out) 310 if not self.reldir: 311 self.dpdeps = None # we cannot do it? 312 313 self.cwd = os.getcwd() # make sure this is initialized 314 self.last_dir = self.cwd 315 316 if name: 317 self.try_parse() 318 319 def reset(self): 320 """reset state if we are being passed meta files from multiple directories.""" 321 self.seen = {} 322 self.obj_deps = [] 323 self.src_deps = [] 324 self.file_deps = [] 325 326 def dirdeps(self, sep='\n'): 327 """return DIRDEPS""" 328 return sep.strip() + sep.join(self.obj_deps) 329 330 def src_dirdeps(self, sep='\n'): 331 """return SRC_DIRDEPS""" 332 return sep.strip() + sep.join(self.src_deps) 333 334 def file_depends(self, out=None): 335 """Append DPDEPS_${file} += ${RELDIR} 336 for each file we saw, to the output file.""" 337 if not self.reldir: 338 return None 339 for f in sort_unique(self.file_deps): 340 print('DPDEPS_%s += %s' % (f, self.reldir), file=out) 341 # these entries provide for reverse DIRDEPS lookup 342 for f in self.obj_deps: 343 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out) 344 345 def seenit(self, dir): 346 """rememer that we have seen dir.""" 347 self.seen[dir] = 1 348 349 def add(self, list, data, clue=''): 350 """add data to list if it isn't already there.""" 351 if data not in list: 352 list.append(data) 353 if self.debug: 354 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out) 355 356 def find_top(self, path, list): 357 """the logical tree may be split across multiple trees""" 358 for top in list: 359 if path.startswith(top): 360 if self.debug > 2: 361 print("found in", top, file=self.debug_out) 362 return top 363 return None 364 365 def find_obj(self, objroot, dir, path, input): 366 """return path within objroot, taking care of .dirdep files""" 367 ddep = None 368 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 369 if not ddep and os.path.exists(ddepf): 370 ddep = open(ddepf, 'r').readline().strip('# \n') 371 if self.debug > 1: 372 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out) 373 for e in self.exts: 374 if ddep.endswith(e): 375 ddep = ddep[0:-len(e)] 376 break 377 378 if not ddep: 379 # no .dirdeps, so remember that we've seen the raw input 380 self.seenit(input) 381 self.seenit(dir) 382 if self.machine == 'none': 383 if dir.startswith(objroot): 384 return dir.replace(objroot,'') 385 return None 386 m = self.dirdep_re.match(dir.replace(objroot,'')) 387 if m: 388 ddep = m.group(2) 389 dmachine = m.group(1) 390 if dmachine != self.machine: 391 if not (self.machine == 'host' and 392 dmachine == self.host_target): 393 if self.debug > 2: 394 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out) 395 ddep += '.' + dmachine 396 397 return ddep 398 399 def try_parse(self, name=None, file=None): 400 """give file and line number causing exception""" 401 try: 402 self.parse(name, file) 403 except: 404 # give a useful clue 405 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr) 406 raise 407 408 def parse(self, name=None, file=None): 409 """A meta file looks like: 410 411 # Meta data file "path" 412 CMD "command-line" 413 CWD "cwd" 414 TARGET "target" 415 -- command output -- 416 -- filemon acquired metadata -- 417 # buildmon version 3 418 V 3 419 C "pid" "cwd" 420 E "pid" "path" 421 F "pid" "child" 422 R "pid" "path" 423 W "pid" "path" 424 X "pid" "status" 425 D "pid" "path" 426 L "pid" "src" "target" 427 M "pid" "old" "new" 428 S "pid" "path" 429 # Bye bye 430 431 We go to some effort to avoid processing a dependency more than once. 432 Of the above record types only C,E,F,L,R,V and W are of interest. 433 """ 434 435 version = 0 # unknown 436 if name: 437 self.name = name; 438 if file: 439 f = file 440 cwd = self.last_dir = self.cwd 441 else: 442 f = open(self.name, 'r') 443 skip = True 444 pid_cwd = {} 445 pid_last_dir = {} 446 last_pid = 0 447 448 self.line = 0 449 if self.curdir: 450 self.seenit(self.curdir) # we ignore this 451 452 interesting = 'CEFLRV' 453 for line in f: 454 self.line += 1 455 # ignore anything we don't care about 456 if not line[0] in interesting: 457 continue 458 if self.debug > 2: 459 print("input:", line, end=' ', file=self.debug_out) 460 w = line.split() 461 462 if skip: 463 if w[0] == 'V': 464 skip = False 465 version = int(w[1]) 466 """ 467 if version < 4: 468 # we cannot ignore 'W' records 469 # as they may be 'rw' 470 interesting += 'W' 471 """ 472 elif w[0] == 'CWD': 473 self.cwd = cwd = self.last_dir = w[1] 474 self.seenit(cwd) # ignore this 475 if self.debug: 476 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out) 477 continue 478 479 pid = int(w[1]) 480 if pid != last_pid: 481 if last_pid: 482 pid_last_dir[last_pid] = self.last_dir 483 cwd = pid_cwd.get(pid, self.cwd) 484 self.last_dir = pid_last_dir.get(pid, self.cwd) 485 last_pid = pid 486 487 # process operations 488 if w[0] == 'F': 489 npid = int(w[2]) 490 pid_cwd[npid] = cwd 491 pid_last_dir[npid] = cwd 492 last_pid = npid 493 continue 494 elif w[0] == 'C': 495 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 496 if not cwd: 497 cwd = w[2] 498 if self.debug > 1: 499 print("missing cwd=", cwd, file=self.debug_out) 500 if cwd.endswith('/.'): 501 cwd = cwd[0:-2] 502 self.last_dir = pid_last_dir[pid] = cwd 503 pid_cwd[pid] = cwd 504 if self.debug > 1: 505 print("cwd=", cwd, file=self.debug_out) 506 continue 507 508 if w[2] in self.seen: 509 if self.debug > 2: 510 print("seen:", w[2], file=self.debug_out) 511 continue 512 # file operations 513 if w[0] in 'ML': 514 # these are special, tread src as read and 515 # target as write 516 self.parse_path(w[2].strip("'"), cwd, 'R', w) 517 self.parse_path(w[3].strip("'"), cwd, 'W', w) 518 continue 519 elif w[0] in 'ERWS': 520 path = w[2] 521 if path == '.': 522 continue 523 self.parse_path(path, cwd, w[0], w) 524 525 assert(version > 0) 526 if not file: 527 f.close() 528 529 def is_src(self, base, dir, rdir): 530 """is base in srctop""" 531 for dir in [dir,rdir]: 532 if not dir: 533 continue 534 path = '/'.join([dir,base]) 535 srctop = self.find_top(path, self.srctops) 536 if srctop: 537 if self.dpdeps: 538 self.add(self.file_deps, path.replace(srctop,''), 'file') 539 self.add(self.src_deps, dir.replace(srctop,''), 'src') 540 self.seenit(dir) 541 return True 542 return False 543 544 def parse_path(self, path, cwd, op=None, w=[]): 545 """look at a path for the op specified""" 546 547 if not op: 548 op = w[0] 549 550 # we are never interested in .dirdep files as dependencies 551 if path.endswith('.dirdep'): 552 return 553 for p in self.excludes: 554 if p and path.startswith(p): 555 if self.debug > 2: 556 print("exclude:", p, path, file=self.debug_out) 557 return 558 # we don't want to resolve the last component if it is 559 # a symlink 560 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out) 561 if not path: 562 return 563 dir,base = os.path.split(path) 564 if dir in self.seen: 565 if self.debug > 2: 566 print("seen:", dir, file=self.debug_out) 567 return 568 # we can have a path in an objdir which is a link 569 # to the src dir, we may need to add dependencies for each 570 rdir = dir 571 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out) 572 if dir: 573 rdir = os.path.realpath(dir) 574 else: 575 dir = rdir 576 if rdir == dir: 577 rdir = None 578 # now put path back together 579 path = '/'.join([dir,base]) 580 if self.debug > 1: 581 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out) 582 if op in 'RWS': 583 if path in [self.last_dir, cwd, self.cwd, self.curdir]: 584 if self.debug > 1: 585 print("skipping:", path, file=self.debug_out) 586 return 587 if os.path.isdir(path): 588 if op in 'RW': 589 self.last_dir = path; 590 if self.debug > 1: 591 print("ldir=", self.last_dir, file=self.debug_out) 592 return 593 594 if op in 'ER': 595 # finally, we get down to it 596 if dir == self.cwd or dir == self.curdir: 597 return 598 if self.is_src(base, dir, rdir): 599 self.seenit(w[2]) 600 if not rdir: 601 return 602 603 objroot = None 604 for dir in [dir,rdir]: 605 if not dir: 606 continue 607 objroot = self.find_top(dir, self.objroots) 608 if objroot: 609 break 610 if objroot: 611 ddep = self.find_obj(objroot, dir, path, w[2]) 612 if ddep: 613 self.add(self.obj_deps, ddep, 'obj') 614 if self.dpdeps and objroot.endswith('/stage/'): 615 sp = '/'.join(path.replace(objroot,'').split('/')[1:]) 616 self.add(self.file_deps, sp, 'file') 617 else: 618 # don't waste time looking again 619 self.seenit(w[2]) 620 self.seenit(dir) 621 622 623def main(argv, klass=MetaFile, xopts='', xoptf=None): 624 """Simple driver for class MetaFile. 625 626 Usage: 627 script [options] [key=value ...] "meta" ... 628 629 Options and key=value pairs contribute to the 630 dictionary passed to MetaFile. 631 632 -S "SRCTOP" 633 add "SRCTOP" to the "SRCTOPS" list. 634 635 -C "CURDIR" 636 637 -O "OBJROOT" 638 add "OBJROOT" to the "OBJROOTS" list. 639 640 -m "MACHINE" 641 642 -a "MACHINE_ARCH" 643 644 -H "HOST_TARGET" 645 646 -D "DPDEPS" 647 648 -d bumps debug level 649 650 """ 651 import getopt 652 653 # import Psyco if we can 654 # it can speed things up quite a bit 655 have_psyco = 0 656 try: 657 import psyco 658 psyco.full() 659 have_psyco = 1 660 except: 661 pass 662 663 conf = { 664 'SRCTOPS': [], 665 'OBJROOTS': [], 666 'EXCLUDES': [], 667 } 668 669 try: 670 machine = os.environ['MACHINE'] 671 if machine: 672 conf['MACHINE'] = machine 673 machine_arch = os.environ['MACHINE_ARCH'] 674 if machine_arch: 675 conf['MACHINE_ARCH'] = machine_arch 676 srctop = os.environ['SB_SRC'] 677 if srctop: 678 conf['SRCTOPS'].append(srctop) 679 objroot = os.environ['SB_OBJROOT'] 680 if objroot: 681 conf['OBJROOTS'].append(objroot) 682 except: 683 pass 684 685 debug = 0 686 output = True 687 688 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts) 689 for o, a in opts: 690 if o == '-a': 691 conf['MACHINE_ARCH'] = a 692 elif o == '-d': 693 debug += 1 694 elif o == '-q': 695 output = False 696 elif o == '-H': 697 conf['HOST_TARGET'] = a 698 elif o == '-S': 699 if a not in conf['SRCTOPS']: 700 conf['SRCTOPS'].append(a) 701 elif o == '-C': 702 conf['CURDIR'] = a 703 elif o == '-O': 704 if a not in conf['OBJROOTS']: 705 conf['OBJROOTS'].append(a) 706 elif o == '-R': 707 conf['RELDIR'] = a 708 elif o == '-D': 709 conf['DPDEPS'] = a 710 elif o == '-m': 711 conf['MACHINE'] = a 712 elif o == '-T': 713 conf['TARGET_SPEC'] = a 714 elif o == '-X': 715 if a not in conf['EXCLUDES']: 716 conf['EXCLUDES'].append(a) 717 elif xoptf: 718 xoptf(o, a, conf) 719 720 conf['debug'] = debug 721 722 # get any var=val assignments 723 eaten = [] 724 for a in args: 725 if a.find('=') > 0: 726 k,v = a.split('=') 727 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 728 if k == 'SRCTOP': 729 k = 'SRCTOPS' 730 elif k == 'OBJROOT': 731 k = 'OBJROOTS' 732 if v not in conf[k]: 733 conf[k].append(v) 734 else: 735 conf[k] = v 736 eaten.append(a) 737 continue 738 break 739 740 for a in eaten: 741 args.remove(a) 742 743 debug_out = conf.get('debug_out', sys.stderr) 744 745 if debug: 746 print("config:", file=debug_out) 747 print("psyco=", have_psyco, file=debug_out) 748 for k,v in list(conf.items()): 749 print("%s=%s" % (k,v), file=debug_out) 750 751 m = None 752 for a in args: 753 if a.endswith('.meta'): 754 if not os.path.exists(a): 755 continue 756 m = klass(a, conf) 757 elif a.startswith('@'): 758 # there can actually multiple files per line 759 for line in open(a[1:]): 760 for f in line.strip().split(): 761 if not os.path.exists(f): 762 continue 763 m = klass(f, conf) 764 765 if output and m: 766 print(m.dirdeps()) 767 768 print(m.src_dirdeps('\nsrc:')) 769 770 dpdeps = conf.get('DPDEPS') 771 if dpdeps: 772 m.file_depends(open(dpdeps, 'w')) 773 774 return m 775 776if __name__ == '__main__': 777 try: 778 main(sys.argv) 779 except: 780 # yes, this goes to stdout 781 print("ERROR: ", sys.exc_info()[1]) 782 raise 783 784