#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $FreeBSD$
        $Id: meta2deps.py,v 1.40 2021/12/13 19:32:46 sjg Exp $

        Copyright (c) 2011-2020, Simon J. Gerraty
        Copyright (c) 2011-2017, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os, re, sys

def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path        the path to resolve; a trailing '/.' is dropped first.
    cwd         directory that './x', '.' and relative paths refer to.
    last_dir    optional directory tried before cwd for relative paths.
    debug       debug level; tracing is printed when it exceeds 2.
    debug_out   open file receiving the debug output.

    Returns None for an absolute path that does not exist, and for a
    relative path that exists under neither last_dir nor cwd.
    '.' and './x' are resolved against cwd without touching the
    filesystem.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory; no existence check
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d,path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            p = None
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None

def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove stops
    the scan, so whatever follows it is discarded.
    A leading '/' is preserved.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    w = path.split('/')
    for d in w:
        if not d or d == '.':
            continue
        if d == '..':
            try:
                p.pop()
                continue
            except IndexError:
                # tried to climb above the start of the path
                break
        p.append(d)

    return r + '/'.join(p)

def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    The arguments are passed straight to resolve().  If resolve()
    fails for an absolute path the result is None; if it fails for a
    relative path, that path is used as is.  The result is passed
    through cleanpath() only when it has no '/', contains './' past
    its first character, or ends in '/..'.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    elif len(path) > 0 and path[0] == '/':
        return None
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path

def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    list        the list to sort; it is modified in place.
    cmp         optional old-style comparison function.
    key         optional key function, as for list.sort().
    reverse     sort in descending order when true.

    Duplicates are detected by comparing each element with '==' to
    the previously kept one.
    """
    if sys.version_info[0] == 2:
        list.sort(cmp, key, reverse)
    else:
        if cmp is not None:
            # python3 has no cmp argument, wrap it so that it is
            # applied to the key values just as python2 would
            import functools
            cmp_key = functools.cmp_to_key(cmp)
            if key is None:
                key = cmp_key
            else:
                user_key = key
                key = lambda e: cmp_key(user_key(e))
        list.sort(key=key, reverse=reverse)
    nl = []
    for e in list:
        # comparing with the last kept entry (rather than a None seed)
        # ensures that a leading None is not dropped
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl

def add_trims(x):
    """return the variants of x that may need trimming from an objroot:
    with both, a leading, a trailing and no '/'."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]

def target_spec_exts(target_spec):
    """return a list of dirdep extensions that could match target_spec

    For 'a,b,c' this is ['.a,b,c', '.a,b', '.a'], longest first.
    """

    if target_spec.find(',') < 0:
        return ['.'+target_spec]
    w = target_spec.split(',')
    n = len(w)
    e = []
    while n > 0:
        e.append('.'+','.join(w[0:n]))
        n -= 1
    return e

class MetaFile:
    """class to parse meta files generated by bmake."""

    # These are class attributes, the lists and dict are shared by all
    # instances until reset() rebinds them on an instance.
    # main() relies on this: it creates one instance per meta file
    # and reports the accumulated results via the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            # avoid a shared mutable default argument
            conf = {}

        self.name = name
        self.debug = conf.get('debug', 0)
        self.debug_out = conf.get('debug_out', sys.stderr)

        self.machine = conf.get('MACHINE', '')
        self.machine_arch = conf.get('MACHINE_ARCH', '')
        self.target_spec = conf.get('TARGET_SPEC', self.machine)
        self.exts = target_spec_exts(self.target_spec)
        self.curdir = conf.get('CURDIR')
        self.reldir = conf.get('RELDIR')
        self.dpdeps = conf.get('DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): the assignment below binds an instance
            # attribute, so a new instance still sees the class level
            # 'conf = None' and repeats these steps; the membership
            # tests keep the shared lists free of duplicates.
            self.conf = conf
            self.host_target = conf.get('HOST_TARGET')
            for srctop in conf.get('SRCTOPS', []):
                if not srctop:
                    # an empty entry cannot be a tree top
                    continue
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            # Collect the machine names that may have been appended to
            # an objroot.  Empty or unset names are skipped: an empty
            # trim matches every string and would wipe the objroot.
            trim_names = [self.machine]
            if self.machine == 'host':
                trim_names.append(self.host_target)
            if self.target_spec != self.machine:
                trim_names.append(self.target_spec)
            trim_list = []
            for trim_name in trim_names:
                if trim_name:
                    trim_list += add_trims(trim_name)

            for objroot in conf.get('OBJROOTS', []):
                for e in trim_list:
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left (or nothing given), ignore it
                    continue
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    # also remember the resolved location,
                    # normalized the same way as for srctops
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = conf.get('EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)
                print("ext_list=", self.exts, file=self.debug_out)

            # splits '<machine>/<dirdep>' as found below an objroot
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The obj dependencies joined by sep,
        prefixed with sep stripped of surrounding white-space.
        """
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The src dependencies joined by sep,
        prefixed with sep stripped of surrounding white-space.
        """
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is passed to print() as its file argument.
        Nothing is output (and None returned) if reldir is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output.
        """
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with,
        or None if there is no such entry.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found in.
        dir     directory of the file being considered.
        path    the file being considered.
        input   the raw path from the meta file, remembered as seen
                when no .dirdep file provides the answer.

        Returns None if no dependency can be derived.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters, do not leak the file
                with open(ddepf, 'r') as ddep_file:
                    ddep = ddep_file.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                for e in self.exts:
                    if ddep.endswith(e):
                        ddep = ddep[0:-len(e)]
                        break

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    optional open file to read rather than opening
                self.name; the caller remains responsible for
                closing it.

        An AssertionError results if no 'V' record was seen.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            # make sure cwd is bound even if the first record
            # is for pid 0 and no CWD record preceded it
            cwd = self.cwd
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process, save and restore
                    # the per process notion of cwd and last_dir
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the parent's cwd
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)

            assert(version > 0)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        base    the last component of the path.
        dir     its directory.
        rdir    the resolved directory if that differs, else None.

        The first of dir and rdir found below one of the srctops is
        recorded as a src dependency (and base as a file dependency
        when dpdeps is set) and marked as seen.
        Returns True if one was found.
        """
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path from the record.
        cwd     current directory of the process the record is for.
        op      the operation, defaults to w[0].
        w       the words of the record; w[2] is the raw path that
                gets remembered as seen.  (w is only read, never
                modified.)
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if dir:
            rdir = os.path.realpath(dir)
        else:
            dir = rdir
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is not a dependency, but it is a clue
                # for resolving subsequent relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            # NB: dir is left set to whichever candidate matched
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d          bumps debug level

    The options -R "RELDIR", -T "TARGET_SPEC", -X "EXCLUDE" (added to
    the "EXCLUDES" list) and -q (no output) are handled as well.

    argv        the command line, argv[0] is ignored.
    klass       the class to instantiate for each meta file.
    xopts       extra option letters for getopt.
    xoptf       called as xoptf(option, value, conf) for any option
                not handled here.

    Returns the last instance of klass created, None if there
    was none.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    try:
        # NOTE(review): the first variable missing from the
        # environment ends these lookups, so the later ones are
        # only consulted when all the earlier ones are set.
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # split on the first '=' only, the value may contain more
            k,v = a.split('=', 1)
            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        break

    for a in eaten:
        args.remove(a)

    debug_out = conf.get('debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = conf.get('DPDEPS')
        if dpdeps:
            # NOTE(review): the file is opened with 'w' which
            # truncates it, while the MetaFile documentation for
            # DPDEPS says "appended" - confirm which is intended.
            with open(dpdeps, 'w') as dpdeps_out:
                m.file_depends(dpdeps_out)

    return m

if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise
