#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.8 2013/02/10 19:21:46 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def _emit(out, *args, **kwargs):
    """Write args to out much as the print statement would.

    Each item is converted with str() and the items are joined by a
    single space.  The keyword 'end' (default newline) is appended.
    If out is None, sys.stdout is used.

    Using this rather than print keeps the script usable with both
    python 2 and python 3 without needing a __future__ import.
    """
    end = kwargs.get('end', '\n')
    if out is None:
        out = sys.stdout
    out.write(' '.join([str(a) for a in args]) + end)


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    A trailing '/.' is dropped.  Absolute paths are returned as is,
    '.' and './x' are taken relative to cwd.  Any other relative path
    is looked for (os.path.exists) relative to last_dir and then cwd.

    Returns None if path is empty or cannot be found.
    """
    if path.endswith('/.'):
        # '/.' on its own means the root, not the empty string
        path = path[0:-2] or '/'
    if not path:
        return None
    if path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point looking in the same place twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            _emit(debug_out, "looking for:", p, end=' ')
        if not os.path.exists(p):
            if debug > 2:
                _emit(debug_out, "nope")
            continue
        if debug > 2:
            _emit(debug_out, "found:", p)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    Returns None if resolve() cannot find path.
    """
    path = resolve(path, cwd, last_dir, debug, debug_out)
    if path and (path.find('./') > 0 or
                 path.endswith('/..') or
                 os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp     optional old style comparison function; if key is also
            given, cmp is applied to the key values.

    key     optional key function, as for list.sort().

    reverse sort in descending order if True.

    Elements are dropped when they compare equal to the element
    kept just before them in the sorted order.
    """
    if cmp is not None:
        # list.sort() lost its cmp argument in python 3
        import functools
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            user_key = key
            sort_key = lambda e: cmp_key(user_key(e))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes.  The lists and the dict are
    # mutated in place (append, item assignment), so what one instance
    # collects is visible to all others until reset() rebinds them on
    # an instance.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')

        # make sure this is initialized, a CWD record seen by parse()
        # replaces it.
        self.cwd = os.getcwd()

        # NOTE: the assignment below binds conf on the instance, the
        # class attribute stays None, so this block is entered for
        # each new instance.  The membership tests keep the shared
        # lists free of duplicates.
        if not self.conf:
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful to match
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in ['/' + self.machine, '/' + self.machine + '/']:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue            # nothing useful to match
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                # realpath drops a trailing '/', put it back
                _objroot = os.path.realpath(objroot)
                if objroot.endswith('/'):
                    _objroot += '/'
                if _objroot not in self.objroots:
                    self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                _emit(self.debug_out, "host_target=", self.host_target)
                _emit(self.debug_out, "srctops=", self.srctops)
                _emit(self.debug_out, "objroots=", self.objroots)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        if self.dpdeps and not self.reldir:
            if self.debug:
                _emit(self.debug_out, "need reldir:", end=' ')
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    # only the leading srctop is to be removed
                    self.reldir = self.curdir.replace(srctop, '', 1)
                    if self.debug:
                        _emit(self.debug_out, self.reldir)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        if name:
            self.parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open file, sys.stdout is used if it is None.
        Nothing is written if RELDIR is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            _emit(out, 'DPDEPS_%s += %s' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                _emit(self.debug_out,
                      "%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    _emit(self.debug_out, "found in", top)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the object root that dir was found in.

        dir     the directory holding path.

        path    the file that was referenced.

        input   the raw path from the meta file, marked as seen when
                no .dirdep file is found.

        Returns None if no dependency can be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    _emit(self.debug_out,
                          "found %s: %s\n" % (ddepf, ddep))
                # drop a trailing ".${MACHINE}"; with an empty machine
                # endswith() is always true and we would lose the last
                # character of ddep.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '', 1)
                return None
            m = self.dirdep_re.match(dir.replace(objroot, '', 1))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            _emit(self.debug_out,
                                  "adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name as the file to open.

        file    if set, an already open file (text mode) to read
                rather than opening self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = last_dir = self.cwd
        else:
            # text mode so that we get str lines with python 3 too
            f = open(self.name, 'r')
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    # line still has its newline
                    _emit(self.debug_out, "input:", line, end='')
                w = line.split()

                if skip:
                    # everything up to the 'V' record is header or
                    # command output.
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # Disabled: for version < 4 we should not
                        # ignore 'W' records as they may be 'rw',
                        # that would need: interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            _emit(self.debug_out,
                                  "%s: CWD=%s" % (self.name, cwd))
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # save state of the previous process and
                    # pick up that of this one.
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child inherits our cwd
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None,
                                  self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        _emit(self.debug_out, "cwd=", cwd)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        _emit(self.debug_out, "seen:", w[2])
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir,
                               self.debug, self.debug_out)
                if not path:
                    continue
                dir, base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        _emit(self.debug_out, "seen:", dir)
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir,
                              self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir, base])
                if self.debug > 1:
                    _emit(self.debug_out,
                          "raw=%s rdir=%s dir=%s path=%s" %
                          (w[2], rdir, dir, path))
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            _emit(self.debug_out, "skipping:", path)
                        continue
                    if os.path.isdir(path):
                        # a directory is only a clue for resolving
                        # later relative paths
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            _emit(self.debug_out, "ldir=", last_dir)
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        # only the leading srctop is to be removed
                        if self.dpdeps:
                            self.add(self.file_deps,
                                     path.replace(srctop, '', 1), 'file')
                        self.add(self.src_deps,
                                 dir.replace(srctop, '', 1), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    # NOTE: dir is deliberately the loop variable, it is
                    # the directory that matched when we break, and the
                    # last one tried (possibly None) otherwise.
                    for dir in [dir, rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -R "RELDIR"

    -d  bumps debug level

    -q  do not output the results

    xopts is appended to the getopt option string, and any option
    not handled here is passed to xoptf(option, value, conf).

    Returns the last instance of klass created, or None if no
    meta files were given.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
    }

    # each of these is optional and independent of the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # the value may itself contain '='
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1
    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        _emit(debug_out, "config:")
        _emit(debug_out, "psyco=", have_psyco)
        for k, v in conf.items():
            _emit(debug_out, "%s=%s" % (k, v))

    m = None
    for a in args:
        m = klass(a, conf)

    # the instances share their results, so the last one has it all
    if output and m:
        _emit(sys.stdout, m.dirdeps())

        _emit(sys.stdout, m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except Exception:
        # yes, this goes to stdout
        _emit(sys.stdout, "ERROR: ", sys.exc_info()[1])
        raise