#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

    'CWD'   to initialize our notion.

    'C'     to track chdir(2) on a per process basis

    'R'     files read are what we really care about.
            directories read, provide a clue to resolving
            subsequent relative paths.  That is if we cannot find
            them relative to 'cwd', we check relative to the last
            dir read.

    'W'     files opened for write or read-write,
            for filemon V3 and earlier.

    'E'     files executed.

    'L'     files linked

    'V'     the filemon version, this record is used as a clue
            that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.7 2012/11/06 05:44:03 sjg Exp $

        Copyright (c) 2011, Juniper Networks, Inc.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def _emit(out, *args, **kwargs):
    """Write args to 'out', space separated, much like 'print' would.

    This keeps the script usable with both Python 2 and Python 3
    without depending on the print statement or a __future__ import.

    out     open file to write to; None means sys.stdout.

    end     (keyword only) text written after the arguments,
            defaults to a newline.
    """
    if out is None:
        out = sys.stdout
    end = kwargs.get('end', '\n')
    out.write(' '.join([str(a) for a in args]) + end)


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths, '.' and './...' are handled textually.
    Any other relative path is looked for under last_dir and then
    under cwd; the first candidate that exists is returned.
    Returns None if no candidate exists.
    """
    if path.endswith('/.'):
        path = path[0:-2]
        if not path:
            # the input was exactly '/.', ie. the root directory
            return '/'
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            _emit(debug_out, "looking for:", p, end=' ')
        if not os.path.exists(p):
            if debug > 2:
                _emit(debug_out, "nope")
            continue
        if debug > 2:
            _emit(debug_out, "found:", p)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    Returns None if resolve() cannot find the path.
    """
    path = resolve(path, cwd, last_dir, debug, debug_out)
    # realpath is only needed if the path still contains './',
    # ends in '/..' or its last component is a symlink.
    if path and (path.find('./') > 0 or
                 path.endswith('/..') or
                 os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp, key and reverse have the meaning they have for the
    Python 2 list.sort(); cmp is adapted via functools.cmp_to_key
    so that this also works with Python 3.
    """
    if cmp is not None:
        import functools
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            # as with Python 2, cmp compares the extracted keys
            def sort_key(item, _key=key, _cmp_key=cmp_key):
                return _cmp_key(_key(item))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        # equal elements are adjacent once sorted,
        # so we only need to compare with the last one kept
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


class MetaFile:
    """class to parse meta files generated by bmake."""

    conf = None
    dirdep_re = None
    host_target = None
    # NOTE: the lists and dict below are class attributes and so are
    # shared by all instances until reset() is called on an instance.
    # main() depends on that: it creates one instance per meta file
    # and reports the accumulated results via the last one.
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written (see file_depends).
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # also record the real path, in case srctop is a symlink
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            for objroot in getv(conf, 'OBJROOTS', []):
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops a trailing '/', put it back
                    if objroot[-1] == '/':
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            if self.debug:
                _emit(self.debug_out, "host_target=", self.host_target)
                _emit(self.debug_out, "srctops=", self.srctops)
                _emit(self.debug_out, "objroots=", self.objroots)

            # splits 'machine/rest/of/path'
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        self.curdir = getv(conf, 'CURDIR')
        self.machine = getv(conf, 'MACHINE', '')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        if self.dpdeps and not self.reldir:
            # try to work out RELDIR from CURDIR
            if self.debug:
                _emit(self.debug_out, "need reldir:", end=' ')
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        _emit(self.debug_out, self.reldir)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        if name:
            self.parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open file, None means sys.stdout.
        Nothing is written (and None is returned) if RELDIR is unknown.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            _emit(out, 'DPDEPS_%s += %s' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                _emit(self.debug_out,
                      "%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    _emit(self.debug_out, "found in", top)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found under.

        dir     the directory containing path.

        path    the file that was accessed.

        input   the raw path from the meta file, remembered as seen
                when no .dirdep file is found.

        Returns None if nothing can be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                ddepfile = open(ddepf, 'r')
                try:
                    ddep = ddepfile.readline().strip('# \n')
                finally:
                    ddepfile.close()
                if self.debug > 1:
                    _emit(self.debug_out,
                          "found %s: %s\n" % (ddepf, ddep))
                # drop a trailing '.${MACHINE}'; every string ends
                # with '', so an empty machine must not strip anything
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            _emit(self.debug_out,
                                  "adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

            # Meta data file "path"
            CMD "command-line"
            CWD "cwd"
            TARGET "target"
            -- command output --
            -- filemon acquired metadata --
            # buildmon version 3
            V 3
            C "pid" "cwd"
            E "pid" "path"
            F "pid" "child"
            R "pid" "path"
            W "pid" "path"
            X "pid" "status"
            D "pid" "path"
            L "pid" "src" "target"
            M "pid" "old" "new"
            S "pid" "path"
            # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.

        file    if set, an already open file (or other iterable of
                lines) to parse instead of opening self.name;
                it is not closed and self.cwd must already be set.
        """
        if name:
            self.name = name
        if file:
            f = file
            cwd = last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            # these get set when we see the CWD record
            cwd = last_dir = None
        try:
            self._parse_lines(f, cwd, last_dir)
        finally:
            # we only close what we opened
            if not file:
                f.close()

    def _parse_lines(self, f, cwd, last_dir):
        """process the records of the meta file f for parse()."""
        version = 0                     # unknown
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        # NOTE: 'W' records are currently never processed; the code
        # that added 'W' for filemon versions before 4 is disabled.
        interesting = 'CEFLRV'
        for line in f:
            # ignore anything we don't care about
            if not line or line[0] not in interesting:
                continue
            if self.debug > 2:
                _emit(self.debug_out, "input:", line, end=' ')
            w = line.split()

            if skip:
                # nothing is interesting until we see the 'V' record
                if w[0] == 'V':
                    skip = False
                    version = int(w[1])
                    # disabled:
                    # if version < 4:
                    #     # we cannot ignore 'W' records
                    #     # as they may be 'rw'
                    #     interesting += 'W'
                elif w[0] == 'CWD':
                    self.cwd = cwd = last_dir = w[1]
                    self.seenit(cwd)    # ignore this
                    if self.debug:
                        _emit(self.debug_out,
                              "%s: CWD=%s" % (self.name, cwd))
                continue

            pid = int(w[1])
            if pid != last_pid:
                # switching process; save the state of the previous one
                # and restore what we know about this one
                if last_pid:
                    pid_cwd[last_pid] = cwd
                    pid_last_dir[last_pid] = last_dir
                cwd = getv(pid_cwd, pid, self.cwd)
                last_dir = getv(pid_last_dir, pid, self.cwd)
                last_pid = pid

            # process operations
            if w[0] == 'F':
                # the child starts out in the cwd of its parent
                npid = int(w[2])
                pid_cwd[npid] = cwd
                pid_last_dir[npid] = cwd
                last_pid = npid
                continue
            elif w[0] == 'C':
                new_cwd = abspath(w[2], cwd, None,
                                  self.debug, self.debug_out)
                if not new_cwd:
                    # a relative directory that no longer exists
                    # (eg. a temp dir), the best we can do is to
                    # treat it as relative to the previous cwd
                    if cwd:
                        new_cwd = '/'.join([cwd, w[2]])
                    else:
                        new_cwd = w[2]
                cwd = new_cwd
                if cwd.endswith('/.'):
                    cwd = cwd[0:-2]
                last_dir = cwd
                if self.debug > 1:
                    _emit(self.debug_out, "cwd=", cwd)
                continue

            if w[2] in self.seen:
                if self.debug > 2:
                    _emit(self.debug_out, "seen:", w[2])
                continue
            # file operations
            if w[0] in 'ML':
                path = w[2].strip("'")
            else:
                path = w[2]
            # we are never interested in .dirdep files as dependencies
            if path.endswith('.dirdep'):
                continue
            # we don't want to resolve the last component if it is
            # a symlink
            path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
            if not path:
                continue
            dir, base = os.path.split(path)
            if dir in self.seen:
                if self.debug > 2:
                    _emit(self.debug_out, "seen:", dir)
                continue
            # we can have a path in an objdir which is a link
            # to the src dir, we may need to add dependencies for each
            rdir = dir
            dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
            if rdir == dir or rdir.find('./') > 0:
                rdir = None
            # now put path back together
            path = '/'.join([dir, base])
            if self.debug > 1:
                _emit(self.debug_out,
                      "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir,
                                                         dir, path))
            if w[0] in 'SRWL':
                if w[0] == 'W' and path.endswith('.dirdep'):
                    continue
                if path in [last_dir, cwd, self.cwd, self.curdir]:
                    if self.debug > 1:
                        _emit(self.debug_out, "skipping:", path)
                    continue
                if os.path.isdir(path):
                    # a directory is only a clue for
                    # resolving later relative paths
                    if w[0] in 'RW':
                        last_dir = path
                    if self.debug > 1:
                        _emit(self.debug_out, "ldir=", last_dir)
                    continue

            if w[0] in 'REWML':
                # finally, we get down to it
                if dir == self.cwd or dir == self.curdir:
                    continue
                srctop = self.find_top(path, self.srctops)
                if srctop:
                    if self.dpdeps:
                        self.add(self.file_deps,
                                 path.replace(srctop, ''), 'file')
                    self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                    self.seenit(w[2])
                    self.seenit(dir)
                    if rdir and not rdir.startswith(srctop):
                        dir = rdir      # for below
                        rdir = None
                    else:
                        continue

                objroot = None
                # NOTE: 'dir' is deliberately the loop variable,
                # the code below uses whichever value we stopped at.
                for dir in [dir, rdir]:
                    if not dir:
                        continue
                    objroot = self.find_top(dir, self.objroots)
                    if objroot:
                        break
                if objroot:
                    ddep = self.find_obj(objroot, dir, path, w[2])
                    if ddep:
                        self.add(self.obj_deps, ddep, 'obj')
                else:
                    # don't waste time looking again
                    self.seenit(w[2])
                    self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -R "RELDIR"

    -d  bumps debug level

    -q  suppresses the output

    klass is called as klass(meta, conf) for each "meta" argument.
    xopts are extra option letters for getopt; options that are not
    handled here are passed to xoptf(option, value, conf) if it is set.

    Returns the last instance of klass created, or None if no
    "meta" files were named.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # strictly optional
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # NOTE: the first variable missing from the environment
    # ends this block, so the ones after it are not looked at.
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # only split on the first '=', the value may contain more
            k, v = a.split('=', 1)
            if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        # the first arg that is not an assignment ends them
        break

    for a in eaten:
        args.remove(a)

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        _emit(debug_out, "config:")
        _emit(debug_out, "psyco=", have_psyco)
        for k, v in conf.items():
            _emit(debug_out, "%s=%s" % (k, v))

    m = None
    for a in args:
        m = klass(a, conf)

    # with no meta files there is nothing to report
    if output and m is not None:
        _emit(sys.stdout, m.dirdeps())

        _emit(sys.stdout, m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # any previous content of the file is replaced
            dpdeps_out = open(dpdeps, 'w')
            try:
                m.file_depends(dpdeps_out)
            finally:
                dpdeps_out.close()

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        _emit(sys.stdout, "ERROR: ", sys.exc_info()[1])
        raise