1#!/usr/bin/env python 2 3""" 4This script parses each "meta" file and extracts the 5information needed to deduce build and src dependencies. 6 7It works much the same as the original shell script, but is 8*much* more efficient. 9 10The parsing work is handled by the class MetaFile. 11We only pay attention to a subset of the information in the 12"meta" files. Specifically: 13 14'CWD' to initialize our notion. 15 16'C' to track chdir(2) on a per process basis 17 18'R' files read are what we really care about. 19 directories read, provide a clue to resolving 20 subsequent relative paths. That is if we cannot find 21 them relative to 'cwd', we check relative to the last 22 dir read. 23 24'W' files opened for write or read-write, 25 for filemon V3 and earlier. 26 27'E' files executed. 28 29'L' files linked 30 31'V' the filemon version, this record is used as a clue 32 that we have reached the interesting bit. 33 34""" 35 36""" 37RCSid: 38 $Id: meta2deps.py,v 1.5 2011/11/14 00:18:42 sjg Exp $ 39 40 Copyright (c) 2011, Juniper Networks, Inc. 41 42 Redistribution and use in source and binary forms, with or without 43 modification, are permitted provided that the following conditions 44 are met: 45 1. Redistributions of source code must retain the above copyright 46 notice, this list of conditions and the following disclaimer. 47 2. Redistributions in binary form must reproduce the above copyright 48 notice, this list of conditions and the following disclaimer in the 49 documentation and/or other materials provided with the distribution. 50 51 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 52 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 53 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 54 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 55 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 56 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 57 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 58 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 59 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 60 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 61 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 62 63""" 64 65import os, re, sys 66 67def getv(dict, key, d=None): 68 """Lookup key in dict and return value or the supplied default.""" 69 if key in dict: 70 return dict[key] 71 return d 72 73def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 74 """ 75 Return an absolute path, resolving via cwd or last_dir if needed. 76 """ 77 if path.endswith('/.'): 78 path = path[0:-2] 79 if path[0] == '/': 80 return path 81 if path == '.': 82 return cwd 83 if path.startswith('./'): 84 return cwd + path[1:] 85 if last_dir == cwd: 86 last_dir = None 87 for d in [last_dir, cwd]: 88 if not d: 89 continue 90 p = '/'.join([d,path]) 91 if debug > 2: 92 print >> debug_out, "looking for:", p, 93 if not os.path.exists(p): 94 if debug > 2: 95 print >> debug_out, "nope" 96 p = None 97 continue 98 if debug > 2: 99 print >> debug_out, "found:", p 100 return p 101 return None 102 103def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 104 """ 105 Return an absolute path, resolving via cwd or last_dir if needed. 106 this gets called a lot, so we try to avoid calling realpath 107 until we know we have something. 108 """ 109 path = resolve(path, cwd, last_dir, debug, debug_out) 110 if path and (path.find('./') > 0 or 111 path.endswith('/..') or 112 os.path.islink(path)): 113 return os.path.realpath(path) 114 return path 115 116def sort_unique(list, cmp=None, key=None, reverse=False): 117 list.sort(cmp, key, reverse) 118 nl = [] 119 le = None 120 for e in list: 121 if e == le: 122 continue 123 nl.append(e) 124 return nl 125 126class MetaFile: 127 """class to parse meta files generated by bmake.""" 128 129 conf = None 130 dirdep_re = None 131 host_target = None 132 srctops = [] 133 objroots = [] 134 135 seen = {} 136 obj_deps = [] 137 src_deps = [] 138 file_deps = [] 139 140 def __init__(self, name, conf={}): 141 """if name is set we will parse it now. 142 conf can have the follwing keys: 143 144 SRCTOPS list of tops of the src tree(s). 145 146 CURDIR the src directory 'bmake' was run from. 147 148 RELDIR the relative path from SRCTOP to CURDIR 149 150 MACHINE the machine we built for. 151 set to 'none' if we are not cross-building. 152 153 HOST_TARGET 154 when we build for the psuedo machine 'host' 155 the object tree uses HOST_TARGET rather than MACHINE. 156 157 OBJROOTS a list of the common prefix for all obj dirs it might 158 end in '/' or '-'. 159 160 DPDEPS names an optional file to which per file dependencies 161 will be appended. 162 For example if 'some/path/foo.h' is read from SRCTOP 163 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 164 This can allow 'bmake' to learn all the dirs within 165 the tree that depend on 'foo.h' 166 167 debug desired debug level 168 169 debug_out open file to send debug output to (sys.stderr) 170 171 """ 172 173 self.name = name 174 self.debug = getv(conf, 'debug', 0) 175 self.debug_out = getv(conf, 'debug_out', sys.stderr) 176 177 if not self.conf: 178 # some of the steps below we want to do only once 179 self.conf = conf 180 self.host_target = getv(conf, 'HOST_TARGET') 181 for srctop in getv(conf, 'SRCTOPS', []): 182 if srctop[-1] != '/': 183 srctop += '/' 184 if not srctop in self.srctops: 185 self.srctops.append(srctop) 186 187 for objroot in getv(conf, 'OBJROOTS', []): 188 if not objroot in self.objroots: 189 self.objroots.append(objroot) 190 _objroot = os.path.realpath(objroot) 191 if objroot[-1] == '/': 192 _objroot += '/' 193 if not _objroot in self.objroots: 194 self.objroots.append(_objroot) 195 196 if self.debug: 197 print >> self.debug_out, "host_target=", self.host_target 198 print >> self.debug_out, "srctops=", self.srctops 199 print >> self.debug_out, "objroots=", self.objroots 200 201 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 202 203 self.curdir = getv(conf, 'CURDIR') 204 self.machine = getv(conf, 'MACHINE', '') 205 self.reldir = getv(conf, 'RELDIR') 206 self.dpdeps = getv(conf, 'DPDEPS') 207 if self.dpdeps and not self.reldir: 208 if self.debug: 209 print >> self.debug_out, "need reldir:", 210 if self.curdir: 211 srctop = self.find_top(self.curdir, self.srctops) 212 if srctop: 213 self.reldir = self.curdir.replace(srctop,'') 214 if self.debug: 215 print >> self.debug_out, self.reldir 216 if not self.reldir: 217 self.dpdeps = None # we cannot do it? 218 219 if name: 220 self.parse() 221 222 def reset(self): 223 """reset state if we are being passed meta files from multiple directories.""" 224 self.seen = {} 225 self.obj_deps = [] 226 self.src_deps = [] 227 self.file_deps = [] 228 229 def dirdeps(self, sep='\n'): 230 """return DIRDEPS""" 231 return sep.strip() + sep.join(self.obj_deps) 232 233 def src_dirdeps(self, sep='\n'): 234 """return SRC_DIRDEPS""" 235 return sep.strip() + sep.join(self.src_deps) 236 237 def file_depends(self, out=None): 238 """Append DPDEPS_${file} += ${RELDIR} 239 for each file we saw, to the output file.""" 240 if not self.reldir: 241 return None 242 for f in sort_unique(self.file_deps): 243 print >> out, 'DPDEPS_%s += %s' % (f, self.reldir) 244 245 def seenit(self, dir): 246 """rememer that we have seen dir.""" 247 self.seen[dir] = 1 248 249 def add(self, list, data, clue=''): 250 """add data to list if it isn't already there.""" 251 if data not in list: 252 list.append(data) 253 if self.debug: 254 print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data) 255 256 def find_top(self, path, list): 257 """the logical tree may be split accross multiple trees""" 258 for top in list: 259 if path.startswith(top): 260 if self.debug > 2: 261 print >> self.debug_out, "found in", top 262 return top 263 return None 264 265 def find_obj(self, objroot, dir, path, input): 266 """return path within objroot, taking care of .dirdep files""" 267 ddep = None 268 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 269 if not ddep and os.path.exists(ddepf): 270 ddep = open(ddepf, 'rb').readline().strip('# \n') 271 if self.debug > 1: 272 print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep) 273 if ddep.endswith(self.machine): 274 ddep = ddep[0:-(1+len(self.machine))] 275 276 if not ddep: 277 # no .dirdeps, so remember that we've seen the raw input 278 self.seenit(input) 279 self.seenit(dir) 280 if self.machine == 'none': 281 if dir.startswith(objroot): 282 return dir.replace(objroot,'') 283 return None 284 m = self.dirdep_re.match(dir.replace(objroot,'')) 285 if m: 286 ddep = m.group(2) 287 dmachine = m.group(1) 288 if dmachine != self.machine: 289 if not (self.machine == 'host' and 290 dmachine == self.host_target): 291 if self.debug > 2: 292 print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep) 293 ddep += '.' + dmachine 294 295 return ddep 296 297 def parse(self, name=None, file=None): 298 """A meta file looks like: 299 300 # Meta data file "path" 301 CMD "command-line" 302 CWD "cwd" 303 TARGET "target" 304 -- command output -- 305 -- filemon acquired metadata -- 306 # buildmon version 3 307 V 3 308 C "pid" "cwd" 309 E "pid" "path" 310 F "pid" "child" 311 R "pid" "path" 312 W "pid" "path" 313 X "pid" "status" 314 D "pid" "path" 315 L "pid" "src" "target" 316 M "pid" "old" "new" 317 S "pid" "path" 318 # Bye bye 319 320 We go to some effort to avoid processing a dependency more than once. 321 Of the above record types only C,E,F,L,R,V and W are of interest. 322 """ 323 324 version = 0 # unknown 325 if name: 326 self.name = name; 327 if file: 328 f = file 329 cwd = last_dir = self.cwd 330 else: 331 f = open(self.name, 'rb') 332 skip = True 333 pid_cwd = {} 334 pid_last_dir = {} 335 last_pid = 0 336 337 if self.curdir: 338 self.seenit(self.curdir) # we ignore this 339 340 interesting = 'CEFLRV' 341 for line in f: 342 # ignore anything we don't care about 343 if not line[0] in interesting: 344 continue 345 if self.debug > 2: 346 print >> self.debug_out, "input:", line, 347 w = line.split() 348 349 if skip: 350 if w[0] == 'V': 351 skip = False 352 version = int(w[1]) 353 """ 354 if version < 4: 355 # we cannot ignore 'W' records 356 # as they may be 'rw' 357 interesting += 'W' 358 """ 359 elif w[0] == 'CWD': 360 self.cwd = cwd = last_dir = w[1] 361 self.seenit(cwd) # ignore this 362 if self.debug: 363 print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd) 364 continue 365 366 pid = int(w[1]) 367 if pid != last_pid: 368 if last_pid: 369 pid_cwd[last_pid] = cwd 370 pid_last_dir[last_pid] = last_dir 371 cwd = getv(pid_cwd, pid, self.cwd) 372 last_dir = getv(pid_last_dir, pid, self.cwd) 373 last_pid = pid 374 375 # process operations 376 if w[0] == 'F': 377 npid = int(w[2]) 378 pid_cwd[npid] = cwd 379 pid_last_dir[npid] = cwd 380 last_pid = npid 381 continue 382 elif w[0] == 'C': 383 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 384 if cwd.endswith('/.'): 385 cwd = cwd[0:-2] 386 last_dir = cwd 387 if self.debug > 1: 388 print >> self.debug_out, "cwd=", cwd 389 continue 390 391 if w[2] in self.seen: 392 if self.debug > 2: 393 print >> self.debug_out, "seen:", w[2] 394 continue 395 # file operations 396 if w[0] in 'ML': 397 path = w[2].strip("'") 398 else: 399 path = w[2] 400 # we don't want to resolve the last component if it is 401 # a symlink 402 path = resolve(path, cwd, last_dir, self.debug, self.debug_out) 403 if not path: 404 continue 405 dir,base = os.path.split(path) 406 if dir in self.seen: 407 if self.debug > 2: 408 print >> self.debug_out, "seen:", dir 409 continue 410 # we can have a path in an objdir which is a link 411 # to the src dir, we may need to add dependencies for each 412 rdir = dir 413 dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out) 414 if rdir == dir or rdir.find('./') > 0: 415 rdir = None 416 # now put path back together 417 path = '/'.join([dir,base]) 418 if self.debug > 1: 419 print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path) 420 if w[0] in 'SRWL': 421 if w[0] == 'W' and path.endswith('.dirdep'): 422 continue 423 if path in [last_dir, cwd, self.cwd, self.curdir]: 424 if self.debug > 1: 425 print >> self.debug_out, "skipping:", path 426 continue 427 if os.path.isdir(path): 428 if w[0] in 'RW': 429 last_dir = path; 430 if self.debug > 1: 431 print >> self.debug_out, "ldir=", last_dir 432 continue 433 434 if w[0] in 'REWML': 435 # finally, we get down to it 436 if dir == self.cwd or dir == self.curdir: 437 continue 438 srctop = self.find_top(path, self.srctops) 439 if srctop: 440 if self.dpdeps: 441 self.add(self.file_deps, path.replace(srctop,''), 'file') 442 self.add(self.src_deps, dir.replace(srctop,''), 'src') 443 self.seenit(w[2]) 444 self.seenit(dir) 445 if rdir and not rdir.startswith(srctop): 446 dir = rdir # for below 447 rdir = None 448 else: 449 continue 450 451 objroot = None 452 for dir in [dir,rdir]: 453 if not dir: 454 continue 455 objroot = self.find_top(dir, self.objroots) 456 if objroot: 457 break 458 if objroot: 459 ddep = self.find_obj(objroot, dir, path, w[2]) 460 if ddep: 461 self.add(self.obj_deps, ddep, 'obj') 462 else: 463 # don't waste time looking again 464 self.seenit(w[2]) 465 self.seenit(dir) 466 if not file: 467 f.close() 468 469 470def main(argv, klass=MetaFile, xopts='', xoptf=None): 471 """Simple driver for class MetaFile. 472 473 Usage: 474 script [options] [key=value ...] "meta" ... 475 476 Options and key=value pairs contribute to the 477 dictionary passed to MetaFile. 478 479 -S "SRCTOP" 480 add "SRCTOP" to the "SRCTOPS" list. 481 482 -C "CURDIR" 483 484 -O "OBJROOT" 485 add "OBJROOT" to the "OBJROOTS" list. 486 487 -m "MACHINE" 488 489 -H "HOST_TARGET" 490 491 -D "DPDEPS" 492 493 -d bumps debug level 494 495 """ 496 import getopt 497 498 # import Psyco if we can 499 # it can speed things up quite a bit 500 have_psyco = 0 501 try: 502 import psyco 503 psyco.full() 504 have_psyco = 1 505 except: 506 pass 507 508 conf = { 509 'SRCTOPS': [], 510 'OBJROOTS': [], 511 } 512 513 try: 514 machine = os.environ['MACHINE'] 515 if machine: 516 conf['MACHINE'] = machine 517 srctop = os.environ['SB_SRC'] 518 if srctop: 519 conf['SRCTOPS'].append(srctop) 520 objroot = os.environ['SB_OBJROOT'] 521 if objroot: 522 conf['OBJROOTS'].append(objroot) 523 except: 524 pass 525 526 debug = 0 527 output = True 528 529 opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts) 530 for o, a in opts: 531 if o == '-d': 532 debug += 1 533 elif o == '-q': 534 output = False 535 elif o == '-H': 536 conf['HOST_TARGET'] = a 537 elif o == '-S': 538 if a not in conf['SRCTOPS']: 539 conf['SRCTOPS'].append(a) 540 elif o == '-C': 541 conf['CURDIR'] = a 542 elif o == '-O': 543 if a not in conf['OBJROOTS']: 544 conf['OBJROOTS'].append(a) 545 elif o == '-R': 546 conf['RELDIR'] = a 547 elif o == '-D': 548 conf['DPDEPS'] = a 549 elif o == '-m': 550 conf['MACHINE'] = a 551 elif xoptf: 552 xoptf(o, a, conf) 553 554 conf['debug'] = debug 555 556 # get any var=val assignments 557 eaten = [] 558 for a in args: 559 if a.find('=') > 0: 560 k,v = a.split('=') 561 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 562 if k == 'SRCTOP': 563 k = 'SRCTOPS' 564 elif k == 'OBJROOT': 565 k = 'OBJROOTS' 566 if v not in conf[k]: 567 conf[k].append(v) 568 else: 569 conf[k] = v 570 eaten.append(a) 571 continue 572 break 573 574 for a in eaten: 575 args.remove(a) 576 577 debug_out = getv(conf, 'debug_out', sys.stderr) 578 579 if debug: 580 print >> debug_out, "config:" 581 print >> debug_out, "psyco=", have_psyco 582 for k,v in conf.items(): 583 print >> debug_out, "%s=%s" % (k,v) 584 585 for a in args: 586 m = klass(a, conf) 587 588 if output: 589 print m.dirdeps() 590 591 print m.src_dirdeps('\nsrc:') 592 593 dpdeps = getv(conf, 'DPDEPS') 594 if dpdeps: 595 m.file_depends(open(dpdeps, 'wb')) 596 597 return m 598 599if __name__ == '__main__': 600 try: 601 main(sys.argv) 602 except: 603 # yes, this goes to stdout 604 print "ERROR: ", sys.exc_info()[1] 605 raise 606 607