"""Derive implicit dependencies between syscalls from their read/write field lists."""
from collections import defaultdict
import copy
import json
import sys
import pprint

from constants import (
    GLOBAL_BLACKLIST,
    IMPL_DEP_FILE_STR,
    OUTPUT_FILE_STR,
    SYSCALL_PREFIXES,
    ListType,
    hardcode_syscall_read_fields,
    hardcode_syscall_write_fields,
)


class Parser(object):
    """Parse per-syscall read/write field lists and compute implicit dependencies.

    A syscall A implicitly depends on a syscall B when at least one
    (struct, member) pair read by A is written by B.

    Attributes:
        syscall_read_fields: syscall -> set of (struct, member) pairs it reads.
        syscall_write_fields: syscall -> set of (struct, member) pairs it writes.
        implicit_dependencies: syscall -> set of syscalls it depends on.
        verbose_impl_dep: syscall -> list of {'call', 'reason'} dicts, filled
            only when ``verbose`` is set.
        deref_counter: (struct, member) -> number of dependencies it caused.
    """

    def __init__(
        self,
        impl_dep_file_str=IMPL_DEP_FILE_STR,
        output_file_str=OUTPUT_FILE_STR,
        verbose=False,
        pretty=False
    ):
        """Open the input/output files and seed the hard-coded field tables.

        Args:
            impl_dep_file_str: path of the input file to parse.
            output_file_str: path prefix for output files; '.json',
                '_verbose.json', '.pretty' and '_verbose.pretty' are appended.
            verbose: also produce the '_verbose.json' output with reasons.
            pretty: also produce the two pprint-formatted '.pretty' outputs.

        Exits the process with status 1 if any file cannot be opened.
        """
        try:
            # open() instead of the Python-2-only file() builtin.
            self.impl_dep_file = open(impl_dep_file_str, 'r')
            self.output_file = open(output_file_str + '.json', 'w+')
            if verbose:
                self.output_file_verbose = open(output_file_str + '_verbose.json', 'w+')
            if pretty:
                self.pretty_output_file = open(output_file_str + '.pretty', 'w+')
                self.pretty_output_file_verbose = open(output_file_str + '_verbose.pretty', 'w+')
        except IOError:
            sys.stderr.write("ERROR: Cannot open files %s %s.\n" % (impl_dep_file_str, output_file_str))
            sys.exit(1)
        self.verbose = verbose
        self.pretty = pretty
        self.syscall_read_fields = defaultdict(set)
        self.syscall_write_fields = defaultdict(set)
        self.implicit_dependencies = defaultdict(set)
        self.verbose_impl_dep = defaultdict(list)
        self.deref_counter = defaultdict(int)  # count which struct->members are most common

        for syscall, fields in hardcode_syscall_read_fields.items():
            self.syscall_read_fields[syscall].update(fields)

        for syscall, fields in hardcode_syscall_write_fields.items():
            self.syscall_write_fields[syscall].update(fields)

    def _sanitize_syscall(self, syscall):
        """Return ``syscall`` with the first matching SYSCALL_PREFIXES entry removed."""
        for prefix in SYSCALL_PREFIXES:
            if syscall.startswith(prefix):
                return syscall[len(prefix):]
        return syscall

    def _deref_to_tuple(self, deref):
        """ (struct a)->b ==> (a,b) """
        struct, member = deref.split('->')
        struct = struct[1:-1]  # strip parens
        struct = struct.split(' ')[1]  # drop struct keyword
        return (struct, member)

    def _split_field(self, field):
        """Turn '[(struct a)->b, (struct c)->d]' into [(a, b), (c, d)].

        Entries that are empty after stripping whitespace (for example the
        one produced by a trailing ', ') are ignored.
        """
        field = field.strip()
        field = field[1:-1]  # strip square brackets
        # Strip before filtering so whitespace-only entries are dropped
        # instead of reaching _deref_to_tuple as an empty string.
        derefs = [deref.strip() for deref in field.split(',')]
        return [self._deref_to_tuple(deref) for deref in derefs if deref]

    def _sanitize_line(self, line):
        """Split one '<syscall> <list_type>: [<derefs>]' input line.

        Returns:
            (syscall, list_type, derefs), where syscall has its prefix removed
            and derefs is a list of (struct, member) tuples.
        """
        syscall_and_listtype, field = line.split(':', 1)
        syscall, list_type = syscall_and_listtype.split()
        syscall = self._sanitize_syscall(syscall)
        derefs = self._split_field(field)
        return syscall, list_type, derefs

    def _add_fields(self, syscall, list_type, derefs):
        """Record ``derefs`` for ``syscall`` in the read or write table.

        Raises:
            ValueError: if ``list_type`` is neither ListType.READ nor
                ListType.WRITE.
        """
        if list_type == ListType.READ:
            target = self.syscall_read_fields
        elif list_type == ListType.WRITE:
            target = self.syscall_write_fields
        else:
            # Previously fell through and failed with an UnboundLocalError.
            raise ValueError(
                "unknown list type %r for syscall %r" % (list_type, syscall)
            )
        for deref in derefs:
            if deref in GLOBAL_BLACKLIST:  # ignore spammy structs
                continue
            target[syscall].add(deref)

    def _construct_implicit_deps(self):
        """ just do a naive O(n^2) loop to see intersections between write_list and read_list """
        for this_call, read_fields in self.syscall_read_fields.items():
            for that_call, write_fields in self.syscall_write_fields.items():
                if that_call == this_call:  # calls are obviously dependent on themselves. ignore.
                    continue
                intersection = read_fields & write_fields
                if not intersection:
                    continue
                self.implicit_dependencies[this_call].add(that_call)
                if self.verbose:
                    self.verbose_impl_dep[this_call].append({
                        'call': that_call,
                        'reason': intersection,
                    })
                    # NOTE(review): counting is kept inside the verbose
                    # branch; confirm this nesting is the intended one.
                    for deref in intersection:
                        self.deref_counter[deref] += 1

    def parse(self):
        """Read every line of the input file, then compute the dependencies."""
        for line in self.impl_dep_file:
            if not line.strip():
                continue  # tolerate blank lines instead of failing to unpack
            syscall, list_type, derefs = self._sanitize_line(line)
            self._add_fields(syscall, list_type, derefs)
        self._construct_implicit_deps()

    def _listify_verbose_reason(self, reason):
        """Return a JSON-serializable copy of one verbose reason dict.

        The 'reason' set of (struct, member) tuples becomes a sorted list of
        'struct->member' strings; the input dict is left untouched.
        """
        listified = copy.copy(reason)  # shallow: only 'reason' is replaced
        listified['reason'] = sorted(
            struct + '->' + field for struct, field in reason['reason']
        )
        return listified

    def _get_json_dependencies(self):
        """Return (implicit_dependencies, verbose_impl_dep) as plain dicts of lists.

        Dependency lists are sorted so the JSON output is reproducible.
        """
        implicit_dependencies = {}
        verbose_impl_dep = {}
        for call, dep_set in self.implicit_dependencies.items():
            implicit_dependencies[call] = sorted(dep_set)
        for call, call_reasons in self.verbose_impl_dep.items():
            verbose_impl_dep[call] = [
                self._listify_verbose_reason(reason) for reason in call_reasons
            ]
        return implicit_dependencies, verbose_impl_dep

    def write(self):
        """Dump the results to the output files and print the deref counts."""
        implicit_dependencies, verbose_impl_dep = self._get_json_dependencies()
        json.dump(implicit_dependencies, self.output_file)
        if self.verbose:
            json.dump(verbose_impl_dep, self.output_file_verbose)
        if self.pretty:
            pprint.pprint(dict(self.implicit_dependencies), self.pretty_output_file)
            pprint.pprint(dict(self.verbose_impl_dep), self.pretty_output_file_verbose)
        # NOTE(review): printed unconditionally; confirm it should not be
        # gated on one of the flags. Sorted by count, then by deref.
        for deref, count in sorted(
            self.deref_counter.items(), key=lambda item: (item[1], item[0])
        ):
            print("%s: %d" % (deref, count))

    def close(self):
        """Close every file opened by __init__, including the pretty outputs."""
        self.output_file.close()
        self.impl_dep_file.close()
        if self.verbose:
            self.output_file_verbose.close()
        if self.pretty:
            # These two handles were previously never closed.
            self.pretty_output_file.close()
            self.pretty_output_file_verbose.close()