xref: /illumos-gate/usr/src/tools/smatch/src/smatch_data/db/smdb.py (revision 44bf619d328827ce5eca6833fcd5c69f1592e578)
1#!/usr/bin/python
2
3# Copyright (C) 2013 Oracle.
4#
5# Licensed under the Open Software License version 1.1
6
7import sqlite3
8import sys
9import re
10
# Open the smatch database that lives in the current working directory.
# Every query helper in this file shares this one module-level connection.
try:
    con = sqlite3.connect('smatch_db.sqlite')
except sqlite3.Error as e:
    # "except X as e" is accepted by Python 2.6+ and Python 3 alike; the
    # older "except X, e" spelling is a syntax error on Python 3.
    print("Error %s:" % e.args[0])
    sys.exit(1)
16
def usage():
    """Print the list of supported sub-commands and exit with status 1."""
    lines = [
        "%s" % (sys.argv[0]),
        "<function> - how a function is called",
        # "info" takes the function first and an optional type name second.
        "info <function> [type] - how a function is called, filtered by type",
        "call_info <file> <function> - merged and raw caller values for a function",
        "return_states <function> - what a function returns",
        "return_implies <function> - what a function's return implies",
        "call_tree <function> - show the call tree",
        "where [struct_type] <member> - where a struct member is set",
        "type_size <struct_type> <member> - how a struct member is allocated",
        "data_info <struct_type> <member> - information about a given data type",
        "function_ptr <function> - which function pointers point to this",
        "functions <member> - which functions are assigned to a pointer member",
        "trace_param <function> <param> - trace where a parameter came from",
        "locals <file> - print the local values in a file.",
        "constraint [struct_type] <member> - constraints required for a struct member",
    ]
    for line in lines:
        print(line)
    sys.exit(1)
30
# Scratch lists shared by get_function_pointers() and its recursive helper.
# get_function_pointers() rebinds both before every lookup.
function_ptrs = []
searched_ptrs = []
def get_function_pointers_helper(func):
    """Recursively collect every pointer that may refer to *func*.

    Each ``ptr`` found in the ``function_ptr`` table for *func* is appended
    to the module-level ``function_ptrs`` list and then looked up in turn,
    so chains of pointer-to-pointer assignments are followed.  Names that
    are already in the list are skipped, which also terminates cycles.
    """
    cur = con.cursor()
    # Bind the name as a parameter so quotes in it cannot break the query.
    cur.execute("select distinct ptr from function_ptr where function = ?;", (func,))
    for row in cur:
        ptr = row[0]
        if ptr in function_ptrs:
            continue
        function_ptrs.append(ptr)
        if ptr not in searched_ptrs:
            searched_ptrs.append(ptr)
            get_function_pointers_helper(ptr)
44
def get_function_pointers(func):
    """Return *func* followed by every function pointer that can reach it.

    The result list always starts with *func* itself; the remaining entries
    are filled in by get_function_pointers_helper().
    """
    global function_ptrs, searched_ptrs
    # Start from fresh lists so results of an earlier lookup do not leak in.
    function_ptrs, searched_ptrs = [func], [func]
    get_function_pointers_helper(func)
    return function_ptrs
52
# Numeric "type" codes stored in the database mapped to their symbolic names.
# type_to_str() and type_to_int() translate between the two forms.
db_types = {
    0: "INTERNAL",
    101: "PARAM_CLEARED",
    103: "PARAM_LIMIT",
    104: "PARAM_FILTER",
    1001: "PARAM_VALUE",
    1002: "BUF_SIZE",
    1004: "CAPPED_DATA",
    1005: "RETURN_VALUE",
    1006: "DEREFERENCE",
    1007: "RANGE_CAP",
    1008: "LOCK_HELD",
    1009: "LOCK_RELEASED",
    1010: "ABSOLUTE_LIMITS",
    1012: "PARAM_ADD",
    1013: "PARAM_FREED",
    1014: "DATA_SOURCE",
    1015: "FUZZY_MAX",
    1016: "STR_LEN",
    1017: "ARRAY_LEN",
    1018: "CAPABLE",
    1019: "NS_CAPABLE",
    1022: "TYPE_LINK",
    1023: "UNTRACKED_PARAM",
    1024: "CULL_PATH",
    1025: "PARAM_SET",
    1026: "PARAM_USED",
    1027: "BYTE_UNITS",
    1028: "COMPARE_LIMIT",
    1029: "PARAM_COMPARE",
    1030: "EXPECTS_TYPE",
    1031: "CONSTRAINT",
    1032: "PASSES_TYPE",
    1033: "CONSTRAINT_REQUIRED",
    1034: "BIT_INFO",
    1035: "NOSPEC",
    1036: "NOSPEC_WB",
    1037: "STMT_CNT",
    1038: "TERMINATED",
    1039: "SLEEP",
    1040: "NO_SLEEP_CNT",
    1041: "SMALLISH",
    1042: "FRESH_MTAG",

    8017: "USER_DATA",
    9017: "USER_DATA_SET",
    8018: "NO_OVERFLOW",
    8019: "NO_OVERFLOW_SIMPLE",
    8020: "LOCKED",
    8021: "UNLOCKED",
    8023: "ATOMIC_INC",
    8024: "ATOMIC_DEC",
}
105
def add_range(rl, min_val, max_val):
    """Return a new range list equal to *rl* with [min_val, max_val] added.

    *rl* is a list of ``[min, max]`` pairs.  The input list is walked once
    from the front; the new range is either inserted, merged with an
    overlapping range, or joined to a range it is directly adjacent to
    (``max + 1 == next min``).  A new outer list is returned.

    NOTE(review): the branch order assumes *rl* is sorted ascending and its
    entries do not overlap -- rl_union() builds lists that way, but this
    function does not verify it.
    """
    check_next = 0   # set once the new range is merged but may still reach later entries
    done = 0         # set when the loop left via "break"
    ret = []
    idx = 0

    if len(rl) == 0:
        return [[min_val, max_val]]

    for idx in range(len(rl)):
        cur_min = rl[idx][0]
        cur_max = rl[idx][1]

        # we already merged the new range but we might need to change later
        # ranges if they over lap with more than one
        if check_next:
            # join with added range
            if max_val + 1 == cur_min:
                ret[len(ret) - 1][1] = cur_max
                done = 1
                break
            # don't overlap
            if max_val < cur_min:
                ret.append([cur_min, cur_max])
                done = 1
                break
            # partially overlap
            if max_val < cur_max:
                ret[len(ret) - 1][1] = cur_max
                done = 1
                break
            # completely overlap
            continue

        # join 2 ranges into one
        if max_val + 1 == cur_min:
            ret.append([min_val, cur_max])
            done = 1
            break
        # range is entirely below
        if max_val < cur_min:
            ret.append([min_val, max_val])
            ret.append([cur_min, cur_max])
            done = 1
            break
        # range is partially below
        if min_val < cur_min:
            if max_val <= cur_max:
                ret.append([min_val, cur_max])
                done = 1
                break
            else:
                # the new range extends past this entry; keep scanning
                ret.append([min_val, max_val])
                check_next = 1
                continue
        # range already included
        if max_val <= cur_max:
            ret.append([cur_min, cur_max])
            done = 1
            break;
        # range partially above
        if min_val <= cur_max:
            ret.append([cur_min, max_val])
            check_next = 1
            continue
        # join 2 ranges on the other side
        if min_val - 1 == cur_max:
            ret.append([cur_min, max_val])
            check_next = 1
            continue
        # range is above
        ret.append([cur_min, cur_max])

    # Entries after the one where we stopped are untouched; copy them over.
    if idx + 1 < len(rl):          # we hit a break statement
        ret = ret + rl[idx + 1:]
    elif done:                     # we hit a break on the last iteration
        pass
    elif not check_next:           # it's past the end of the rl
        ret.append([min_val, max_val])

    return ret;
187
def rl_union(rl1, rl2):
    """Return the union of two range lists as a freshly built range list.

    Every ``[min, max]`` entry of *rl1* and then *rl2* is fed through
    add_range(), starting from an empty list.
    """
    merged = []
    for source in (rl1, rl2):
        for piece in source:
            merged = add_range(merged, piece[0], piece[1])

    # Sanity check: non-empty input must never collapse to nothing.
    if not merged and (rl1 or rl2):
        print("bug: merging %s + %s gives empty" % (rl1, rl2))

    return merged
199
# Symbolic limit names accepted by txt_to_val() and the integers they denote.
_TXT_TO_VAL_NAMES = {
    "s64min": -(2**63),
    "s32min": -(2**31),
    "s16min": -(2**15),
    "s64max": 2**63 - 1,
    "s32max": 2**31 - 1,
    "s16max": 2**15 - 1,
    "u64max": 2**64 - 1,
    "ptr_max": 2**64 - 1,
    "u32max": 2**32 - 1,
    "u16max": 2**16 - 1,
}

def txt_to_val(txt):
    """Convert one textual range bound to an integer.

    Symbolic names such as ``s32max`` map to the matching limit; anything
    else is parsed with int().  Text that is not a valid integer yields 0.
    """
    if txt in _TXT_TO_VAL_NAMES:
        return _TXT_TO_VAL_NAMES[txt]
    try:
        return int(txt)
    except ValueError:
        return 0
226
# Integers that val_to_txt() prints by symbolic name rather than as digits.
_VAL_TO_TXT_NAMES = {
    -(2**63): "s64min",
    -(2**31): "s32min",
    -(2**15): "s16min",
    2**63 - 1: "s64max",
    2**31 - 1: "s32max",
    2**15 - 1: "s16max",
    2**64 - 1: "u64max",
    2**32 - 1: "u32max",
    2**16 - 1: "u16max",
}

def val_to_txt(val):
    """Format one integer range bound as text.

    Well-known limits are printed by name (``s32max`` ...), other negative
    numbers are wrapped in parentheses, and everything else is plain
    decimal.
    """
    if val in _VAL_TO_TXT_NAMES:
        return _VAL_TO_TXT_NAMES[val]
    # Parentheses keep a leading minus apart from the "-" range separator.
    template = "(%d)" if val < 0 else "%d"
    return template % (val)
250
def get_next_str(txt):
    """Split the first value token off the front of *txt*.

    Returns ``[consumed, token]`` where ``consumed`` is how many characters
    of *txt* the token occupied.  Three forms are recognised:

    * ``(...)``  - parenthesised value; the token excludes the parentheses
      but ``consumed`` includes them;
    * a name starting with ``s`` or ``u`` - always taken as six characters;
    * anything else - an optional leading ``-`` followed by everything up
      to the next ``-``.
    """
    first = txt[0]

    if first == '(':
        close = txt.find(')', 1)
        if close < 0:
            # No closing parenthesis: the rest of the text is the value.
            close = len(txt)
        return [close + 1, txt[1:close]]

    if first in ('s', 'u'):
        return [6, txt[:6]]

    # A leading '-' is a sign, not the range separator, so skip over it.
    stop = txt.find('-', 1 if first == '-' else 0)
    if stop < 0:
        stop = len(txt)
    return [stop, txt[:stop]]
275
def txt_to_rl(txt):
    """Parse a comma separated range string into a list of [min, max] pairs.

    Each comma separated piece is either a single value or ``min-max``;
    a single value becomes a range whose two ends are equal.
    """
    if len(txt) == 0:
        return []

    ranges = []
    for chunk in txt.split(","):
        used, low_txt = get_next_str(chunk)
        if used == len(chunk):
            # The whole piece was one value.
            high_txt = low_txt
        else:
            # Skip the separator character and read the upper bound.
            high_txt = get_next_str(chunk[used + 1:])[1]
        ranges.append([txt_to_val(low_txt), txt_to_val(high_txt)])

    # No round-trip check against rl_to_txt() here: Smatch won't call
    # INT_MAX s32max if the variable is unsigned, so the text can differ.
    return ranges
297
def rl_to_txt(rl):
    """Format a list of [min, max] pairs as a comma separated range string.

    A pair whose ends are equal is printed as one value, any other pair as
    ``min-max``.
    """
    pieces = []
    for entry in rl:
        low = entry[0]
        high = entry[1]
        text = val_to_txt(low)
        if low != high:
            text = text + "-" + val_to_txt(high)
        pieces.append(text)
    return ",".join(pieces)
314
def type_to_str(type_int):
    """Return the symbolic name for a numeric type code.

    *type_int* is converted with int() and looked up in ``db_types``.
    Codes that are not in the table are returned unchanged, i.e. exactly
    the object that was passed in.
    """
    t = int(type_int)
    # "in" works on Python 2 and 3; dict.has_key() was removed in Python 3.
    if t in db_types:
        return db_types[t]
    return type_int
321
def type_to_int(type_string):
    """Return the numeric code for a symbolic type name, or -1 if unknown."""
    for number, label in db_types.items():
        if label == type_string:
            return number
    return -1
327
def display_caller_info(printed, cur, param_names):
    """Print every ``caller_info`` row available from cursor *cur*.

    *printed* says whether the column header has already been written; the
    updated flag is returned so the caller can pass it to the next call.
    *param_names* maps parameter numbers to names; when a row's parameter
    is present, ``$`` in the row's key is replaced by that name.
    """
    for txt in cur:
        if not printed:
            print("file | caller | function | type | parameter | key | value |")
        printed = 1

        parameter = int(txt[6])
        key = txt[7]
        if parameter in param_names:
            key = key.replace("$", param_names[parameter])

        # One print call instead of three comma-terminated print statements.
        # The doubled spaces reproduce the separator that the old
        # trailing-comma form inserted between the pieces.
        print("%20s | %20s | %20s |  %10s |  %d | %s | %s"
              % (txt[0], txt[1], txt[2], type_to_str(txt[5]),
                 parameter, key, txt[8]))
    return printed
343
def get_caller_info(filename, ptrs, my_type, func=None):
    """Print the ``caller_info`` rows for every name in *ptrs*.

    filename -- file used to look up parameter names ("" for any file)
    ptrs     -- function name plus the pointers that can reach it
    my_type  -- symbolic type name to filter on, or "" for all types
    func     -- function whose parameter names are used for display.  When
                omitted, the first entry of *ptrs* is used, which is the
                function itself for lists built by get_function_pointers().
    """
    if func is None:
        # This function used to read "func" from the script-level globals.
        func = ptrs[0] if ptrs else ""

    cur = con.cursor()
    param_names = get_param_names(filename, func)
    printed = 0

    # Build the statement once; values are bound instead of pasted in.
    query = "select * from caller_info where function = ?"
    extra_args = ()
    if my_type != "":
        query += " and type = ?"
        extra_args = (type_to_int(my_type),)
    query += ";"

    for ptr in ptrs:
        cur.execute(query, (ptr,) + extra_args)
        printed = display_caller_info(printed, cur, param_names)

def print_caller_info(filename, func, my_type = ""):
    """Print how *func* is called, optionally restricted to one type name."""
    ptrs = get_function_pointers(func)
    get_caller_info(filename, ptrs, my_type, func)
358
def merge_values(param_names, vals, cur):
    """Fold (parameter, key, value) rows from *cur* into *vals*.

    *vals* is updated in place.  It maps parameter number to a dict that
    maps the display name to ``[row_count, range_list]``.  ``$`` in the key
    is replaced by the parameter's name when *param_names* has one.
    """
    for row in cur:
        parameter = int(row[0])
        name = row[1]
        rl = txt_to_rl(row[2])
        if parameter in param_names:
            name = name.replace("$", param_names[parameter])

        by_name = vals.setdefault(parameter, {})

        # The first item is the number of rows seen for this name; the
        # second is the union of all the ranges those rows carried.
        if name in by_name:
            seen, known = by_name[name][0], by_name[name][1]
            by_name[name] = [seen + 1, rl_union(known, rl)]
        else:
            by_name[name] = [1, rl]
376
def get_param_names(filename, func):
    """Return a dict mapping parameter number to name for *func*.

    Rows recorded for *filename* are preferred.  If there are none, every
    row for *func* is used regardless of file.
    """
    cur = con.cursor()

    # Values are bound with "?" so quotes in a name cannot break the query.
    cur.execute("select parameter, value from parameter_name where file = ? and function = ?;", (filename, func))
    param_names = dict((int(row[0]), row[1]) for row in cur)
    if param_names:
        return param_names

    cur.execute("select parameter, value from parameter_name where function = ?;", (func,))
    return dict((int(row[0]), row[1]) for row in cur)
394
def get_caller_count(ptrs):
    """Return the total number of distinct call_ids over all names in *ptrs*."""
    cur = con.cursor()
    count = 0
    for ptr in ptrs:
        # Bound parameter: safe for names containing quote characters.
        cur.execute("select count(distinct(call_id)) from caller_info where function = ?;", (ptr,))
        for txt in cur:
            count += int(txt[0])
    return count
403
def print_merged_caller_values(filename, func, ptrs, param_names, call_cnt):
    """Print the merged PARAM_VALUE ranges that every call site supplies.

    The ranges of all callers are unioned per parameter and key.  An entry
    is printed only when its row count equals *call_cnt*.  *filename* and
    *func* are accepted for symmetry with the other printers and are not
    used here.
    """
    cur = con.cursor()
    vals = {}
    param_value = type_to_int("PARAM_VALUE")
    for ptr in ptrs:
        cur.execute("select parameter, key, value from caller_info where function = ? and type = ?;", (ptr, param_value))
        merge_values(param_names, vals, cur)

    for param in sorted(vals):
        for name in sorted(vals[param]):
            if vals[param][name][0] != call_cnt:
                continue
            print("%d %s -> %s" % (param, name, rl_to_txt(vals[param][name][1])))
416
417
def print_unmerged_caller_values(filename, func, ptrs, param_names):
    """Print each PARAM_VALUE row of every name in *ptrs*, one per line.

    ``$`` in a row's key is replaced by the parameter's name when
    *param_names* has one, otherwise by ``$<number>``.  A separator line is
    printed after each entry of *ptrs*.  *filename* and *func* are not used.
    """
    cur = con.cursor()
    param_value = type_to_int("PARAM_VALUE")
    for ptr in ptrs:
        cur.execute("select file, caller, call_id, parameter, key, value from caller_info where function = ? and type = ?;", (ptr, param_value))
        for row_file, caller, call_id, parameter, name, value in cur:
            parameter = int(parameter)
            # param_names is keyed by parameter number, so test membership.
            # Comparing against len() raised KeyError for parameter -1 or
            # when the recorded numbers were not contiguous.
            if parameter in param_names:
                name = name.replace("$", param_names[parameter])
            else:
                name = name.replace("$", "$%d" % (parameter))

            print("%s | %s | %s | %s" % (row_file, caller, name, value))
        print("==========================")
435
def print_caller_values(filename, func, ptrs):
    """Print the merged caller values, a separator, then the raw rows."""
    names = get_param_names(filename, func)
    total_calls = get_caller_count(ptrs)

    print_merged_caller_values(filename, func, ptrs, names, total_calls)
    print("==========================")
    print_unmerged_caller_values(filename, func, ptrs, names)
443
def caller_info_values(filename, func):
    """Print the caller supplied values for *func* and its function pointers."""
    print_caller_values(filename, func, get_function_pointers(func))
447
def print_return_states(func):
    """Print the ``return_states`` rows recorded for *func*.

    The column header is written before the first row only, so nothing is
    printed when the function has no rows.
    """
    cur = con.cursor()
    cur.execute("select * from return_states where function = ?;", (func,))
    count = 0
    for txt in cur:
        if count == 0:
            print("file | function | return_id | return_value | type | param | key | value |")
        count += 1
        # Single print call; the spacing matches what the three
        # comma-terminated print statements used to produce.
        print("%s | %s | %2s | %13s | %13s |  %2d | %20s | %20s |"
              % (txt[0], txt[1], txt[3], txt[4], type_to_str(txt[6]),
                 txt[7], txt[8], txt[9]))
460
def print_return_implies(func):
    """Print the ``return_implies`` rows recorded for *func*.

    The column header is written before the first row only.
    """
    cur = con.cursor()
    cur.execute("select * from return_implies where function = ?;", (func,))
    count = 0
    for txt in cur:
        if not count:
            print("file | function | type | param | key | value |")
        count += 1
        # Single print call; the spacing matches what the three
        # comma-terminated print statements used to produce.
        print("%15s | %15s | %15s | %3d | %s | %15s |"
              % (txt[0], txt[1], type_to_str(txt[4]), txt[5], txt[6], txt[7]))
472
def print_type_size(struct_type, member):
    """Print the recorded sizes for ``(struct <struct_type>)-><member>``.

    Both the ``type_size`` and ``function_type_size`` tables are queried
    with LIKE, so SQL wildcards in either argument still apply.
    """
    # Bind the pattern as a parameter so quotes in it cannot break the query.
    pattern = "(struct %s)->%s" % (struct_type, member)
    cur = con.cursor()

    cur.execute("select * from type_size where type like ?;", (pattern,))
    print("type | size")
    for txt in cur:
        print("%-15s | %s" % (txt[0], txt[1]))

    cur.execute("select * from function_type_size where type like ?;", (pattern,))
    print("file | function | type | size")
    for txt in cur:
        print("%-15s | %-15s | %-15s | %s" % (txt[0], txt[1], txt[2], txt[3]))
484
def print_data_info(struct_type, member):
    """Print the ``data_info`` rows for ``(struct <struct_type>)-><member>``.

    The match uses LIKE, so SQL wildcards in either argument still apply.
    """
    pattern = "(struct %s)->%s" % (struct_type, member)
    cur = con.cursor()
    # Bound parameter: quotes in the pattern cannot break the statement.
    cur.execute("select * from data_info where data like ?;", (pattern,))
    print("file | data | type | value")
    for txt in cur:
        print("%-15s | %-15s | %-15s | %s" % (txt[0], txt[1], type_to_str(txt[2]), txt[3]))
491
def print_fn_ptrs(func):
    """Print *func* and the list of names returned by get_function_pointers()."""
    names = get_function_pointers(func)
    if names:
        # Two spaces after "=": one from the original format string and
        # one that the comma-terminated print statement used to add.
        print("%s =  %s" % (func, names))
498
def print_functions(member):
    """Print the ``function_ptr`` rows whose pointer ends in ``-><member>``."""
    cur = con.cursor()
    # The leading "%" is the LIKE wildcard; the pattern itself is bound as
    # a parameter so quotes in *member* cannot break the statement.
    cur.execute("select * from function_ptr where ptr like ?;", ("%->" + member,))
    print("File | Pointer | Function | Static")
    for txt in cur:
        print("%-15s | %-15s | %-15s | %s" % (txt[0], txt[2], txt[1], txt[3]))
505
def get_callers(func):
    """Return the callers of *func* and of every pointer that can reach it.

    Callers are distinct per looked-up name; the same caller can appear
    more than once if it calls through several of those names.
    """
    ret = []
    cur = con.cursor()
    for ptr in get_function_pointers(func):
        # Bound parameter: safe for names containing quote characters.
        cur.execute("select distinct caller from caller_info where function = ?;", (ptr,))
        ret.extend(row[0] for row in cur)
    return ret
515
# Functions already expanded by the current call-tree / trace walk.
printed_funcs = []
def call_tree_helper(func, indent = 0):
    """Print *func* and, indented below it, the tree of its callers.

    Each function is expanded once.  The walk stops below an indent of 6,
    at the placeholder name "too common", and at functions with 20 or more
    callers.
    """
    if func in printed_funcs:
        return
    print("%s%s()" % (" " * indent, func))
    if func == "too common" or indent > 6:
        return
    printed_funcs.append(func)

    parents = get_callers(func)
    if len(parents) >= 20:
        print("Over 20 callers for %s()" % (func))
        return
    for parent in parents:
        call_tree_helper(parent, indent + 2)
533
def print_call_tree(func):
    """Print the tree of callers leading to *func*."""
    global printed_funcs
    # Forget what an earlier walk printed before starting a new one.
    printed_funcs = []
    call_tree_helper(func, 0)
538
def function_type_value(struct_type, member):
    """Print the ``function_type_value`` rows for a struct member.

    The type column is matched with LIKE against
    ``(struct <struct_type>)-><member>``, so ``%`` works as a wildcard.
    """
    pattern = "(struct %s)->%s" % (struct_type, member)
    cur = con.cursor()
    # Bound parameter: quotes in the pattern cannot break the statement.
    cur.execute("select * from function_type_value where type like ?;", (pattern,))
    for txt in cur:
        print("%-30s | %-30s | %s | %s" % (txt[0], txt[1], txt[2], txt[3]))
544
def trace_callers(func, param):
    """Return where parameter *param* of *func* comes from at each call site.

    The result is a list of ``(caller, value)`` tuples built from the
    ``caller_info`` rows of *func* and its function pointers:

    * type 1014 (DATA_SOURCE) rows give ``(caller, value)``;
    * type 1028 (COMPARE_LIMIT) rows give ``("%", value)``;
    * a type 0 (INTERNAL) row that directly follows another type 0 row
      gives ``(caller, "")``.
    """
    sources = []
    prev_type = 0

    cur = con.cursor()
    for ptr in get_function_pointers(func):
        # Name and parameter number are bound rather than pasted into the SQL.
        cur.execute("select type, caller, value from caller_info where function = ? and (type = 0 or type = 1014 or type = 1028) and (parameter = -1 or parameter = ?);", (ptr, int(param)))
        for row in cur:
            data_type = int(row[0])
            if data_type == 1014:
                sources.append((row[1], row[2]))
            elif data_type == 1028:
                sources.append(("%", row[2])) # hack...
            elif data_type == 0 and prev_type == 0:
                sources.append((row[1], ""))
            prev_type = data_type
    return sources
563
def trace_param_helper(func, param, indent = 0):
    """Print *func*/*param* and recurse into the callers that pass it on.

    Each function is expanded once.  The walk stops below an indent of 20
    and at the placeholder name "too common".  A source value of the form
    ``p <n>`` means the caller forwards its own parameter *n*, so the trace
    continues in that caller.
    """
    if func in printed_funcs:
        return
    print("%s%s(param %d)" % (" " * indent, func, param))
    if func == "too common":
        return
    if indent > 20:
        return
    printed_funcs.append(func)

    for path in trace_callers(func, param):
        caller = path[0]
        # A NULL value column is treated like an empty string.
        value = path[1] if path[1] is not None else ""

        # startswith() cannot run off the end of a one-character value,
        # which the old index-by-index comparison did.
        if value.startswith("p "):
            trace_param_helper(caller, int(value[2:]), indent + 2)
        elif caller.startswith("%"):
            print("  %s%s" % (" " * indent, value))
        else:
            print("* %s%s %s" % (" " * (indent - 1), caller, value))
584
def trace_param(func, param):
    """Print where parameter number *param* of *func* comes from."""
    global printed_funcs
    # Forget what an earlier walk printed before starting a new one.
    printed_funcs = []
    print("tracing %s %d" % (func, param))
    trace_param_helper(func, param, 0)
590
def print_locals(filename):
    """Print the ``data_info`` rows of type 8029 with a non-zero value for *filename*."""
    cur = con.cursor()
    # Bound parameter: safe for file names containing quote characters.
    cur.execute("select file,data,value from data_info where file = ? and type = 8029 and value != 0;", (filename,))
    for txt in cur:
        print("%s | %s | %s" % (txt[0], txt[1], txt[2]))
596
def constraint(struct_type, member):
    """Print the ``constraints_required`` rows that mention a struct member.

    A row matches when either its ``data`` or its ``bound`` column is LIKE
    ``(struct <struct_type>)-><member>``.
    """
    pattern = "(struct %s)->%s" % (struct_type, member)
    cur = con.cursor()
    # Bound parameters: quotes in the pattern cannot break the statement.
    cur.execute("select * from constraints_required where data like ? or bound like ?;", (pattern, pattern))
    for txt in cur:
        print("%-30s | %-30s | %s | %s" % (txt[0], txt[1], txt[2], txt[3]))
602
# Command line dispatch.  This stays at module level (not inside a
# function) so that names such as "func" remain module globals.

def _need_args(minimum):
    """Print usage and exit unless at least *minimum* argv entries exist."""
    if len(sys.argv) < minimum:
        usage()

def _struct_and_member():
    """Return (struct_type, member) from "<member>" or "<struct> <member>".

    With only a member given, the struct type is the LIKE wildcard "%".
    Any other argument count prints usage and exits.
    """
    if len(sys.argv) == 3:
        return "%", sys.argv[2]
    if len(sys.argv) == 4:
        return sys.argv[2], sys.argv[3]
    usage()

if len(sys.argv) < 2:
    usage()

if len(sys.argv) == 2:
    # A lone argument is always taken as a function name.
    func = sys.argv[1]
    print_caller_info("", func)
elif sys.argv[1] == "info":
    my_type = ""
    if len(sys.argv) == 4:
        my_type = sys.argv[3]
    func = sys.argv[2]
    print_caller_info("", func, my_type)
elif sys.argv[1] == "call_info":
    if len(sys.argv) != 4:
        usage()
    filename = sys.argv[2]
    func = sys.argv[3]
    caller_info_values(filename, func)
    print_caller_info(filename, func)
elif sys.argv[1] == "function_ptr" or sys.argv[1] == "fn_ptr":
    func = sys.argv[2]
    print_fn_ptrs(func)
elif sys.argv[1] == "return_states":
    func = sys.argv[2]
    print_return_states(func)
    print("================================================")
    print_return_implies(func)
elif sys.argv[1] == "return_implies":
    func = sys.argv[2]
    print_return_implies(func)
elif sys.argv[1] == "type_size" or sys.argv[1] == "buf_size":
    _need_args(4)
    struct_type = sys.argv[2]
    member = sys.argv[3]
    print_type_size(struct_type, member)
elif sys.argv[1] == "data_info":
    _need_args(4)
    struct_type = sys.argv[2]
    member = sys.argv[3]
    print_data_info(struct_type, member)
elif sys.argv[1] == "call_tree":
    func = sys.argv[2]
    print_call_tree(func)
elif sys.argv[1] == "where":
    struct_type, member = _struct_and_member()
    function_type_value(struct_type, member)
elif sys.argv[1] == "local" or sys.argv[1] == "locals":
    # "local" used to call local_values(), which is not defined anywhere in
    # this file and so raised NameError.  It is now an alias for "locals".
    if len(sys.argv) != 3:
        usage()
    filename = sys.argv[2]
    print_locals(filename)
elif sys.argv[1] == "functions":
    member = sys.argv[2]
    print_functions(member)
elif sys.argv[1] == "trace_param":
    if len(sys.argv) != 4:
        usage()
    func = sys.argv[2]
    param = int(sys.argv[3])
    trace_param(func, param)
elif sys.argv[1] == "constraint":
    struct_type, member = _struct_and_member()
    constraint(struct_type, member)
elif sys.argv[1] == "test":
    _need_args(4)
    filename = sys.argv[2]
    func = sys.argv[3]
    caller_info_values(filename, func)
else:
    usage()
686