xref: /linux/tools/workqueue/wq_monitor.py (revision 725e8ec59c56c65fb92e343c10a8842cd0d4f194)
1*725e8ec5STejun Heo#!/usr/bin/env drgn
2*725e8ec5STejun Heo#
3*725e8ec5STejun Heo# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4*725e8ec5STejun Heo# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
5*725e8ec5STejun Heo
# Help text passed to argparse as the program description (shown by --help).
# It also serves as the legend for the columns printed in table mode.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CMwake   The number of concurrency-management wake-ups while executing a
           work item of the workqueue.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
22*725e8ec5STejun Heo
23*725e8ec5STejun Heoimport sys
24*725e8ec5STejun Heoimport signal
25*725e8ec5STejun Heoimport os
26*725e8ec5STejun Heoimport re
27*725e8ec5STejun Heoimport time
28*725e8ec5STejun Heoimport json
29*725e8ec5STejun Heo
30*725e8ec5STejun Heoimport drgn
31*725e8ec5STejun Heofrom drgn.helpers.linux.list import list_for_each_entry,list_empty
32*725e8ec5STejun Heofrom drgn.helpers.linux.cpumask import for_each_possible_cpu
33*725e8ec5STejun Heo
34*725e8ec5STejun Heoimport argparse
# Command-line interface.  Parsing happens at module load, so `args` is a
# module-level global that main() reads.
parser = argparse.ArgumentParser(
    description=desc,
    # Raw formatter keeps the hand-aligned column legend in `desc` intact.
    formatter_class=argparse.RawTextHelpFormatter,
)

# Zero or more name patterns; main() ORs them together into one regex.
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)

# Seconds between passes of the monitoring loop.
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)

# Switches main() from the table layout to one record per workqueue.
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)

args = parser.parse_args()
44*725e8ec5STejun Heo
def err(s):
    """Report a fatal error and terminate the script.

    Writes *s* followed by a newline to stderr (flushed immediately), then
    exits the interpreter with status 1.  Never returns.
    """
    print(s, file=sys.stderr, flush=True)
    raise SystemExit(1)
48*725e8ec5STejun Heo
# NOTE(review): `prog` is not defined in this file; presumably it is the drgn
# Program object that the drgn launcher (see the shebang) injects into the
# script's globals -- confirm against the drgn documentation.

# List head that main() walks to visit every `struct workqueue_struct`.
workqueues = prog['workqueues']

# Flag bits tested against wq.flags in WqStats.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into the per-pool_workqueue stats[] array.
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']        # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']    # work items completed execution
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']    # concurrency-management worker wakeups
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']          # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']        # linked work items executed by rescuer
PWQ_NR_STATS = prog['PWQ_NR_STATS']                # length used for the stats[] accumulator
60*725e8ec5STejun Heo
class WqStats:
    """Snapshot of one workqueue's counters, summed over its pool_workqueues.

    Attributes:
        name:        workqueue name decoded from wq.name.
        unbound:     True when WQ_UNBOUND is set in wq.flags.
        mem_reclaim: True when WQ_MEM_RECLAIM is set in wq.flags.
        stats:       list of PWQ_NR_STATS ints, indexed by the PWQ_STAT_*
                     constants.
    """

    def __init__(self, wq):
        """Read the name, flags and stats of *wq*.

        *wq* is a drgn object for a `struct workqueue_struct`.  The stats of
        every pool_workqueue linked on wq.pwqs are added together.
        """
        self.name = wq.name.string_().decode()
        # bool() pins the flags to plain Python bools so the dict() record
        # stays serializable regardless of what the drgn comparison returns.
        self.unbound = bool((wq.flags & WQ_UNBOUND) != 0)
        self.mem_reclaim = bool((wq.flags & WQ_MEM_RECLAIM) != 0)
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a flat dict, stamped with *now*.

        *now* is stored unchanged under 'timestamp'; the caller supplies it
        so every record of one pass carries the same value.
        """
        return { 'timestamp'            : now,
                 'name'                 : self.name,
                 'unbound'              : self.unbound,
                 'mem_reclaim'          : self.mem_reclaim,
                 'started'              : self.stats[PWQ_STAT_STARTED],
                 'completed'            : self.stats[PWQ_STAT_COMPLETED],
                 'cm_wakeup'            : self.stats[PWQ_STAT_CM_WAKEUP],
                 'mayday'               : self.stats[PWQ_STAT_MAYDAY],
                 'rescued'              : self.stats[PWQ_STAT_RESCUED], }

    # Takes no instance state; @staticmethod makes it callable both as
    # WqStats.table_header_str() and on an instance.
    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()."""
        return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return one table line for this workqueue.

        Columns that do not apply are shown as '-': CMwake for unbound
        workqueues, mayday/rescued for workqueues without WQ_MEM_RECLAIM.
        """
        cm_wakeup = '-'
        mayday = '-'
        rescued = '-'

        if not self.unbound:
            cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])

        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        # Long names keep their last 24 characters.  In-flight is clamped at
        # zero so it never prints negative if completed exceeds started.
        out = f'{self.name[-24:]:24} ' \
              f'{self.stats[PWQ_STAT_STARTED]:8} ' \
              f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
              f'{cm_wakeup:>7} ' \
              f'{mayday:>7} ' \
              f'{rescued:>7} '
        # Trim the trailing separator; the previous rstrip(':') never matched
        # anything because the line ends in a space.
        return out.rstrip()
104*725e8ec5STejun Heo
# Stop request flag: set from the SIGINT handler and checked by the
# monitoring loop's `while not exit_req` condition.
exit_req = False

def sigint_handler(signr, frame):
    """Record that SIGINT arrived so the monitoring loop can stop.

    *signr* and *frame* are the standard signal-handler arguments; neither
    is used.  The handler only flips the module-level flag.
    """
    global exit_req
    exit_req = True
110*725e8ec5STejun Heo
def _sleep_until_next_pass(secs):
    """Sleep for *secs* seconds, returning early once exit_req is set.

    time.sleep() resumes after a signal handler returns, so one long sleep
    would delay Ctrl-C by up to a full interval.  Sleeping in short slices
    lets the exit_req flag be noticed promptly.
    """
    deadline = time.monotonic() + secs
    while not exit_req:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(remaining, 0.1))

def main():
    """Print workqueue stats once per interval until SIGINT.

    Reads the module-level `args`: the REGEX patterns select workqueues by
    name (any pattern may match), --interval sets the period (0 or less
    prints a single pass) and --json switches from the table layout to one
    JSON object per workqueue per line.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    filter_re = None
    if args.workqueue:
        try:
            # A workqueue is shown when any of the patterns matches.
            filter_re = re.compile('|'.join(args.workqueue))
        except re.error as e:
            err(f'Invalid workqueue pattern: {e}')

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        # One timestamp per pass so all records of a pass share it.
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Emit real JSON; printing the dict itself produced Python
                # repr (single quotes, True/False) that JSON parsers reject.
                print(json.dumps(stats.dict(now)))

        # Push each pass out promptly when stdout is a pipe or a file.
        sys.stdout.flush()

        # A negative interval is treated like 0 instead of crashing in sleep.
        if interval <= 0:
            break
        _sleep_until_next_pass(interval)

if __name__ == "__main__":
    main()
151