#!/usr/bin/env drgn
#
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.

desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMW/RPR  For per-cpu workqueues, the number of concurrency-management
           wake-ups while executing a work item of the workqueue. For
           unbound workqueues, the number of times a worker was repatriated
           to its affinity scope after being migrated to an off-scope CPU by
           the scheduler.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""

import signal
import re
import time
import json

import drgn
from drgn.helpers.linux.list import list_for_each_entry

import argparse
parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
                    help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
                    help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()

# NOTE: `prog` is not defined in this file. It is expected to be the program
# object that the drgn runner (see the shebang) injects into the script's
# globals.
workqueues              = prog['workqueues']

WQ_UNBOUND              = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM          = prog['WQ_MEM_RECLAIM']

PWQ_STAT_STARTED        = prog['PWQ_STAT_STARTED']      # work items started execution
PWQ_STAT_COMPLETED      = prog['PWQ_STAT_COMPLETED']    # work items completed execution
PWQ_STAT_CPU_TIME       = prog['PWQ_STAT_CPU_TIME']     # total CPU time consumed
PWQ_STAT_CPU_INTENSIVE  = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP      = prog['PWQ_STAT_CM_WAKEUP']    # concurrency-management worker wakeups
PWQ_STAT_REPATRIATED    = prog['PWQ_STAT_REPATRIATED']  # unbound workers brought back into scope
PWQ_STAT_MAYDAY         = prog['PWQ_STAT_MAYDAY']       # maydays to rescuer
PWQ_STAT_RESCUED        = prog['PWQ_STAT_RESCUED']      # linked work items executed by rescuer
PWQ_NR_STATS            = prog['PWQ_NR_STATS']


class WqStats:
    """Snapshot of one workqueue's counters, summed over all of its pwqs."""

    def __init__(self, wq):
        """Read name, flags and per-pwq stats from `wq`.

        `wq` is one entry yielded while walking the `workqueues` list as
        'struct workqueue_struct'.
        """
        self.name = wq.name.string_().decode()
        # bool() keeps the flags plain Python booleans so that dict() stays
        # JSON-serializable.
        self.unbound = bool((wq.flags & WQ_UNBOUND) != 0)
        self.mem_reclaim = bool((wq.flags & WQ_MEM_RECLAIM) != 0)
        self.stats = [0] * PWQ_NR_STATS
        # A workqueue owns a list of pool_workqueues; the reported numbers
        # are the sums of each counter across that list.
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a flat dict stamped with `now`.

        All values are str, bool, int or whatever `now` is, so the result can
        be passed straight to json.dumps().
        """
        return { 'timestamp'            : now,
                 'name'                 : self.name,
                 'unbound'              : self.unbound,
                 'mem_reclaim'          : self.mem_reclaim,
                 'started'              : self.stats[PWQ_STAT_STARTED],
                 'completed'            : self.stats[PWQ_STAT_COMPLETED],
                 'cpu_time'             : self.stats[PWQ_STAT_CPU_TIME],
                 'cpu_intensive'        : self.stats[PWQ_STAT_CPU_INTENSIVE],
                 'cm_wakeup'            : self.stats[PWQ_STAT_CM_WAKEUP],
                 'repatriated'          : self.stats[PWQ_STAT_REPATRIATED],
                 'mayday'               : self.stats[PWQ_STAT_MAYDAY],
                 'rescued'              : self.stats[PWQ_STAT_RESCUED], }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()."""
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
            f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this snapshot formatted as one table row.

        Columns that do not apply to the workqueue are shown as '-'.
        """
        cpu_intensive = '-'
        mayday = '-'
        rescued = '-'

        # The CMW/RPR column is shared: repatriation count for unbound
        # workqueues, concurrency-management wakeups otherwise. CPUitsv is
        # only shown for the non-unbound case.
        if self.unbound:
            cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])

        # Rescuer columns are only filled in for WQ_MEM_RECLAIM workqueues.
        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        started = self.stats[PWQ_STAT_STARTED]
        completed = self.stats[PWQ_STAT_COMPLETED]
        # Clamp at 0 so the in-flight column never goes negative.
        in_flight = max(started - completed, 0)

        # Names longer than the column keep their last 24 characters. The
        # CPU time counter is scaled by 1e6 for display in seconds
        # (presumably it is kept in microseconds -- confirm against the
        # kernel side).
        return f'{self.name[-24:]:24} ' \
               f'{started:8} ' \
               f'{in_flight:5} ' \
               f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
               f'{cpu_intensive:>7} ' \
               f'{cmw_rpr:>7} ' \
               f'{mayday:>7} ' \
               f'{rescued:>7} '


exit_req = False


def sigint_handler(signr, frame):
    """SIGINT handler: ask the monitoring loop in main() to stop."""
    global exit_req
    exit_req = True


def main():
    """Print stats for the selected workqueues, once or every interval."""
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Multiple patterns are OR-ed into a single regex; no pattern means
    # every workqueue is shown.
    filter_re = re.compile('|'.join(args.workqueue)) if args.workqueue else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # One JSON object per line; printing the dict directly would
                # emit a Python repr, which is not valid JSON.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()