#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2024 ARM Ltd.
#
# Utility providing smaps-like output detailing transparent hugepage usage.
# For more info, run:
# ./thpmaps --help
#
# Requires numpy:
# pip3 install numpy


import argparse
import collections
import math
import os
import re
import resource
import shutil
import sys
import textwrap
import time
import numpy as np


PAGE_SIZE = resource.getpagesize()
PAGE_SHIFT = int(math.log2(PAGE_SIZE))
try:
    with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
        PMD_SIZE = int(f.read())
except FileNotFoundError:
    # The THP sysfs node is absent (presumably a kernel built without THP).
    # Fall back to a best-effort value so that the module can still be
    # imported and --help still works.
    # NOTE(review): assumes 8-byte page table entries with one page per table
    # level (true for x86-64 and arm64) - confirm for other architectures.
    PMD_SIZE = PAGE_SIZE * (PAGE_SIZE // 8)
PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))


def align_forward(v, a):
    # Round v up to the next multiple of a. a must be a power of 2.
    return (v + (a - 1)) & ~(a - 1)


def align_offset(v, a):
    # Offset of v within its a-sized block. a must be a power of 2.
    return v & (a - 1)


def kbnr(kb):
    # Convert KB to number of pages.
    return (kb << 10) >> PAGE_SHIFT


def nrkb(nr):
    # Convert number of pages to KB.
    return (nr << PAGE_SHIFT) >> 10


def odkb(order):
    # Convert page order to KB.
    return (PAGE_SIZE << order) >> 10


def cont_ranges_all(search, index):
    # Given a list of arrays, find the ranges for which values are monotonically
    # incrementing (by exactly 1) in all arrays. All arrays in search and index
    # must be the same size. For every array in index, returns an (N, 2) array
    # holding that array's [first, last] (inclusive) values for each of the N
    # ranges found.
    sz = len(search[0])
    # Each element starts with a repeat count of 2 and loses one for every
    # neighbour it is contiguous with. Elements inside a run end up with 0,
    # the two ends of a run with 1 each, and an isolated element keeps 2, so
    # repeating by r and pairing up yields [first, last] for every run.
    r = np.full(sz, 2)
    d = np.diff(search[0]) == 1
    for dd in [np.diff(arr) == 1 for arr in search[1:]]:
        d &= dd
    r[1:] -= d
    r[:-1] -= d
    return [np.repeat(arr, r).reshape(-1, 2) for arr in index]


class ArgException(Exception):
    pass


class FileIOException(Exception):
    pass


class BinArrayFile:
    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
    # numpy array. Use inherited class in a with clause to ensure file is
    # closed when it goes out of scope.
    def __init__(self, filename, element_size):
        self.element_size = element_size
        self.filename = filename
        self.fd = os.open(self.filename, os.O_RDONLY)

    def cleanup(self):
        os.close(self.fd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def _readin(self, offset, buffer):
        # Fill buffer from the file at byte offset. A short read is reported
        # as FileIOException.
        length = os.preadv(self.fd, (buffer,), offset)
        if len(buffer) != length:
            raise FileIOException('error: {} failed to read {} bytes at {:x}'
                            .format(self.filename, len(buffer), offset))

    def _toarray(self, buf):
        assert(self.element_size == 8)
        return np.frombuffer(buf, dtype=np.uint64)

    def getv(self, vec):
        # Read several element ranges in one go. vec is an (N, 2) array of
        # inclusive [first, last] element indexes. The elements of all ranges
        # are returned concatenated in a single array. vec is not modified.
        vec = vec * self.element_size
        offsets = vec[:, 0]
        lengths = (np.diff(vec) + self.element_size).reshape(len(vec))
        buf = bytearray(int(np.sum(lengths)))
        view = memoryview(buf)
        pos = 0
        for offset, length in zip(offsets, lengths):
            offset = int(offset)
            length = int(length)
            self._readin(offset, view[pos:pos+length])
            pos += length
        return self._toarray(buf)

    def get(self, index, nr=1):
        # Read nr elements starting at element index.
        offset = index * self.element_size
        length = nr * self.element_size
        buf = bytearray(length)
        self._readin(offset, buf)
        return self._toarray(buf)


PM_PAGE_PRESENT = 1 << 63
PM_PFN_MASK = (1 << 55) - 1

class PageMap(BinArrayFile):
    # Read ranges of a given pid's pagemap into a numpy array.
    def __init__(self, pid='self'):
        super().__init__(f'/proc/{pid}/pagemap', 8)


KPF_ANON = 1 << 12
KPF_COMPOUND_HEAD = 1 << 15
KPF_COMPOUND_TAIL = 1 << 16
KPF_THP = 1 << 22

class KPageFlags(BinArrayFile):
    # Read ranges of /proc/kpageflags into a numpy array.
    def __init__(self):
        super().__init__('/proc/kpageflags', 8)


vma_all_stats = {
    "Size",
    "Rss",
    "Pss",
    "Pss_Dirty",
    "Shared_Clean",
    "Shared_Dirty",
    "Private_Clean",
    "Private_Dirty",
    "Referenced",
    "Anonymous",
    "KSM",
    "LazyFree",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
    "Shared_Hugetlb",
    "Private_Hugetlb",
    "Swap",
    "SwapPss",
    "Locked",
}

vma_min_stats = {
    "Rss",
    "Anonymous",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
}

VMA = collections.namedtuple('VMA', [
    'name',
    'start',
    'end',
    'read',
    'write',
    'execute',
    'private',
    'pgoff',
    'major',
    'minor',
    'inode',
    'stats',
])

class VMAList:
    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
    # instance to receive VMAs. Only the smaps fields named in stats are
    # recorded for each VMA.
    def __init__(self, pid='self', stats=()):
        with open(f'/proc/{pid}/smaps', 'r') as file:
            self.vmas = self._parse(file, stats)

    @staticmethod
    def _parse(lines, stats):
        # Parse an iterable of smaps lines into a list of VMA tuples.
        vmas = []
        for line in lines:
            # Split at most 5 times so that a pathname containing spaces
            # (e.g. "/some/file (deleted)") stays in one piece.
            elements = line.split(maxsplit=5)
            if not elements:
                continue
            if '-' in elements[0]:
                # VMA header: start-end perms offset major:minor inode [name]
                start, end = (int(x, 16) for x in elements[0].split('-'))
                major, minor = (int(x, 16) for x in elements[3].split(':'))
                perms = elements[1]
                vmas.append(VMA(
                    name=elements[5].strip() if len(elements) == 6 else '',
                    start=start,
                    end=end,
                    read=perms[0] == 'r',
                    write=perms[1] == 'w',
                    execute=perms[2] == 'x',
                    private=perms[3] == 'p',
                    pgoff=int(elements[2], 16),
                    major=major,
                    minor=minor,
                    # Unlike the other header fields, the inode is decimal.
                    inode=int(elements[4]),
                    stats={},
                ))
            else:
                # Statistic belonging to the most recent header: "Key: N kB".
                param = elements[0][:-1]
                if param in stats:
                    value = int(elements[1])
                    vmas[-1].stats[param] = {'type': None, 'value': value}
        return vmas

    def __iter__(self):
        yield from self.vmas


def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
    # Given 4 same-sized arrays representing a range within a page table backed
    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
    # True if page is anonymous, heads: True if page is head of a THP), return a
    # dictionary of statistics describing the mapped THPs. ranges holds the
    # [first, last] index/vfn/pfn of every run that is contiguous in both
    # virtual and physical space, and indexes is simply 0..len(vfns)-1.
    stats = {
        'file': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
        'anon': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
    }

    for rindex, rpfn in zip(ranges[0], ranges[2]):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        pfn_end = int(rpfn[1]) + 1

        folios = indexes[index_next:index_end][heads[index_next:index_end]]

        # Account pages for any partially mapped THP at the front. In that case,
        # the first page of the range is a tail.
        nr = (int(folios[0]) if len(folios) else index_end) - index_next
        stats['anon' if anons[index_next] else 'file']['partial'] += nr

        # Account pages for any partially mapped THP at the back. In that case,
        # the next page after the range is a tail.
        if len(folios):
            try:
                flags = int(kpageflags.get(pfn_end)[0])
            except FileIOException:
                # Short read: presumably pfn_end is beyond the last page that
                # kpageflags describes, so it cannot be a tail.
                flags = 0
            if flags & KPF_COMPOUND_TAIL:
                # Classify by the partial folio's own head page, not by the
                # page that precedes it (which belongs to another folio, or
                # lies outside the range altogether).
                last_head = int(folios[-1])
                nr = index_end - last_head
                folios = folios[:-1]
                index_end = last_head
                stats['anon' if anons[last_head] else 'file']['partial'] += nr

        # Account fully mapped THPs in the middle of the range.
        if len(folios):
            folio_nrs = np.append(np.diff(folios),
                                  np.uint64(index_end - int(folios[-1])))
            folio_orders = np.log2(folio_nrs).astype(np.uint64)
            for index, order in zip(folios, folio_orders):
                index = int(index)
                order = int(order)
                nr = 1 << order
                vfn = int(vfns[index])
                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
                anon = 'anon' if anons[index] else 'file'
                stats[anon][align][order] += nr

    # Account PMD-mapped THPs separately, so filter out of the stats. There is a
    # race between acquiring the smaps stats and reading pagemap, where memory
    # could be deallocated. So clamp to zero in case it would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    def flatten_sub(kind, subtype, counts):
        # One entry per order, starting at order 2.
        param = f"{kind}-thp-pte-{subtype}-{{}}kB"
        for od, nr in enumerate(counts[2:], 2):
            rstats[param.format(odkb(od))] = {'type': kind, 'value': nrkb(nr)}

    def flatten_type(kind, counts):
        flatten_sub(kind, 'aligned', counts['aligned'])
        flatten_sub(kind, 'unaligned', counts['unaligned'])
        rstats[f"{kind}-thp-pte-partial"] = {'type': kind, 'value': nrkb(counts['partial'])}

    flatten_type('anon', stats['anon'])
    flatten_type('file', stats['file'])

    return rstats


def cont_parse(vma, order, ranges, anons, heads):
    # Given the [first, last] index/vfn/pfn ranges that are contiguous in both
    # virtual and physical space, plus 2 same-sized arrays describing every
    # THP-backed page (anons: True if page is anonymous, heads: True if page is
    # head of a THP), return a dictionary of statistics describing the
    # naturally aligned contiguous blocks of (1 << order) pages.
    nr_cont = 1 << order
    nr_anon = 0
    nr_file = 0

    for rindex, rvfn, rpfn in zip(*ranges):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        vfn_start = int(rvfn[0])
        pfn_start = int(rpfn[0])

        # If virtual and physical are misaligned relative to each other, no
        # block in this range can be aligned in both spaces.
        if align_offset(pfn_start, nr_cont) != align_offset(vfn_start, nr_cont):
            continue

        # Skip forward to the first block boundary.
        off = align_forward(vfn_start, nr_cont) - vfn_start
        index_next += off

        while index_next + nr_cont <= index_end:
            # A head anywhere but the first page means the block spans more
            # than one folio, so it does not count.
            folio_boundary = heads[index_next+1:index_next+nr_cont].any()
            if not folio_boundary:
                if anons[index_next]:
                    nr_anon += nr_cont
                else:
                    nr_file += nr_cont
            index_next += nr_cont

    # Account blocks that are PMD-mapped separately, so filter out of the stats.
    # There is a race between acquiring the smaps stats and reading pagemap,
    # where memory could be deallocated. So clamp to zero in case it would have
    # gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    nr_anon = max(0, nr_anon - kbnr(anon_pmd_mapped))
    nr_file = max(0, nr_file - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    rstats[f"anon-cont-pte-aligned-{nrkb(nr_cont)}kB"] = {'type': 'anon', 'value': nrkb(nr_anon)}
    rstats[f"file-cont-pte-aligned-{nrkb(nr_cont)}kB"] = {'type': 'file', 'value': nrkb(nr_file)}

    return rstats


def vma_print(vma, pid):
    # Prints a VMA instance in a format similar to smaps. The main difference is
    # that the pid is included as the first value.
    print("{:010d}: {:016x}-{:016x} {}{}{}{} {:08x} {:02x}:{:02x} {:08d} {}"
        .format(
            pid, vma.start, vma.end,
            'r' if vma.read else '-', 'w' if vma.write else '-',
            'x' if vma.execute else '-', 'p' if vma.private else 's',
            vma.pgoff, vma.major, vma.minor, vma.inode, vma.name
        ))


def stats_print(stats, tot_anon, tot_file, inc_empty):
    # Print a statistics dictionary. Zero-valued entries are skipped unless
    # inc_empty is set. Entries typed 'anon' or 'file' also show their value
    # as a percentage of tot_anon or tot_file respectively.
    label_field = 32
    for label, stat in stats.items():
        kind = stat['type']
        value = stat['value']
        if value or inc_empty:
            pad = max(0, label_field - len(label) - 1)
            if kind == 'anon' and tot_anon > 0:
                percent = f' ({value / tot_anon:3.0%})'
            elif kind == 'file' and tot_file > 0:
                percent = f' ({value / tot_file:3.0%})'
            else:
                percent = ''
            print(f"{label}:{' ' * pad}{value:8} kB{percent}")


def vma_parse(vma, pagemap, kpageflags, contorders):
    # Generate thp and cont statistics for a single VMA. Returns the statistics
    # dictionary along with the VMA's total anonymous and file-backed KB.
    start = vma.start >> PAGE_SHIFT
    end = vma.end >> PAGE_SHIFT

    # Keep only the pages that are present.
    pmes = pagemap.get(start, end - start)
    present = (pmes & PM_PAGE_PRESENT) != 0
    pfns = pmes & PM_PFN_MASK
    pfns = pfns[present]
    vfns = np.arange(start, end, dtype=np.uint64)
    vfns = vfns[present]

    # Fetch the flags of every present page, one read per physically
    # contiguous run.
    pfn_vec = cont_ranges_all([pfns], [pfns])[0]
    flags = kpageflags.getv(pfn_vec)
    anons = (flags & KPF_ANON) != 0
    heads = (flags & KPF_COMPOUND_HEAD) != 0
    thps = (flags & KPF_THP) != 0

    # Keep only the pages that belong to a THP.
    vfns = vfns[thps]
    pfns = pfns[thps]
    anons = anons[thps]
    heads = heads[thps]

    indexes = np.arange(len(vfns), dtype=np.uint64)
    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])

    thpstats = thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads)
    contstats = [cont_parse(vma, order, ranges, anons, heads) for order in contorders]

    tot_anon = vma.stats['Anonymous']['value']
    tot_file = vma.stats['Rss']['value'] - tot_anon

    return {
        **thpstats,
        **{k: v for s in contstats for k, v in s.items()}
    }, tot_anon, tot_file


def do_main(args):
    # Perform a single scan of the selected processes and print the result.
    pids = set()
    rollup = {}
    rollup_anon = 0
    rollup_file = 0

    if args.cgroup:
        strict = False
        for cgroup, _, _ in os.walk(args.cgroup):
            with open(f'{cgroup}/cgroup.procs') as pidfile:
                pids.update(int(line.strip()) for line in pidfile)
    elif args.pid:
        # The user asked for these pids explicitly, so failing to scan one of
        # them is an error.
        strict = True
        pids.update(args.pid)
    else:
        strict = False
        pids.update(int(pid) for pid in os.listdir('/proc') if pid.isdigit())

    if not args.rollup:
        # Column positions match the field widths used by vma_print().
        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")

    # Sorted so that the output order is stable from run to run.
    for pid in sorted(pids):
        try:
            with PageMap(pid) as pagemap, KPageFlags() as kpageflags:
                for vma in VMAList(pid, vma_all_stats if args.inc_smaps else vma_min_stats):
                    if (vma.read or vma.write or vma.execute) and vma.stats['Rss']['value'] > 0:
                        stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
                    else:
                        stats = {}
                        vma_anon = 0
                        vma_file = 0
                    if args.inc_smaps:
                        stats = {**vma.stats, **stats}
                    if args.rollup:
                        for k, v in stats.items():
                            if k in rollup:
                                assert(rollup[k]['type'] == v['type'])
                                rollup[k]['value'] += v['value']
                            else:
                                # Copy, so that accumulating into the rollup
                                # never modifies a per-VMA entry.
                                rollup[k] = dict(v)
                        rollup_anon += vma_anon
                        rollup_file += vma_file
                    else:
                        vma_print(vma, pid)
                        stats_print(stats, vma_anon, vma_file, args.inc_empty)
        except (FileNotFoundError, ProcessLookupError, FileIOException):
            # Processes can exit while being scanned. Only treat that as an
            # error if the pid was explicitly requested.
            if strict:
                raise

    if args.rollup:
        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)


def size2order(human):
    # Convert a --cont size string (integer with optional K/M/G suffix) into a
    # page order. Raises ArgException unless the size is a power-of-2 number of
    # pages, at least 2 pages and no bigger than PMD size.
    units = {
        "K": 2**10, "M": 2**20, "G": 2**30,
        "k": 2**10, "m": 2**20, "g": 2**30,
    }
    unit = 1
    if human and human[-1] in units:
        unit = units[human[-1]]
        human = human[:-1]
    try:
        size = int(human)
    except ValueError:
        raise ArgException('error: --cont value must be integer size with optional KMG unit') from None
    size *= unit
    # Checked up front so that zero and negative sizes are rejected too.
    if size < 2 * PAGE_SIZE:
        raise ArgException('error: --cont value must be size of at least 2 pages')
    order = (size // PAGE_SIZE).bit_length() - 1
    if (1 << order) * PAGE_SIZE != size:
        raise ArgException('error: --cont value must be size of power-of-2 pages')
    if order > PMD_ORDER:
        raise ArgException('error: --cont value must be less than or equal to PMD order')
    return order


def main():
    # Parse the command line, then scan once or periodically.
    docs_width = shutil.get_terminal_size().columns
    docs_width -= 2
    docs_width = min(80, docs_width)

    def format_docs(string):
        # Collapse all whitespace, then treat each literal "\n" marker in the
        # text as a paragraph break and wrap every paragraph to docs_width.
        text = re.sub(r'\s+', ' ', string)
        text = re.sub(r'\s*\\n\s*', '\n', text)
        paras = text.split('\n')
        paras = [textwrap.fill(p, width=docs_width) for p in paras]
        return '\n'.join(paras)

    def formatter(prog):
        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)

    parser = argparse.ArgumentParser(formatter_class=formatter,
        description=format_docs("""Prints information about how transparent huge
                    pages are mapped, either system-wide, or for a specified
                    process or cgroup.\\n
                    \\n
                    When run with --pid, the user explicitly specifies the set
                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
                    with --cgroup, the user passes either a v1 or v2 cgroup and
                    all pids that belong to the cgroup subtree are scanned. When
                    run with neither --pid nor --cgroup, the full set of pids on
                    the system is gathered from /proc and scanned as if the user
                    had provided "--pid 1 --pid 2 ...".\\n
                    \\n
                    A default set of statistics is always generated for THP
                    mappings. However, it is also possible to generate
                    additional statistics for "contiguous block mappings" where
                    the block size is user-defined.\\n
                    \\n
                    Statistics are maintained independently for anonymous and
                    file-backed (pagecache) memory and are shown both in kB and
                    as a percentage of either total anonymous or total
                    file-backed memory as appropriate.\\n
                    \\n
                    THP Statistics\\n
                    --------------\\n
                    \\n
                    Statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is aligned to
                    their size, for each <size> supported by the system.
                    Separate counters describe THPs mapped by PTE vs those
                    mapped by PMD. (Although note a THP can only be mapped by
                    PMD if it is PMD-sized):\\n
                    \\n
                    - anon-thp-pte-aligned-<size>kB\\n
                    - file-thp-pte-aligned-<size>kB\\n
                    - anon-thp-pmd-aligned-<size>kB\\n
                    - file-thp-pmd-aligned-<size>kB\\n
                    \\n
                    Similarly, statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is *not*
                    aligned to their size, for each <size> supported by the
                    system. Due to the unaligned mapping, it is impossible to
                    map by PMD, so there are only PTE counters for this case:\\n
                    \\n
                    - anon-thp-pte-unaligned-<size>kB\\n
                    - file-thp-pte-unaligned-<size>kB\\n
                    \\n
                    Statistics are also always generated for mapped pages that
                    belong to a THP but where the is THP is *not* fully- and
                    contiguously- mapped. These "partial" mappings are all
                    counted in the same counter regardless of the size of the
                    THP that is partially mapped:\\n
                    \\n
                    - anon-thp-pte-partial\\n
                    - file-thp-pte-partial\\n
                    \\n
                    Contiguous Block Statistics\\n
                    ---------------------------\\n
                    \\n
                    An optional, additional set of statistics is generated for
                    every contiguous block size specified with `--cont <size>`.
                    These statistics show how much memory is mapped in
                    contiguous blocks of <size> and also aligned to <size>. A
                    given contiguous block must all belong to the same THP, but
                    there is no requirement for it to be the *whole* THP.
                    Separate counters describe contiguous blocks mapped by PTE
                    vs those mapped by PMD:\\n
                    \\n
                    - anon-cont-pte-aligned-<size>kB\\n
                    - file-cont-pte-aligned-<size>kB\\n
                    - anon-cont-pmd-aligned-<size>kB\\n
                    - file-cont-pmd-aligned-<size>kB\\n
                    \\n
                    As an example, if monitoring 64K contiguous blocks (--cont
                    64K), there are a number of sources that could provide such
                    blocks: a fully- and contiguously-mapped 64K THP that is
                    aligned to a 64K boundary would provide 1 block. A fully-
                    and contiguously-mapped 128K THP that is aligned to at least
                    a 64K boundary would provide 2 blocks. Or a 128K THP that
                    maps its first 100K, but contiguously and starting at a 64K
                    boundary would provide 1 block. A fully- and
                    contiguously-mapped 2M THP would provide 32 blocks. There
                    are many other possible permutations.\\n"""),
        epilog=format_docs("""Requires root privilege to access pagemap and
                    kpageflags."""))

    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--pid',
        metavar='pid', required=False, type=int, default=[], action='append',
        help="""Process id of the target process. Maybe issued multiple times to
            scan multiple processes. --pid and --cgroup are mutually exclusive.
            If neither are provided, all processes are scanned to provide
            system-wide information.""")

    group.add_argument('--cgroup',
        metavar='path', required=False,
        help="""Path to the target cgroup in sysfs. Iterates over every pid in
            the cgroup and its children. --pid and --cgroup are mutually
            exclusive. If neither are provided, all processes are scanned to
            provide system-wide information.""")

    parser.add_argument('--rollup',
        required=False, default=False, action='store_true',
        help="""Sum the per-vma statistics to provide a summary over the whole
            system, process or cgroup.""")

    parser.add_argument('--cont',
        metavar='size[KMG]', required=False, default=[], action='append',
        help="""Adds stats for memory that is mapped in contiguous blocks of
            <size> and also aligned to <size>. May be issued multiple times to
            track multiple sized blocks. Useful to infer e.g. arm64 contpte and
            hpa mappings. Size must be a power-of-2 number of pages.""")

    parser.add_argument('--inc-smaps',
        required=False, default=False, action='store_true',
        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
            output.""")

    parser.add_argument('--inc-empty',
        required=False, default=False, action='store_true',
        help="""Show all statistics including those whose value is 0.""")

    parser.add_argument('--periodic',
        metavar='sleep_ms', required=False, type=int,
        help="""Run in a loop, polling every sleep_ms milliseconds.""")

    args = parser.parse_args()

    try:
        args.cont = [size2order(cont) for cont in args.cont]
    except ArgException:
        parser.print_usage()
        raise

    if args.periodic:
        while True:
            do_main(args)
            print()
            time.sleep(args.periodic / 1000)
    else:
        do_main(args)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        prog = os.path.basename(sys.argv[0])
        print(f'{prog}: {e}')
        sys.exit(1)
{e}') 675*2444172cSRyan Roberts exit(1) 676