xref: /linux/tools/mm/thpmaps (revision 79790b6818e96c58fe2bffee1b418c16e64e7b80)
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2024 ARM Ltd.
#
# Utility providing smaps-like output detailing transparent hugepage usage.
# For more info, run:
# ./thpmaps --help
#
# Requires numpy:
# pip3 install numpy
11*2444172cSRyan Roberts
12*2444172cSRyan Roberts
13*2444172cSRyan Robertsimport argparse
14*2444172cSRyan Robertsimport collections
15*2444172cSRyan Robertsimport math
16*2444172cSRyan Robertsimport os
17*2444172cSRyan Robertsimport re
18*2444172cSRyan Robertsimport resource
19*2444172cSRyan Robertsimport shutil
20*2444172cSRyan Robertsimport sys
21*2444172cSRyan Robertsimport textwrap
22*2444172cSRyan Robertsimport time
23*2444172cSRyan Robertsimport numpy as np
24*2444172cSRyan Roberts
25*2444172cSRyan Roberts
# Base page geometry of the running system.
PAGE_SIZE = resource.getpagesize()
PAGE_SHIFT = int(math.log2(PAGE_SIZE))

# Size of a PMD-mapped huge page, as reported by the kernel through sysfs.
with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
    PMD_SIZE = int(f.read())

# Page order (log2 of the number of base pages) of a PMD-sized huge page.
PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))
31*2444172cSRyan Roberts
32*2444172cSRyan Roberts
def align_forward(v, a):
    # Round v up to the next multiple of a. The mask arithmetic only gives a
    # meaningful answer when a is a power of two.
    mask = a - 1
    return (v + mask) & ~mask
35*2444172cSRyan Roberts
36*2444172cSRyan Roberts
def align_offset(v, a):
    # Offset of v from the start of the a-aligned block that contains it.
    # As with align_forward(), a is expected to be a power of two.
    low_bits = a - 1
    return v & low_bits
39*2444172cSRyan Roberts
40*2444172cSRyan Roberts
def kbnr(kb):
    # Convert KB to number of pages (KB -> bytes -> pages, rounding down).
    nr_bytes = kb << 10
    return nr_bytes >> PAGE_SHIFT
44*2444172cSRyan Roberts
45*2444172cSRyan Roberts
def nrkb(nr):
    # Convert number of pages to KB (pages -> bytes -> KB).
    nr_bytes = nr << PAGE_SHIFT
    return nr_bytes >> 10
49*2444172cSRyan Roberts
50*2444172cSRyan Roberts
def odkb(order):
    # Convert page order to KB: a block of the given order spans
    # PAGE_SIZE * 2^order bytes.
    block_bytes = PAGE_SIZE << order
    return block_bytes >> 10
54*2444172cSRyan Roberts
55*2444172cSRyan Roberts
def cont_ranges_all(search, index):
    # Split the element positions into runs over which every array in search
    # increases by exactly 1 from one element to the next. For each array in
    # index, return an (N, 2) array holding that array's [first, last] values
    # for each of the N runs. All arrays in search and index must be the same
    # size.
    count = len(search[0])

    # contiguous[i] is True when element i+1 continues the run of element i
    # in every search array.
    contiguous = np.diff(search[0]) == 1
    for arr in search[1:]:
        contiguous &= np.diff(arr) == 1

    # Each element is emitted once for every run boundary it sits on: twice
    # for a single-element run, once for the first or last element of a
    # longer run, and not at all for an interior element.
    repeats = np.full(count, 2)
    repeats[1:] -= contiguous
    repeats[:-1] -= contiguous

    result = []
    for arr in index:
        result.append(np.repeat(arr, repeats).reshape(-1, 2))
    return result
68*2444172cSRyan Roberts
69*2444172cSRyan Roberts
class ArgException(Exception):
    """Raised when a command line argument value is rejected."""
72*2444172cSRyan Roberts
73*2444172cSRyan Roberts
class FileIOException(Exception):
    """Raised when a read returns fewer bytes than were requested."""
76*2444172cSRyan Roberts
77*2444172cSRyan Roberts
class BinArrayFile:
    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
    # numpy array. Use inherited class in a with clause to ensure file is
    # closed when it goes out of scope.
    def __init__(self, filename, element_size):
        # filename: path of the file to read from.
        # element_size: size in bytes of one array element stored in the file.
        # The file is opened immediately, so any OSError from os.open()
        # propagates to the caller.
        self.element_size = element_size
        self.filename = filename
        self.fd = os.open(self.filename, os.O_RDONLY)

    def cleanup(self):
        # Close the underlying file descriptor.
        os.close(self.fd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def _readin(self, offset, buffer):
        # Fill buffer with bytes read from the given byte offset of the file.
        # Raises FileIOException if the read returns fewer bytes than the
        # buffer holds.
        length = os.preadv(self.fd, (buffer,), offset)
        if len(buffer) != length:
            raise FileIOException('error: {} failed to read {} bytes at {:x}'
                            .format(self.filename, len(buffer), offset))

    def _toarray(self, buf):
        # Reinterpret the raw bytes as an array of 64-bit unsigned elements.
        # Only 8-byte elements are supported.
        assert(self.element_size == 8)
        return np.frombuffer(buf, dtype=np.uint64)

    def getv(self, vec):
        # Read several ranges of elements and return them concatenated in a
        # single array. vec is an (N, 2) array in which each row holds the
        # inclusive [first, last] element indexes of one range.
        #
        # The indexes are scaled to byte offsets on a copy; the caller's array
        # is left untouched.
        byte_vec = vec * self.element_size
        offsets = byte_vec[:, 0]
        # The last index is inclusive, hence the extra element per range.
        lengths = (np.diff(byte_vec) + self.element_size).reshape(len(byte_vec))
        buf = bytearray(int(np.sum(lengths)))
        view = memoryview(buf)
        pos = 0
        for offset, length in zip(offsets, lengths):
            offset = int(offset)
            length = int(length)
            # Read straight into the slice of the output buffer for this range.
            self._readin(offset, view[pos:pos+length])
            pos += length
        return self._toarray(buf)

    def get(self, index, nr=1):
        # Read nr consecutive elements starting at element index and return
        # them as an array.
        offset = index * self.element_size
        length = nr * self.element_size
        buf = bytearray(length)
        self._readin(offset, buf)
        return self._toarray(buf)
126*2444172cSRyan Roberts
127*2444172cSRyan Roberts
# Fields of a 64-bit pagemap entry used by this tool: bit 63 marks the page as
# present and the low 55 bits are masked out as the page frame number.
PM_PAGE_PRESENT = 1 << 63
PM_PFN_MASK = (1 << 55) - 1

class PageMap(BinArrayFile):
    # Read ranges of a given pid's pagemap into a numpy array. Entries are
    # 8 bytes each.
    def __init__(self, pid='self'):
        path = '/proc/{}/pagemap'.format(pid)
        super().__init__(path, 8)
135*2444172cSRyan Roberts
136*2444172cSRyan Roberts
# Bits of a /proc/kpageflags entry that this tool inspects.
KPF_ANON = 1 << 12
KPF_COMPOUND_HEAD = 1 << 15
KPF_COMPOUND_TAIL = 1 << 16
KPF_THP = 1 << 22

class KPageFlags(BinArrayFile):
    # Read ranges of /proc/kpageflags into a numpy array. Entries are 8 bytes
    # each.
    def __init__(self):
        super().__init__('/proc/kpageflags', 8)
146*2444172cSRyan Roberts
147*2444172cSRyan Roberts
# smaps fields collected for each VMA when the full smaps statistics are
# included in the output.
vma_all_stats = {
    "Size",
    "Rss",
    "Pss",
    "Pss_Dirty",
    "Shared_Clean",
    "Shared_Dirty",
    "Private_Clean",
    "Private_Dirty",
    "Referenced",
    "Anonymous",
    "KSM",
    "LazyFree",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
    "Shared_Hugetlb",
    "Private_Hugetlb",
    "Swap",
    "SwapPss",
    "Locked",
}
170*2444172cSRyan Roberts
# Minimum set of smaps fields the THP accounting itself reads from each VMA.
vma_min_stats = {
    "Rss",
    "Anonymous",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
}
178*2444172cSRyan Roberts
# One mapping parsed from an smaps header line, plus the requested statistics
# that follow it.
VMA = collections.namedtuple('VMA', [
    'name',     # mapped object as shown in smaps, '' when the field is absent
    'start',    # start address
    'end',      # end address
    'read',     # True if the permission string has 'r'
    'write',    # True if the permission string has 'w'
    'execute',  # True if the permission string has 'x'
    'private',  # True if the permission string has 'p'
    'pgoff',    # offset field, parsed as hex
    'major',    # device major number, parsed as hex
    'minor',    # device minor number, parsed as hex
    'inode',    # inode field; parsed base-16 and printed with a hex format
                # by vma_print(), so the digits shown are those in smaps
    'stats',    # dict: smaps field name -> {'type': None, 'value': int}
])

class VMAList:
    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
    # instance to receive VMAs.
    def __init__(self, pid='self', stats=()):
        # pid: process whose smaps file is parsed.
        # stats: collection of smaps field names to record for each VMA; any
        #        other field is ignored.
        self.vmas = []
        with open(f'/proc/{pid}/smaps', 'r') as file:
            for line in file:
                elements = line.split()
                if not elements:
                    continue
                if '-' in elements[0]:
                    # "<start>-<end> ..." header line starts a new VMA.
                    self.vmas.append(self._parse_header(line))
                else:
                    # "<Field>: <value> ..." line belonging to the latest VMA.
                    param = elements[0][:-1]
                    if param in stats:
                        value = int(elements[1])
                        self.vmas[-1].stats[param] = {'type': None, 'value': value}

    @staticmethod
    def _parse_header(line):
        # Build a VMA from an smaps header line. Only the first five fields
        # are split on whitespace; whatever follows is the mapping name, which
        # may itself contain spaces (e.g. a path with a space in it), so it is
        # kept intact rather than tokenised.
        fields = line.split(maxsplit=5)
        start, end = (int(x, 16) for x in fields[0].split('-'))
        major, minor = (int(x, 16) for x in fields[3].split(':'))
        perms = fields[1]
        return VMA(
            # The remainder keeps its trailing newline, so strip it.
            name=fields[5].rstrip() if len(fields) > 5 else '',
            start=start,
            end=end,
            read=perms[0] == 'r',
            write=perms[1] == 'w',
            execute=perms[2] == 'x',
            private=perms[3] == 'p',
            pgoff=int(fields[2], 16),
            major=major,
            minor=minor,
            inode=int(fields[4], 16),
            stats={},
        )

    def __iter__(self):
        yield from self.vmas
227*2444172cSRyan Roberts
228*2444172cSRyan Roberts
def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
    # Given 4 same-sized arrays representing a range within a page table backed
    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
    # True if page is anonymous, heads: True if page is head of a THP), return a
    # dictionary of statistics describing the mapped THPs.
    #
    # vma:        VMA being described; its AnonHugePages, ShmemPmdMapped and
    #             FilePmdMapped smaps values provide the PMD-mapped amounts.
    # kpageflags: object whose get(pfn) returns the flags of a physical page;
    #             used to look at the page just past the end of each range.
    # ranges:     per-run [first, last] (inclusive) rows; ranges[0] holds
    #             positions into the arrays and ranges[2] the matching pfns.
    # indexes:    array of positions (0, 1, 2, ...) into the other arrays.
    #
    # Returns a dict mapping a label to {'type': 'anon' or 'file',
    # 'value': size in kB}.
    stats = {
        'file': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
        'anon': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
    }

    for rindex, rpfn in zip(ranges[0], ranges[2]):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        pfn_end = int(rpfn[1]) + 1

        # Positions of the THP head pages that fall inside this range.
        folios = indexes[index_next:index_end][heads[index_next:index_end]]

        # Account pages for any partially mapped THP at the front. In that case,
        # the first page of the range is a tail.
        nr = (int(folios[0]) if len(folios) else index_end) - index_next
        stats['anon' if anons[index_next] else 'file']['partial'] += nr

        # Account pages for any partially mapped THP at the back. In that case,
        # the next page after the range is a tail.
        if len(folios):
            flags = int(kpageflags.get(pfn_end)[0])
            if flags & KPF_COMPOUND_TAIL:
                last_head = int(folios[-1])
                nr = index_end - last_head
                folios = folios[:-1]
                # The range now ends where the partially mapped THP begins.
                index_end = last_head
                # Classify the partial THP by its own head page. Looking at
                # the page before it would describe a different THP, or wrap
                # around to the end of the array when the head is at index 0.
                stats['anon' if anons[last_head] else 'file']['partial'] += nr

        # Account fully mapped THPs in the middle of the range.
        if len(folios):
            # Each THP extends from its head to the next head, or to the end
            # of the (possibly trimmed) range for the last one.
            folio_nrs = np.append(np.diff(folios), np.uint64(index_end - folios[-1]))
            folio_orders = np.log2(folio_nrs).astype(np.uint64)
            for index, order in zip(folios, folio_orders):
                index = int(index)
                order = int(order)
                nr = 1 << order
                vfn = int(vfns[index])
                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
                anon = 'anon' if anons[index] else 'file'
                stats[anon][align][order] += nr

    # Account PMD-mapped THPs separately, so filter out of the stats. There is a
    # race between acquiring the smaps stats and reading pagemap, where memory
    # could be deallocated. So clamp to zero in case it would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    def flatten_sub(kind, subtype, counts):
        # Emit one entry per order, converting page counts to kB. Orders 0
        # and 1 are not reported.
        param = f"{kind}-thp-pte-{subtype}-{{}}kB"
        for od, nr in enumerate(counts[2:], 2):
            rstats[param.format(odkb(od))] = {'type': kind, 'value': nrkb(nr)}

    def flatten_type(kind, counts):
        # Emit the aligned, unaligned and partial entries for one memory type.
        flatten_sub(kind, 'aligned', counts['aligned'])
        flatten_sub(kind, 'unaligned', counts['unaligned'])
        rstats[f"{kind}-thp-pte-partial"] = {'type': kind, 'value': nrkb(counts['partial'])}

    flatten_type('anon', stats['anon'])
    flatten_type('file', stats['file'])

    return rstats
310*2444172cSRyan Roberts
311*2444172cSRyan Roberts
def cont_parse(vma, order, ranges, anons, heads):
    # Count the pages mapped as naturally aligned contiguous blocks of
    # 2^order pages, and return a dictionary of statistics describing them.
    #
    # vma:    VMA being described; its AnonHugePages, ShmemPmdMapped and
    #         FilePmdMapped smaps values provide the PMD-mapped amounts.
    # order:  page order of the block size of interest.
    # ranges: per-run [first, last] (inclusive) rows for array positions, vfns
    #         and pfns, in that order.
    # anons:  True where the page is anonymous.
    # heads:  True where the page is the head of a THP.
    block_pages = 1 << order
    totals = {'anon': 0, 'file': 0}

    for index_range, vfn_range, pfn_range in zip(*ranges):
        first_vfn = int(vfn_range[0])
        first_pfn = int(pfn_range[0])

        # A run can only contain aligned blocks if the virtual and physical
        # addresses share the same offset within a block.
        if align_offset(first_pfn, block_pages) != align_offset(first_vfn, block_pages):
            continue

        # Skip forward to the first block-aligned page of the run.
        start = int(index_range[0]) + (align_forward(first_vfn, block_pages) - first_vfn)
        limit = int(index_range[1]) + 1

        for block in range(start, limit - block_pages + 1, block_pages):
            # A head page anywhere after the first page means the block
            # straddles more than one THP, so it is not counted.
            if heads[block + 1:block + block_pages].any():
                continue
            totals['anon' if anons[block] else 'file'] += block_pages

    # Blocks that are PMD-mapped are accounted separately, so filter them out
    # of the stats. There is a race between acquiring the smaps stats and
    # reading pagemap, where memory could be deallocated, so clamp to zero in
    # case the result would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = (vma.stats['ShmemPmdMapped']['value'] +
                       vma.stats['FilePmdMapped']['value'])
    nr_anon = max(0, totals['anon'] - kbnr(anon_pmd_mapped))
    nr_file = max(0, totals['file'] - kbnr(file_pmd_mapped))

    size_kb = nrkb(block_pages)
    return {
        f"anon-cont-pmd-aligned-{size_kb}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-cont-pmd-aligned-{size_kb}kB": {'type': 'file', 'value': file_pmd_mapped},
        f"anon-cont-pte-aligned-{size_kb}kB": {'type': 'anon', 'value': nrkb(nr_anon)},
        f"file-cont-pte-aligned-{size_kb}kB": {'type': 'file', 'value': nrkb(nr_file)},
    }
361*2444172cSRyan Roberts
362*2444172cSRyan Roberts
def vma_print(vma, pid):
    # Prints a VMA instance in a format similar to smaps. The main difference is
    # that the pid is included as the first value.
    perms = ''.join([
        'r' if vma.read else '-',
        'w' if vma.write else '-',
        'x' if vma.execute else '-',
        'p' if vma.private else 's',
    ])
    print(f"{pid:010d}: {vma.start:016x}-{vma.end:016x} {perms} "
          f"{vma.pgoff:08x} {vma.major:02x}:{vma.minor:02x} "
          f"{vma.inode:08x} {vma.name}")
373*2444172cSRyan Roberts
374*2444172cSRyan Roberts
def stats_print(stats, tot_anon, tot_file, inc_empty):
    # Print a statistics dictionary, one "label: value kB" line per entry.
    # Entries typed 'anon' or 'file' also show their share of tot_anon or
    # tot_file when that total is positive. Zero-valued entries are skipped
    # unless inc_empty is set.
    label_field = 32
    totals = {'anon': tot_anon, 'file': tot_file}
    for label, stat in stats.items():
        kind, value = stat['type'], stat['value']
        if not (value or inc_empty):
            continue
        total = totals.get(kind, 0)
        percent = f' ({value / total:3.0%})' if total > 0 else ''
        # Pad "label:" out to the label field so the values line up.
        heading = f"{label}:".ljust(label_field)
        print(f"{heading}{value:8} kB{percent}")
390*2444172cSRyan Roberts
391*2444172cSRyan Roberts
def vma_parse(vma, pagemap, kpageflags, contorders):
    # Generate thp and cont statistics for a single VMA. Returns a tuple of
    # (statistics dictionary, total anonymous kB, total file-backed kB), the
    # totals being taken from the VMA's smaps values.
    first_vfn = vma.start >> PAGE_SHIFT
    limit_vfn = vma.end >> PAGE_SHIFT

    # One pagemap entry per virtual page; keep only the present pages.
    entries = pagemap.get(first_vfn, limit_vfn - first_vfn)
    is_present = (entries & PM_PAGE_PRESENT) != 0
    pfns = (entries & PM_PFN_MASK)[is_present]
    vfns = np.arange(first_vfn, limit_vfn, dtype=np.uint64)[is_present]

    # Fetch the flags of every present page, reading each run of consecutive
    # pfns as a single range.
    flags = kpageflags.getv(cont_ranges_all([pfns], [pfns])[0])
    is_anon = (flags & KPF_ANON) != 0
    is_head = (flags & KPF_COMPOUND_HEAD) != 0
    is_thp = (flags & KPF_THP) != 0

    # From here on only pages that belong to a THP are of interest.
    vfns = vfns[is_thp]
    pfns = pfns[is_thp]
    anons = is_anon[is_thp]
    heads = is_head[is_thp]

    # Runs that are contiguous both virtually and physically.
    indexes = np.arange(len(vfns), dtype=np.uint64)
    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])

    merged = dict(thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads))
    per_order = [cont_parse(vma, order, ranges, anons, heads) for order in contorders]
    for contstats in per_order:
        merged.update(contstats)

    tot_anon = vma.stats['Anonymous']['value']
    tot_file = vma.stats['Rss']['value'] - tot_anon

    return merged, tot_anon, tot_file
428*2444172cSRyan Roberts
429*2444172cSRyan Roberts
def do_main(args):
    # Gather the set of pids selected by the arguments, then print THP
    # statistics either per VMA or, with args.rollup, summed over everything
    # that was scanned.
    rollup = {}
    rollup_anon = 0
    rollup_file = 0

    # strict decides whether a pid that cannot be read is an error. Only pids
    # named explicitly by the user are strict.
    if args.cgroup:
        strict = False
        pids = set()
        for cgroup, _subdirs, _files in os.walk(args.cgroup):
            with open(f'{cgroup}/cgroup.procs') as pidfile:
                pids.update(int(line.strip()) for line in pidfile)
    elif args.pid:
        strict = True
        pids = set().union(args.pid)
    else:
        strict = False
        pids = {int(entry) for entry in os.listdir('/proc') if entry.isdigit()}

    if not args.rollup:
        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")

    wanted_stats = vma_all_stats if args.inc_smaps else vma_min_stats

    for pid in pids:
        try:
            with PageMap(pid) as pagemap, KPageFlags() as kpageflags:
                for vma in VMAList(pid, wanted_stats):
                    accessible = vma.read or vma.write or vma.execute
                    if accessible and vma.stats['Rss']['value'] > 0:
                        stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
                    else:
                        # Nothing resident (or no access at all): no THP stats.
                        stats, vma_anon, vma_file = {}, 0, 0

                    if args.inc_smaps:
                        stats = {**vma.stats, **stats}

                    if not args.rollup:
                        vma_print(vma, pid)
                        stats_print(stats, vma_anon, vma_file, args.inc_empty)
                        continue

                    # Fold this VMA into the running totals.
                    for k, v in stats.items():
                        if k in rollup:
                            assert(rollup[k]['type'] == v['type'])
                            rollup[k]['value'] += v['value']
                        else:
                            rollup[k] = v
                    rollup_anon += vma_anon
                    rollup_file += vma_file
        except (FileNotFoundError, ProcessLookupError, FileIOException):
            # Skip pids whose files cannot be read, unless the user asked
            # for them explicitly.
            if strict:
                raise

    if args.rollup:
        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)
486*2444172cSRyan Roberts
487*2444172cSRyan Roberts
488*2444172cSRyan Robertsdef main():
489*2444172cSRyan Roberts    docs_width = shutil.get_terminal_size().columns
490*2444172cSRyan Roberts    docs_width -= 2
491*2444172cSRyan Roberts    docs_width = min(80, docs_width)
492*2444172cSRyan Roberts
493*2444172cSRyan Roberts    def format(string):
494*2444172cSRyan Roberts        text = re.sub(r'\s+', ' ', string)
495*2444172cSRyan Roberts        text = re.sub(r'\s*\\n\s*', '\n', text)
496*2444172cSRyan Roberts        paras = text.split('\n')
497*2444172cSRyan Roberts        paras = [textwrap.fill(p, width=docs_width) for p in paras]
498*2444172cSRyan Roberts        return '\n'.join(paras)
499*2444172cSRyan Roberts
500*2444172cSRyan Roberts    def formatter(prog):
501*2444172cSRyan Roberts        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)
502*2444172cSRyan Roberts
503*2444172cSRyan Roberts    def size2order(human):
504*2444172cSRyan Roberts        units = {
505*2444172cSRyan Roberts            "K": 2**10, "M": 2**20, "G": 2**30,
506*2444172cSRyan Roberts            "k": 2**10, "m": 2**20, "g": 2**30,
507*2444172cSRyan Roberts        }
508*2444172cSRyan Roberts        unit = 1
509*2444172cSRyan Roberts        if human[-1] in units:
510*2444172cSRyan Roberts            unit = units[human[-1]]
511*2444172cSRyan Roberts            human = human[:-1]
512*2444172cSRyan Roberts        try:
513*2444172cSRyan Roberts            size = int(human)
514*2444172cSRyan Roberts        except ValueError:
515*2444172cSRyan Roberts            raise ArgException('error: --cont value must be integer size with optional KMG unit')
516*2444172cSRyan Roberts        size *= unit
517*2444172cSRyan Roberts        order = int(math.log2(size / PAGE_SIZE))
518*2444172cSRyan Roberts        if order < 1:
519*2444172cSRyan Roberts            raise ArgException('error: --cont value must be size of at least 2 pages')
520*2444172cSRyan Roberts        if (1 << order) * PAGE_SIZE != size:
521*2444172cSRyan Roberts            raise ArgException('error: --cont value must be size of power-of-2 pages')
522*2444172cSRyan Roberts        if order > PMD_ORDER:
523*2444172cSRyan Roberts            raise ArgException('error: --cont value must be less than or equal to PMD order')
524*2444172cSRyan Roberts        return order
525*2444172cSRyan Roberts
526*2444172cSRyan Roberts    parser = argparse.ArgumentParser(formatter_class=formatter,
527*2444172cSRyan Roberts        description=format("""Prints information about how transparent huge
528*2444172cSRyan Roberts                    pages are mapped, either system-wide, or for a specified
529*2444172cSRyan Roberts                    process or cgroup.\\n
530*2444172cSRyan Roberts                    \\n
531*2444172cSRyan Roberts                    When run with --pid, the user explicitly specifies the set
532*2444172cSRyan Roberts                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
533*2444172cSRyan Roberts                    with --cgroup, the user passes either a v1 or v2 cgroup and
534*2444172cSRyan Roberts                    all pids that belong to the cgroup subtree are scanned. When
535*2444172cSRyan Roberts                    run with neither --pid nor --cgroup, the full set of pids on
536*2444172cSRyan Roberts                    the system is gathered from /proc and scanned as if the user
537*2444172cSRyan Roberts                    had provided "--pid 1 --pid 2 ...".\\n
538*2444172cSRyan Roberts                    \\n
539*2444172cSRyan Roberts                    A default set of statistics is always generated for THP
540*2444172cSRyan Roberts                    mappings. However, it is also possible to generate
541*2444172cSRyan Roberts                    additional statistics for "contiguous block mappings" where
542*2444172cSRyan Roberts                    the block size is user-defined.\\n
543*2444172cSRyan Roberts                    \\n
544*2444172cSRyan Roberts                    Statistics are maintained independently for anonymous and
545*2444172cSRyan Roberts                    file-backed (pagecache) memory and are shown both in kB and
546*2444172cSRyan Roberts                    as a percentage of either total anonymous or total
547*2444172cSRyan Roberts                    file-backed memory as appropriate.\\n
548*2444172cSRyan Roberts                    \\n
549*2444172cSRyan Roberts                    THP Statistics\\n
550*2444172cSRyan Roberts                    --------------\\n
551*2444172cSRyan Roberts                    \\n
552*2444172cSRyan Roberts                    Statistics are always generated for fully- and
553*2444172cSRyan Roberts                    contiguously-mapped THPs whose mapping address is aligned to
554*2444172cSRyan Roberts                    their size, for each <size> supported by the system.
555*2444172cSRyan Roberts                    Separate counters describe THPs mapped by PTE vs those
556*2444172cSRyan Roberts                    mapped by PMD. (Although note a THP can only be mapped by
557*2444172cSRyan Roberts                    PMD if it is PMD-sized):\\n
558*2444172cSRyan Roberts                    \\n
559*2444172cSRyan Roberts                    - anon-thp-pte-aligned-<size>kB\\n
560*2444172cSRyan Roberts                    - file-thp-pte-aligned-<size>kB\\n
561*2444172cSRyan Roberts                    - anon-thp-pmd-aligned-<size>kB\\n
562*2444172cSRyan Roberts                    - file-thp-pmd-aligned-<size>kB\\n
563*2444172cSRyan Roberts                    \\n
564*2444172cSRyan Roberts                    Similarly, statistics are always generated for fully- and
565*2444172cSRyan Roberts                    contiguously-mapped THPs whose mapping address is *not*
566*2444172cSRyan Roberts                    aligned to their size, for each <size> supported by the
567*2444172cSRyan Roberts                    system. Due to the unaligned mapping, it is impossible to
568*2444172cSRyan Roberts                    map by PMD, so there are only PTE counters for this case:\\n
569*2444172cSRyan Roberts                    \\n
570*2444172cSRyan Roberts                    - anon-thp-pte-unaligned-<size>kB\\n
571*2444172cSRyan Roberts                    - file-thp-pte-unaligned-<size>kB\\n
572*2444172cSRyan Roberts                    \\n
573*2444172cSRyan Roberts                    Statistics are also always generated for mapped pages that
574*2444172cSRyan Roberts                    belong to a THP but where the is THP is *not* fully- and
575*2444172cSRyan Roberts                    contiguously- mapped. These "partial" mappings are all
576*2444172cSRyan Roberts                    counted in the same counter regardless of the size of the
577*2444172cSRyan Roberts                    THP that is partially mapped:\\n
578*2444172cSRyan Roberts                    \\n
579*2444172cSRyan Roberts                    - anon-thp-pte-partial\\n
580*2444172cSRyan Roberts                    - file-thp-pte-partial\\n
581*2444172cSRyan Roberts                    \\n
582*2444172cSRyan Roberts                    Contiguous Block Statistics\\n
583*2444172cSRyan Roberts                    ---------------------------\\n
584*2444172cSRyan Roberts                    \\n
585*2444172cSRyan Roberts                    An optional, additional set of statistics is generated for
586*2444172cSRyan Roberts                    every contiguous block size specified with `--cont <size>`.
587*2444172cSRyan Roberts                    These statistics show how much memory is mapped in
588*2444172cSRyan Roberts                    contiguous blocks of <size> and also aligned to <size>. A
589*2444172cSRyan Roberts                    given contiguous block must all belong to the same THP, but
590*2444172cSRyan Roberts                    there is no requirement for it to be the *whole* THP.
591*2444172cSRyan Roberts                    Separate counters describe contiguous blocks mapped by PTE
592*2444172cSRyan Roberts                    vs those mapped by PMD:\\n
593*2444172cSRyan Roberts                    \\n
594*2444172cSRyan Roberts                    - anon-cont-pte-aligned-<size>kB\\n
595*2444172cSRyan Roberts                    - file-cont-pte-aligned-<size>kB\\n
596*2444172cSRyan Roberts                    - anon-cont-pmd-aligned-<size>kB\\n
597*2444172cSRyan Roberts                    - file-cont-pmd-aligned-<size>kB\\n
598*2444172cSRyan Roberts                    \\n
599*2444172cSRyan Roberts                    As an example, if monitoring 64K contiguous blocks (--cont
600*2444172cSRyan Roberts                    64K), there are a number of sources that could provide such
601*2444172cSRyan Roberts                    blocks: a fully- and contiguously-mapped 64K THP that is
602*2444172cSRyan Roberts                    aligned to a 64K boundary would provide 1 block. A fully-
603*2444172cSRyan Roberts                    and contiguously-mapped 128K THP that is aligned to at least
604*2444172cSRyan Roberts                    a 64K boundary would provide 2 blocks. Or a 128K THP that
605*2444172cSRyan Roberts                    maps its first 100K, but contiguously and starting at a 64K
606*2444172cSRyan Roberts                    boundary would provide 1 block. A fully- and
607*2444172cSRyan Roberts                    contiguously-mapped 2M THP would provide 32 blocks. There
608*2444172cSRyan Roberts                    are many other possible permutations.\\n"""),
609*2444172cSRyan Roberts        epilog=format("""Requires root privilege to access pagemap and
610*2444172cSRyan Roberts                    kpageflags."""))
611*2444172cSRyan Roberts
612*2444172cSRyan Roberts    group = parser.add_mutually_exclusive_group(required=False)
613*2444172cSRyan Roberts    group.add_argument('--pid',
614*2444172cSRyan Roberts        metavar='pid', required=False, type=int, default=[], action='append',
615*2444172cSRyan Roberts        help="""Process id of the target process. Maybe issued multiple times to
616*2444172cSRyan Roberts            scan multiple processes. --pid and --cgroup are mutually exclusive.
617*2444172cSRyan Roberts            If neither are provided, all processes are scanned to provide
618*2444172cSRyan Roberts            system-wide information.""")
619*2444172cSRyan Roberts
620*2444172cSRyan Roberts    group.add_argument('--cgroup',
621*2444172cSRyan Roberts        metavar='path', required=False,
622*2444172cSRyan Roberts        help="""Path to the target cgroup in sysfs. Iterates over every pid in
623*2444172cSRyan Roberts            the cgroup and its children. --pid and --cgroup are mutually
624*2444172cSRyan Roberts            exclusive. If neither are provided, all processes are scanned to
625*2444172cSRyan Roberts            provide system-wide information.""")
626*2444172cSRyan Roberts
627*2444172cSRyan Roberts    parser.add_argument('--rollup',
628*2444172cSRyan Roberts        required=False, default=False, action='store_true',
629*2444172cSRyan Roberts        help="""Sum the per-vma statistics to provide a summary over the whole
630*2444172cSRyan Roberts            system, process or cgroup.""")
631*2444172cSRyan Roberts
632*2444172cSRyan Roberts    parser.add_argument('--cont',
633*2444172cSRyan Roberts        metavar='size[KMG]', required=False, default=[], action='append',
634*2444172cSRyan Roberts        help="""Adds stats for memory that is mapped in contiguous blocks of
635*2444172cSRyan Roberts            <size> and also aligned to <size>. May be issued multiple times to
636*2444172cSRyan Roberts            track multiple sized blocks. Useful to infer e.g. arm64 contpte and
637*2444172cSRyan Roberts            hpa mappings. Size must be a power-of-2 number of pages.""")
638*2444172cSRyan Roberts
639*2444172cSRyan Roberts    parser.add_argument('--inc-smaps',
640*2444172cSRyan Roberts        required=False, default=False, action='store_true',
641*2444172cSRyan Roberts        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
642*2444172cSRyan Roberts            output.""")
643*2444172cSRyan Roberts
644*2444172cSRyan Roberts    parser.add_argument('--inc-empty',
645*2444172cSRyan Roberts        required=False, default=False, action='store_true',
646*2444172cSRyan Roberts        help="""Show all statistics including those whose value is 0.""")
647*2444172cSRyan Roberts
648*2444172cSRyan Roberts    parser.add_argument('--periodic',
649*2444172cSRyan Roberts        metavar='sleep_ms', required=False, type=int,
650*2444172cSRyan Roberts        help="""Run in a loop, polling every sleep_ms milliseconds.""")
651*2444172cSRyan Roberts
652*2444172cSRyan Roberts    args = parser.parse_args()
653*2444172cSRyan Roberts
654*2444172cSRyan Roberts    try:
655*2444172cSRyan Roberts        args.cont = [size2order(cont) for cont in args.cont]
656*2444172cSRyan Roberts    except ArgException as e:
657*2444172cSRyan Roberts        parser.print_usage()
658*2444172cSRyan Roberts        raise
659*2444172cSRyan Roberts
660*2444172cSRyan Roberts    if args.periodic:
661*2444172cSRyan Roberts        while True:
662*2444172cSRyan Roberts            do_main(args)
663*2444172cSRyan Roberts            print()
664*2444172cSRyan Roberts            time.sleep(args.periodic / 1000)
665*2444172cSRyan Roberts    else:
666*2444172cSRyan Roberts        do_main(args)
667*2444172cSRyan Roberts
668*2444172cSRyan Roberts
if __name__ == "__main__":
    # Script entry point: run main() and turn any failure into a short
    # "<prog>: <message>" diagnostic plus a non-zero exit status instead of
    # a full traceback.
    try:
        main()
    except Exception as e:
        prog = os.path.basename(sys.argv[0])
        # Diagnostics go to stderr so they are not interleaved with the
        # normal output on stdout when that is piped or redirected.
        print(f'{prog}: {e}', file=sys.stderr)
        # Use sys.exit() rather than the exit() builtin: the latter is
        # injected by the site module and is missing when Python is started
        # with -S.
        sys.exit(1)
676