#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2024 ARM Ltd.
#
# Utility providing smaps-like output detailing transparent hugepage usage.
# For more info, run:
# ./thpmaps --help
#
# Requires numpy:
# pip3 install numpy
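#
# Example invocations (illustrative; the pid and cgroup path are placeholders):
#   ./thpmaps --pid 1 --rollup
#   ./thpmaps --cgroup /sys/fs/cgroup/example --cont 64K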


import argparse
import collections
import math
import os
import re
import resource
import shutil
import sys
import textwrap
import time
import numpy as np


with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
    PAGE_SIZE = resource.getpagesize()
    PAGE_SHIFT = int(math.log2(PAGE_SIZE))
    PMD_SIZE = int(f.read())
    PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))


def align_forward(v, a):
    return (v + (a - 1)) & ~(a - 1)


def align_offset(v, a):
    return v & (a - 1)
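# Illustrative example (assuming the alignment 'a' is a power of 2):
# align_forward(5, 4) == 8 and align_offset(5, 4) == 1.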


def kbnr(kb):
    # Convert KB to number of pages.
    return (kb << 10) >> PAGE_SHIFT


def nrkb(nr):
    # Convert number of pages to KB.
    return (nr << PAGE_SHIFT) >> 10


def odkb(order):
    # Convert page order to KB.
    return (PAGE_SIZE << order) >> 10
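# Worked example (assuming a 4 KiB base page size): kbnr(64) == 16,
# nrkb(16) == 64 and odkb(4) == 64.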


def cont_ranges_all(search, index):
    # Given a list of arrays, find the ranges for which values are monotonically
    # incrementing in all arrays. All arrays in search and index must be the
    # same size.
    sz = len(search[0])
    r = np.full(sz, 2)
    d = np.diff(search[0]) == 1
    for dd in [np.diff(arr) == 1 for arr in search[1:]]:
        d &= dd
    r[1:] -= d
    r[:-1] -= d
    return [np.repeat(arr, r).reshape(-1, 2) for arr in index]
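# Illustrative example: with search = index = [np.array([4, 5, 6, 10, 11])],
# the result is [array([[4, 6], [10, 11]])]; each row holds the inclusive
# [start, end] values of one monotonically incrementing run.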


class ArgException(Exception):
    pass


class FileIOException(Exception):
    pass


class BinArrayFile:
    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
    # numpy array. Use an inherited class in a with clause to ensure the file is
    # closed when it goes out of scope.
    def __init__(self, filename, element_size):
        self.element_size = element_size
        self.filename = filename
        self.fd = os.open(self.filename, os.O_RDONLY)

    def cleanup(self):
        os.close(self.fd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def _readin(self, offset, buffer):
        length = os.preadv(self.fd, (buffer,), offset)
        if len(buffer) != length:
            raise FileIOException('error: {} failed to read {} bytes at {:x}'
                            .format(self.filename, len(buffer), offset))

    def _toarray(self, buf):
        assert(self.element_size == 8)
        return np.frombuffer(buf, dtype=np.uint64)

    def getv(self, vec):
        vec *= self.element_size
        offsets = vec[:, 0]
        lengths = (np.diff(vec) + self.element_size).reshape(len(vec))
        buf = bytearray(int(np.sum(lengths)))
        view = memoryview(buf)
        pos = 0
        for offset, length in zip(offsets, lengths):
            offset = int(offset)
            length = int(length)
            self._readin(offset, view[pos:pos+length])
            pos += length
        return self._toarray(buf)

    def get(self, index, nr=1):
        offset = index * self.element_size
        length = nr * self.element_size
        buf = bytearray(length)
        self._readin(offset, buf)
        return self._toarray(buf)


PM_PAGE_PRESENT = 1 << 63
PM_PFN_MASK = (1 << 55) - 1
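# PM_PAGE_PRESENT and PM_PFN_MASK follow the pagemap entry layout documented in
# Documentation/admin-guide/mm/pagemap.rst: bit 63 indicates the page is
# present and bits 0-54 hold the PFN.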

class PageMap(BinArrayFile):
    # Read ranges of a given pid's pagemap into a numpy array.
    def __init__(self, pid='self'):
        super().__init__(f'/proc/{pid}/pagemap', 8)


KPF_ANON = 1 << 12
KPF_COMPOUND_HEAD = 1 << 15
KPF_COMPOUND_TAIL = 1 << 16
KPF_THP = 1 << 22
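# The KPF_* constants are bit positions reported by /proc/kpageflags; see
# Documentation/admin-guide/mm/pagemap.rst for the full list.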

class KPageFlags(BinArrayFile):
    # Read ranges of /proc/kpageflags into a numpy array.
    def __init__(self):
        super().__init__('/proc/kpageflags', 8)


vma_all_stats = set([
    "Size",
    "Rss",
    "Pss",
    "Pss_Dirty",
    "Shared_Clean",
    "Shared_Dirty",
    "Private_Clean",
    "Private_Dirty",
    "Referenced",
    "Anonymous",
    "KSM",
    "LazyFree",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
    "Shared_Hugetlb",
    "Private_Hugetlb",
    "Swap",
    "SwapPss",
    "Locked",
])

vma_min_stats = set([
    "Rss",
    "Anonymous",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
])

VMA = collections.namedtuple('VMA', [
    'name',
    'start',
    'end',
    'read',
    'write',
    'execute',
    'private',
    'pgoff',
    'major',
    'minor',
    'inode',
    'stats',
])
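# Each VMA mirrors one header line of /proc/<pid>/smaps; an illustrative
# example of such a line (values are placeholders):
#   55e8a9f2c000-55e8a9f4d000 rw-p 00000000 00:00 0          [heap]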

class VMAList:
    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
    # instance to receive VMAs.
    def __init__(self, pid='self', stats=[]):
        self.vmas = []
        with open(f'/proc/{pid}/smaps', 'r') as file:
            for line in file:
                elements = line.split()
                if '-' in elements[0]:
                    start, end = map(lambda x: int(x, 16), elements[0].split('-'))
                    major, minor = map(lambda x: int(x, 16), elements[3].split(':'))
                    self.vmas.append(VMA(
                        name=elements[5] if len(elements) == 6 else '',
                        start=start,
                        end=end,
                        read=elements[1][0] == 'r',
                        write=elements[1][1] == 'w',
                        execute=elements[1][2] == 'x',
                        private=elements[1][3] == 'p',
                        pgoff=int(elements[2], 16),
                        major=major,
                        minor=minor,
                        inode=int(elements[4], 16),
                        stats={},
                    ))
                else:
                    param = elements[0][:-1]
                    if param in stats:
                        value = int(elements[1])
                        self.vmas[-1].stats[param] = {'type': None, 'value': value}

    def __iter__(self):
        yield from self.vmas


def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
    # Given 4 same-sized arrays representing a range within a page table backed
    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
    # True if page is anonymous, heads: True if page is head of a THP), return a
    # dictionary of statistics describing the mapped THPs.
    stats = {
        'file': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
        'anon': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
    }

    for rindex, rpfn in zip(ranges[0], ranges[2]):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        pfn_end = int(rpfn[1]) + 1

        folios = indexes[index_next:index_end][heads[index_next:index_end]]

        # Account pages for any partially mapped THP at the front. In that case,
        # the first page of the range is a tail.
        nr = (int(folios[0]) if len(folios) else index_end) - index_next
        stats['anon' if anons[index_next] else 'file']['partial'] += nr

        # Account pages for any partially mapped THP at the back. In that case,
        # the next page after the range is a tail.
        if len(folios):
            flags = int(kpageflags.get(pfn_end)[0])
            if flags & KPF_COMPOUND_TAIL:
                nr = index_end - int(folios[-1])
                folios = folios[:-1]
                index_end -= nr
                stats['anon' if anons[index_end - 1] else 'file']['partial'] += nr

        # Account fully mapped THPs in the middle of the range.
        if len(folios):
            folio_nrs = np.append(np.diff(folios), np.uint64(index_end - folios[-1]))
            folio_orders = np.log2(folio_nrs).astype(np.uint64)
            for index, order in zip(folios, folio_orders):
                index = int(index)
                order = int(order)
                nr = 1 << order
                vfn = int(vfns[index])
                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
                anon = 'anon' if anons[index] else 'file'
                stats[anon][align][order] += nr

    # Account PMD-mapped THPs separately, so filter them out of the stats. There
    # is a race between acquiring the smaps stats and reading pagemap, where
    # memory could be deallocated. So clamp to zero in case it would have gone
    # negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    def flatten_sub(type, subtype, stats):
        param = f"{type}-thp-pte-{subtype}-{{}}kB"
        for od, nr in enumerate(stats[2:], 2):
            rstats[param.format(odkb(od))] = {'type': type, 'value': nrkb(nr)}

    def flatten_type(type, stats):
        flatten_sub(type, 'aligned', stats['aligned'])
        flatten_sub(type, 'unaligned', stats['unaligned'])
        rstats[f"{type}-thp-pte-partial"] = {'type': type, 'value': nrkb(stats['partial'])}

    flatten_type('anon', stats['anon'])
    flatten_type('file', stats['file'])

    return rstats


def cont_parse(vma, order, ranges, anons, heads):
    # Given the three range arrays (index, vfn and pfn runs) plus the same-sized
    # anons and heads arrays (anons: True if page is anonymous, heads: True if
    # page is head of a THP), return a dictionary of statistics describing the
    # aligned, contiguously mapped blocks of the given order.
    nr_cont = 1 << order
    nr_anon = 0
    nr_file = 0

    for rindex, rvfn, rpfn in zip(*ranges):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        vfn_start = int(rvfn[0])
        pfn_start = int(rpfn[0])

        if align_offset(pfn_start, nr_cont) != align_offset(vfn_start, nr_cont):
            continue

        off = align_forward(vfn_start, nr_cont) - vfn_start
        index_next += off

        while index_next + nr_cont <= index_end:
            folio_boundary = heads[index_next+1:index_next+nr_cont].any()
            if not folio_boundary:
                if anons[index_next]:
                    nr_anon += nr_cont
                else:
                    nr_file += nr_cont
            index_next += nr_cont

    # Account blocks that are PMD-mapped separately, so filter them out of the
    # stats. There is a race between acquiring the smaps stats and reading
    # pagemap, where memory could be deallocated. So clamp to zero in case it
    # would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    nr_anon = max(0, nr_anon - kbnr(anon_pmd_mapped))
    nr_file = max(0, nr_file - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    rstats[f"anon-cont-pte-aligned-{nrkb(nr_cont)}kB"] = {'type': 'anon', 'value': nrkb(nr_anon)}
    rstats[f"file-cont-pte-aligned-{nrkb(nr_cont)}kB"] = {'type': 'file', 'value': nrkb(nr_file)}

    return rstats


def vma_print(vma, pid):
    # Prints a VMA instance in a format similar to smaps. The main difference is
    # that the pid is included as the first value.
    print("{:010d}: {:016x}-{:016x} {}{}{}{} {:08x} {:02x}:{:02x} {:08x} {}"
        .format(
            pid, vma.start, vma.end,
            'r' if vma.read else '-', 'w' if vma.write else '-',
            'x' if vma.execute else '-', 'p' if vma.private else 's',
            vma.pgoff, vma.major, vma.minor, vma.inode, vma.name
        ))


def stats_print(stats, tot_anon, tot_file, inc_empty):
    # Print a statistics dictionary.
    label_field = 32
    for label, stat in stats.items():
        type = stat['type']
        value = stat['value']
        if value or inc_empty:
            pad = max(0, label_field - len(label) - 1)
            if type == 'anon' and tot_anon > 0:
                percent = f' ({value / tot_anon:3.0%})'
            elif type == 'file' and tot_file > 0:
                percent = f' ({value / tot_file:3.0%})'
            else:
                percent = ''
            print(f"{label}:{' ' * pad}{value:8} kB{percent}")


def vma_parse(vma, pagemap, kpageflags, contorders):
    # Generate thp and cont statistics for a single VMA.
    start = vma.start >> PAGE_SHIFT
    end = vma.end >> PAGE_SHIFT

    pmes = pagemap.get(start, end - start)
    present = pmes & PM_PAGE_PRESENT != 0
    pfns = pmes & PM_PFN_MASK
    pfns = pfns[present]
    vfns = np.arange(start, end, dtype=np.uint64)
    vfns = vfns[present]

    pfn_vec = cont_ranges_all([pfns], [pfns])[0]
    flags = kpageflags.getv(pfn_vec)
    anons = flags & KPF_ANON != 0
    heads = flags & KPF_COMPOUND_HEAD != 0
    thps = flags & KPF_THP != 0

    vfns = vfns[thps]
    pfns = pfns[thps]
    anons = anons[thps]
    heads = heads[thps]

    indexes = np.arange(len(vfns), dtype=np.uint64)
    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])

    thpstats = thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads)
    contstats = [cont_parse(vma, order, ranges, anons, heads) for order in contorders]

    tot_anon = vma.stats['Anonymous']['value']
    tot_file = vma.stats['Rss']['value'] - tot_anon

    return {
        **thpstats,
        **{k: v for s in contstats for k, v in s.items()}
    }, tot_anon, tot_file


def do_main(args):
    pids = set()
    rollup = {}
    rollup_anon = 0
    rollup_file = 0

    if args.cgroup:
        strict = False
        for walk_info in os.walk(args.cgroup):
            cgroup = walk_info[0]
            with open(f'{cgroup}/cgroup.procs') as pidfile:
                for line in pidfile.readlines():
                    pids.add(int(line.strip()))
    elif args.pid:
        strict = True
        pids = pids.union(args.pid)
    else:
        strict = False
        for pid in os.listdir('/proc'):
            if pid.isdigit():
                pids.add(int(pid))

    if not args.rollup:
        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")

    for pid in pids:
        try:
            with PageMap(pid) as pagemap:
                with KPageFlags() as kpageflags:
                    for vma in VMAList(pid, vma_all_stats if args.inc_smaps else vma_min_stats):
                        if (vma.read or vma.write or vma.execute) and vma.stats['Rss']['value'] > 0:
                            stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
                        else:
                            stats = {}
                            vma_anon = 0
                            vma_file = 0
                        if args.inc_smaps:
                            stats = {**vma.stats, **stats}
                        if args.rollup:
                            for k, v in stats.items():
                                if k in rollup:
                                    assert(rollup[k]['type'] == v['type'])
                                    rollup[k]['value'] += v['value']
                                else:
                                    rollup[k] = v
                            rollup_anon += vma_anon
                            rollup_file += vma_file
                        else:
                            vma_print(vma, pid)
                            stats_print(stats, vma_anon, vma_file, args.inc_empty)
        except (FileNotFoundError, ProcessLookupError, FileIOException):
            if strict:
                raise

    if args.rollup:
        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)


def main():
    docs_width = shutil.get_terminal_size().columns
    docs_width -= 2
    docs_width = min(80, docs_width)

    def format(string):
        text = re.sub(r'\s+', ' ', string)
        text = re.sub(r'\s*\\n\s*', '\n', text)
        paras = text.split('\n')
        paras = [textwrap.fill(p, width=docs_width) for p in paras]
        return '\n'.join(paras)

    def formatter(prog):
        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)

    def size2order(human):
        units = {
            "K": 2**10, "M": 2**20, "G": 2**30,
            "k": 2**10, "m": 2**20, "g": 2**30,
        }
        unit = 1
        if human[-1] in units:
            unit = units[human[-1]]
            human = human[:-1]
        try:
            size = int(human)
        except ValueError:
            raise ArgException('error: --cont value must be integer size with optional KMG unit')
        size *= unit
        order = int(math.log2(size / PAGE_SIZE))
        if order < 1:
            raise ArgException('error: --cont value must be size of at least 2 pages')
        if (1 << order) * PAGE_SIZE != size:
            raise ArgException('error: --cont value must be size of power-of-2 pages')
        if order > PMD_ORDER:
            raise ArgException('error: --cont value must be less than or equal to PMD order')
        return order
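    # For example (assuming a 4 KiB base page size), size2order('64K') == 4.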

    parser = argparse.ArgumentParser(formatter_class=formatter,
        description=format("""Prints information about how transparent huge
                    pages are mapped, either system-wide, or for a specified
                    process or cgroup.\\n
                    \\n
                    When run with --pid, the user explicitly specifies the set
                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
                    with --cgroup, the user passes either a v1 or v2 cgroup and
                    all pids that belong to the cgroup subtree are scanned. When
                    run with neither --pid nor --cgroup, the full set of pids on
                    the system is gathered from /proc and scanned as if the user
                    had provided "--pid 1 --pid 2 ...".\\n
                    \\n
                    A default set of statistics is always generated for THP
                    mappings. However, it is also possible to generate
                    additional statistics for "contiguous block mappings" where
                    the block size is user-defined.\\n
                    \\n
                    Statistics are maintained independently for anonymous and
                    file-backed (pagecache) memory and are shown both in kB and
                    as a percentage of either total anonymous or total
                    file-backed memory as appropriate.\\n
                    \\n
                    THP Statistics\\n
                    --------------\\n
                    \\n
                    Statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is aligned to
                    their size, for each <size> supported by the system.
                    Separate counters describe THPs mapped by PTE vs those
                    mapped by PMD. (Although note that a THP can only be mapped
                    by PMD if it is PMD-sized):\\n
                    \\n
                    - anon-thp-pte-aligned-<size>kB\\n
                    - file-thp-pte-aligned-<size>kB\\n
                    - anon-thp-pmd-aligned-<size>kB\\n
                    - file-thp-pmd-aligned-<size>kB\\n
                    \\n
                    Similarly, statistics are always generated for fully- and
                    contiguously-mapped THPs whose mapping address is *not*
                    aligned to their size, for each <size> supported by the
                    system. Due to the unaligned mapping, it is impossible to
                    map by PMD, so there are only PTE counters for this case:\\n
                    \\n
                    - anon-thp-pte-unaligned-<size>kB\\n
                    - file-thp-pte-unaligned-<size>kB\\n
                    \\n
                    Statistics are also always generated for mapped pages that
                    belong to a THP but where the THP is *not* fully- and
                    contiguously-mapped. These "partial" mappings are all
                    counted in the same counter regardless of the size of the
                    THP that is partially mapped:\\n
                    \\n
                    - anon-thp-pte-partial\\n
                    - file-thp-pte-partial\\n
                    \\n
                    Contiguous Block Statistics\\n
                    ---------------------------\\n
                    \\n
                    An optional, additional set of statistics is generated for
                    every contiguous block size specified with `--cont <size>`.
                    These statistics show how much memory is mapped in
                    contiguous blocks of <size> and also aligned to <size>. A
                    given contiguous block must all belong to the same THP, but
                    there is no requirement for it to be the *whole* THP.
                    Separate counters describe contiguous blocks mapped by PTE
                    vs those mapped by PMD:\\n
                    \\n
                    - anon-cont-pte-aligned-<size>kB\\n
                    - file-cont-pte-aligned-<size>kB\\n
                    - anon-cont-pmd-aligned-<size>kB\\n
                    - file-cont-pmd-aligned-<size>kB\\n
                    \\n
                    As an example, if monitoring 64K contiguous blocks (--cont
                    64K), there are a number of sources that could provide such
                    blocks: a fully- and contiguously-mapped 64K THP that is
                    aligned to a 64K boundary would provide 1 block. A fully-
                    and contiguously-mapped 128K THP that is aligned to at least
                    a 64K boundary would provide 2 blocks. Or a 128K THP that
                    maps only its first 100K, contiguously and starting at a 64K
                    boundary, would provide 1 block. A fully- and
                    contiguously-mapped 2M THP would provide 32 blocks. There
                    are many other possible permutations.\\n"""),
        epilog=format("""Requires root privilege to access pagemap and
                    kpageflags."""))

    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--pid',
        metavar='pid', required=False, type=int, default=[], action='append',
        help="""Process id of the target process. May be issued multiple times
            to scan multiple processes. --pid and --cgroup are mutually
            exclusive. If neither is provided, all processes are scanned to
            provide system-wide information.""")

    group.add_argument('--cgroup',
        metavar='path', required=False,
        help="""Path to the target cgroup in sysfs. Iterates over every pid in
            the cgroup and its children. --pid and --cgroup are mutually
            exclusive. If neither is provided, all processes are scanned to
            provide system-wide information.""")

    parser.add_argument('--rollup',
        required=False, default=False, action='store_true',
        help="""Sum the per-vma statistics to provide a summary over the whole
            system, process or cgroup.""")

    parser.add_argument('--cont',
        metavar='size[KMG]', required=False, default=[], action='append',
        help="""Adds stats for memory that is mapped in contiguous blocks of
            <size> and also aligned to <size>. May be issued multiple times to
            track multiple sized blocks. Useful to infer e.g. arm64 contpte and
            hpa mappings. Size must be a power-of-2 number of pages.""")

    parser.add_argument('--inc-smaps',
        required=False, default=False, action='store_true',
        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
            output.""")

    parser.add_argument('--inc-empty',
        required=False, default=False, action='store_true',
        help="""Show all statistics including those whose value is 0.""")

    parser.add_argument('--periodic',
        metavar='sleep_ms', required=False, type=int,
        help="""Run in a loop, polling every sleep_ms milliseconds.""")

    args = parser.parse_args()

    try:
        args.cont = [size2order(cont) for cont in args.cont]
    except ArgException:
        parser.print_usage()
        raise

    if args.periodic:
        while True:
            do_main(args)
            print()
            time.sleep(args.periodic / 1000)
    else:
        do_main(args)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        prog = os.path.basename(sys.argv[0])
        print(f'{prog}: {e}')
        sys.exit(1)