xref: /linux/tools/perf/pmu-events/jevents.py (revision 5afca7e996c42aed1b4a42d4712817601ba42aff)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
3"""Convert directories of JSON events to C code."""
4import argparse
5import csv
6from functools import lru_cache
7import json
8import metric
9import os
10import sys
11from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
12import collections
13
# Global command line arguments. Starts as None; the code in this file reads
# attributes such as _args.output_file and _args.starting_dir from it.
_args = None
# List of regular event tables.
_event_tables = []
# List of event tables generated from "/sys" directories.
_sys_event_tables = []
# List of regular metric tables.
_metric_tables = []
# List of metric tables generated from "/sys" directories.
_sys_metric_tables = []
# Mapping between sys event table names and sys metric table names.
_sys_event_table_to_metric_table_mapping = {}
# Map from a lower-cased event (or metric) name to an architecture
# standard JsonEvent. Architecture standard events are in json files
# in the top f'{_args.starting_dir}/{_args.arch}' directory.
_arch_std_events = {}
# Events to write out when the table is closed.
_pending_events = []
# Name of the events table to be written out.
_pending_events_tblname = None
# Metrics to write out when the table is closed.
_pending_metrics = []
# Name of the metrics table to be written out.
_pending_metrics_tblname = None
# Global BigCString shared by all structures.
_bcs = None
# Map from the name of a metric group to a description of the group.
_metricgroups = {}
# Order in which JsonEvent attributes are visited when an event is
# encoded (and, correspondingly, decoded by the generated C code).
_json_event_attributes = [
    # cmp_sevent related attributes.
    'name', 'topic', 'desc',
    # Seems useful, put it early.
    'event',
    # Short things in alphabetical order.
    'compat', 'deprecated', 'perpkg', 'unit',
    # Longer things (the last won't be iterated over during decompress).
    'long_desc'
]

# Attributes that are in pmu_metric rather than pmu_event, in encoding order.
_json_metric_attributes = [
    'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
    'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
    'default_metricgroup_name', 'aggr_mode', 'event_grouping'
]
# Attributes that are bools or enum int values, encoded as '0', '1',...
# (a single character with no string terminator).
_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
62
def removesuffix(s: str, suffix: str) -> str:
  """Remove the suffix from a string.

  The removesuffix function is added to str in Python 3.9. We aim for 3.6
  compatibility and so provide our own function here.

  Args:
    s: The string to strip.
    suffix: The suffix to remove when s ends with it.

  Returns:
    s without the trailing suffix, or s unchanged when it does not end
    with suffix or when suffix is empty.
  """
  # Every string "ends with" the empty suffix, and s[:-0] is the empty
  # string, so the empty suffix must be special-cased to leave s intact.
  if suffix and s.endswith(suffix):
    return s[:-len(suffix)]
  return s
70
71
def file_name_to_table_name(prefix: str, parents: Sequence[str],
                            dirname: str) -> str:
  """Build a C identifier for a table from its directory components.

  The prefix is followed by every parent directory name and finally the
  directory name itself, each introduced by an underscore. Dashes are
  not valid in C identifiers so they are turned into underscores.
  """
  components = [prefix]
  components.extend(f'_{parent}' for parent in parents)
  components.append(f'_{dirname}')
  return ''.join(components).replace('-', '_')
80
81
def c_len(s: str) -> int:
  r"""Return the length of s as a C string.

  s holds the text of a C string literal, so escape sequences are
  still spelled out with backslashes. This doesn't handle all escape
  characters properly. It first assumes every \ introduces a
  one-character escape, it then adjusts for \\ as both of its
  backslashes will have been counted as escapes. The code uses \000
  rather than \0 as a terminator as an adjacent number would be folded
  into the escape (ie. "\0" + "5" doesn't equal a terminator followed
  by the number 5 but the escape \05). The code adjusts for \000 but
  not properly for all octal, hex or unicode values.

  Args:
    s: The body of a C string literal.

  Returns:
    The number of bytes the literal occupies once compiled, based on
    its UTF-8 encoding.

  Raises:
    UnicodeEncodeError: If s cannot be encoded as UTF-8.
  """
  try:
    utf = s.encode(encoding='utf-8', errors='strict')
  except UnicodeEncodeError:
    # Name the offending string before propagating the error.
    print(f'broken string {s}')
    raise
  # Each backslash removes one byte, a doubled backslash gives one byte
  # back, and \000 has two further digits that do not count.
  return len(utf) - utf.count(b'\\') + utf.count(b'\\\\') - (utf.count(b'\\000') * 2)
99
class BigCString:
  """A class to hold many strings concatenated together.

  Generating a large number of stand-alone C strings creates a large
  number of relocations in position independent code. The BigCString
  is a helper for this case. It builds a single string which within it
  are all the other C strings (to avoid memory issues the string
  itself is held as a list of strings). The offsets within the big
  string are recorded and when stored to disk these don't need
  relocation. To reduce the size of the string further, identical
  strings are merged. If a longer string ends-with the same value as a
  shorter string, these entries are also merged.

  Usage is two phase: call add() for every string, then compute()
  once, after which big_string and offsets are available.
  """
  # Every distinct string that has been added.
  strings: Set[str]
  # Pieces of the generated C source for the big string, set by compute().
  big_string: Sequence[str]
  # Offset of each added string within the big string, set by compute().
  offsets: Dict[str, int]
  # Counter handed out to strings in order of first insertion.
  insert_number: int
  # Insertion order of each string, used to sort the output.
  insert_point: Dict[str, int]
  # Strings whose first insertion was flagged as being for a metric.
  metrics: Set[str]

  def __init__(self):
    self.strings = set()
    self.insert_number = 0
    self.insert_point = {}
    self.metrics = set()

  def add(self, s: str, metric: bool) -> None:
    """Called to add to the big string.

    Args:
      s: The string to add. A string that was already added is ignored.
      metric: Whether the string belongs to a metric. Only the value
        passed on the first insertion of s is recorded.
    """
    if s not in self.strings:
      self.strings.add(s)
      self.insert_point[s] = self.insert_number
      self.insert_number += 1
      if metric:
        self.metrics.add(s)

  def compute(self) -> None:
    """Called once all strings are added to compute the string and offsets."""

    folded_strings = {}
    # Determine if two strings can be folded, ie. let 1 string use the
    # end of another. First reverse all strings and sort them.
    sorted_reversed_strings = sorted([x[::-1] for x in self.strings])

    # Strings 'xyz' and 'yz' will now be [ 'zy', 'zyx' ]. Scan forward
    # for each string to see if there is a better candidate to fold it
    # into, in the example rather than using 'yz' we can use 'xyz' at
    # an offset of 1. We record which string can be folded into which
    # in folded_strings, we don't need to record the offset as it is
    # trivially computed from the string lengths.
    for pos, s in enumerate(sorted_reversed_strings):
      best_pos = pos
      for check_pos in range(pos + 1, len(sorted_reversed_strings)):
        if sorted_reversed_strings[check_pos].startswith(s):
          best_pos = check_pos
        else:
          break
      if pos != best_pos:
        folded_strings[s[::-1]] = sorted_reversed_strings[best_pos][::-1]

    # Compute reverse mappings for debugging.
    fold_into_strings = collections.defaultdict(set)
    for key, val in folded_strings.items():
      if key != val:
        fold_into_strings[val].add(key)

    # big_string_offset is the current location within the C string
    # being appended to - comments, etc. don't count. big_string is
    # the string contents represented as a list. Strings are immutable
    # in Python and so appending to one causes memory issues, while
    # lists are mutable.
    big_string_offset = 0
    self.big_string = []
    self.offsets = {}

    def string_cmp_key(s: str) -> Tuple[bool, int, str]:
      # Non-metric strings first, then in order of first insertion.
      return (s in self.metrics, self.insert_point[s], s)

    # Emit all strings that aren't folded in a sorted manner.
    for s in sorted(self.strings, key=string_cmp_key):
      if s not in folded_strings:
        self.offsets[s] = big_string_offset
        self.big_string.append(f'/* offset={big_string_offset} */ "')
        self.big_string.append(s)
        self.big_string.append('"')
        if s in fold_into_strings:
          # Sort the set so the generated comment, and therefore the
          # output file, is identical from one run to the next.
          self.big_string.append(' /* also: ' + ', '.join(sorted(fold_into_strings[s])) + ' */')
        self.big_string.append('\n')
        big_string_offset += c_len(s)

    # Compute the offsets of the folded strings. A folded string shares
    # the tail of the string it was folded into.
    for s in folded_strings.keys():
      assert s not in self.offsets
      folded_s = folded_strings[s]
      self.offsets[s] = self.offsets[folded_s] + c_len(folded_s) - c_len(s)
195
# Create the shared BigCString now that the class is defined, replacing the
# None placeholder that _bcs was given at the top of the file.
_bcs = BigCString()
197
class JsonEvent:
  """Representation of an event loaded from a json file dictionary.

  The constructor translates the JSON keys into the attributes listed
  in _json_event_attributes and _json_metric_attributes (plus pmu),
  building the perf event string from the individual encoding fields.
  """

  def __init__(self, jd: dict):
    """Constructor passed the dictionary of parsed json values.

    Args:
      jd: One JSON object describing an event and/or a metric.

    Raises:
      argparse.ArgumentTypeError: If 'ArchStdEvent' names an event
        that is not in _arch_std_events.
      KeyError: If 'AggregationMode', 'MetricConstraint' or 'MSRIndex'
        holds a value missing from the corresponding lookup table.
    """

    def llx(x: int) -> str:
      """Convert an int to a string similar to a printf modifier of %#llx."""
      return str(x) if x >= 0 and x < 10 else hex(x)

    def fixdesc(s: str) -> str:
      """Fix formatting issue for the desc string."""
      if s is None:
        return None
      # Drop a trailing full stop (and spaces after it), then escape the
      # characters that can't appear verbatim in a C string literal.
      return removesuffix(removesuffix(removesuffix(s, '.  '),
                                       '. '), '.').replace('\n', '\\n').replace(
                                           '\"', '\\"').replace('\r', '\\r')

    def convert_aggr_mode(aggr_mode: str) -> Optional[str]:
      """Returns the aggr_mode_class enum value associated with the JSON string."""
      if not aggr_mode:
        return None
      aggr_mode_to_enum = {
          'PerChip': '1',
          'PerCore': '2',
      }
      return aggr_mode_to_enum[aggr_mode]

    def convert_metric_constraint(metric_constraint: str) -> Optional[str]:
      """Returns the metric_event_groups enum value associated with the JSON string."""
      if not metric_constraint:
        return None
      metric_constraint_to_enum = {
          'NO_GROUP_EVENTS': '1',
          'NO_GROUP_EVENTS_NMI': '2',
          'NO_NMI_WATCHDOG': '2',
          'NO_GROUP_EVENTS_SMT': '3',
      }
      return metric_constraint_to_enum[metric_constraint]

    def lookup_msr(num: str) -> Optional[str]:
      """Converts the msr number, or first in a list to the appropriate event field."""
      if not num:
        return None
      msrmap = {
          0x3F6: 'ldlat=',
          0x1A6: 'offcore_rsp=',
          0x1A7: 'offcore_rsp=',
          0x3F7: 'frontend=',
      }
      return msrmap[int(num.split(',', 1)[0], 0)]

    def real_event(name: str, event: str) -> Optional[str]:
      """Convert well known event names to an event string otherwise use the event argument."""
      fixed = {
          'inst_retired.any': 'event=0xc0,period=2000003',
          'inst_retired.any_p': 'event=0xc0,period=2000003',
          'cpu_clk_unhalted.ref': 'event=0x0,umask=0x03,period=2000003',
          'cpu_clk_unhalted.thread': 'event=0x3c,period=2000003',
          'cpu_clk_unhalted.core': 'event=0x3c,period=2000003',
          'cpu_clk_unhalted.thread_any': 'event=0x3c,any=1,period=2000003',
      }
      if not name:
        return None
      if name.lower() in fixed:
        return fixed[name.lower()]
      return event

    def unit_to_pmu(unit: str) -> Optional[str]:
      """Convert a JSON Unit to Linux PMU name."""
      if not unit:
        return 'default_core'
      # Comment brought over from jevents.c:
      # it's not realistic to keep adding these, we need something more scalable ...
      table = {
          'CBO': 'uncore_cbox',
          'QPI LL': 'uncore_qpi',
          'SBO': 'uncore_sbox',
          'iMPH-U': 'uncore_arb',
          'CPU-M-CF': 'cpum_cf',
          'CPU-M-SF': 'cpum_sf',
          'PAI-CRYPTO' : 'pai_crypto',
          'PAI-EXT' : 'pai_ext',
          'UPI LL': 'uncore_upi',
          'hisi_sicl,cpa': 'hisi_sicl,cpa',
          'hisi_sccl,ddrc': 'hisi_sccl,ddrc',
          'hisi_sccl,hha': 'hisi_sccl,hha',
          'hisi_sccl,l3c': 'hisi_sccl,l3c',
          'imx8_ddr': 'imx8_ddr',
          'imx9_ddr': 'imx9_ddr',
          'L3PMC': 'amd_l3',
          'DFPMC': 'amd_df',
          'UMCPMC': 'amd_umc',
          'cpu_core': 'cpu_core',
          'cpu_atom': 'cpu_atom',
          'ali_drw': 'ali_drw',
          'arm_cmn': 'arm_cmn',
      }
      return table[unit] if unit in table else f'uncore_{unit.lower()}'

    def is_zero(val: str) -> bool:
      """Return True if val parses as a decimal or 0x-prefixed hex zero."""
      try:
        if val.startswith('0x'):
          return int(val, 16) == 0
        else:
          return int(val) == 0
      except ValueError:
        # Not a number, so it can't be zero.
        return False

    def canonicalize_value(val: str) -> str:
      """Normalize a numeric string, leaving non-numeric values untouched."""
      try:
        if val.startswith('0x'):
          return llx(int(val, 16))
        return str(int(val))
      except ValueError:
        return val

    eventcode = 0
    if 'EventCode' in jd:
      # Only the first code of a comma separated list is used.
      eventcode = int(jd['EventCode'].split(',', 1)[0], 0)
    if 'ExtSel' in jd:
      eventcode |= int(jd['ExtSel']) << 8
    configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
    eventidcode = int(jd['EventidCode'], 0) if 'EventidCode' in jd else None
    self.name = jd['EventName'].lower() if 'EventName' in jd else None
    self.topic = ''
    self.compat = jd.get('Compat')
    self.desc = fixdesc(jd.get('BriefDescription'))
    self.long_desc = fixdesc(jd.get('PublicDescription'))
    precise = jd.get('PEBS')
    msr = lookup_msr(jd.get('MSRIndex'))
    msrval = jd.get('MSRValue')
    extra_desc = ''
    if 'Data_LA' in jd:
      extra_desc += '  Supports address when precise'
      if 'Errata' in jd:
        extra_desc += '.'
    if 'Errata' in jd:
      extra_desc += '  Spec update: ' + jd['Errata']
    self.pmu = unit_to_pmu(jd.get('Unit'))
    event_filter = jd.get('Filter')
    self.unit = jd.get('ScaleUnit')
    self.perpkg = jd.get('PerPkg')
    self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode'))
    self.deprecated = jd.get('Deprecated')
    self.metric_name = jd.get('MetricName')
    self.metric_group = jd.get('MetricGroup')
    self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
    self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
    self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
    self.metric_expr = None
    if 'MetricExpr' in jd:
      self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
    # Note, the metric formula for the threshold isn't parsed as the &
    # and > have incorrect precedence.
    self.metric_threshold = jd.get('MetricThreshold')

    arch_std = jd.get('ArchStdEvent')
    if precise and self.desc and '(Precise Event)' not in self.desc:
      extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise '
                                                                 'event)')
    # A config code takes priority over an event id code, which in turn
    # takes priority over the plain event code.
    event = None
    if configcode is not None:
      event = f'config={llx(configcode)}'
    elif eventidcode is not None:
      event = f'eventid={llx(eventidcode)}'
    else:
      event = f'event={llx(eventcode)}'
    # JSON key and the perf event term it is emitted as.
    event_fields = [
        ('AnyThread', 'any='),
        ('PortMask', 'ch_mask='),
        ('CounterMask', 'cmask='),
        ('EdgeDetect', 'edge='),
        ('FCMask', 'fc_mask='),
        ('Invert', 'inv='),
        ('SampleAfterValue', 'period='),
        ('UMask', 'umask='),
        ('NodeType', 'type='),
        ('RdWrMask', 'rdwrmask='),
        ('EnAllCores', 'enallcores='),
        ('EnAllSlices', 'enallslices='),
        ('SliceId', 'sliceid='),
        ('ThreadMask', 'threadmask='),
    ]
    for key, value in event_fields:
      # Terms whose value is zero are omitted from the event string.
      if key in jd and not is_zero(jd[key]):
        event += f',{value}{canonicalize_value(jd[key])}'
    if event_filter:
      event += f',{event_filter}'
    if msr:
      event += f',{msr}{msrval}'
    if self.desc and extra_desc:
      self.desc += extra_desc
    if self.long_desc and extra_desc:
      self.long_desc += extra_desc
    if arch_std:
      if arch_std.lower() in _arch_std_events:
        event = _arch_std_events[arch_std.lower()].event
        # Copy from the architecture standard event to self for undefined fields.
        for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
          if hasattr(self, attr) and not getattr(self, attr):
            setattr(self, attr, value)
      else:
        raise argparse.ArgumentTypeError('Cannot find arch std event:', arch_std)

    self.event = real_event(self.name, event)

  def __repr__(self) -> str:
    """String representation primarily for debugging."""
    s = '{\n'
    for attr, value in self.__dict__.items():
      if value:
        s += f'\t{attr} = {value},\n'
    return s + '}'

  def build_c_string(self, metric: bool) -> str:
    """Encode the event or metric as the text of a C string literal.

    Attributes are visited in the order of _json_metric_attributes or
    _json_event_attributes. Those in _json_enum_attributes contribute
    their value, or '0' when unset, with no terminator. Every other
    attribute contributes its value, if any, followed by a \\000
    escape.

    Args:
      metric: True to encode the metric attributes, False for the event
        attributes.
    """
    s = ''
    for attr in _json_metric_attributes if metric else _json_event_attributes:
      x = getattr(self, attr)
      if metric and x and attr == 'metric_expr':
        # Convert parsed metric expressions into a string. Slashes
        # must be doubled in the file.
        x = x.ToPerfJson().replace('\\', '\\\\')
      if metric and x and attr == 'metric_threshold':
        x = x.replace('\\', '\\\\')
      if attr in _json_enum_attributes:
        s += x if x else '0'
      else:
        s += f'{x}\\000' if x else '\\000'
    return s

  def to_c_string(self, metric: bool) -> str:
    """Representation of the event as a C struct initializer.

    The initializer holds the offset of the encoded string within the
    shared _bcs big string, with the encoded string as a comment.
    """

    s = self.build_c_string(metric)
    return f'{{ {_bcs.offsets[s]} }}, /* {s} */\n'
434
435
@lru_cache(maxsize=None)
def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
  """Read json events from the specified file.

  Every JSON object in the file is turned into a JsonEvent. Metrics
  whose name contains no '-' are then offered to
  metric.RewriteMetricsInTermsOfOthers and any returned expressions
  replace the originals. Results are cached per (path, topic).

  Args:
    path: The JSON file to load.
    topic: The topic assigned to every event that is read.

  Returns:
    The events parsed from the file.
  """
  try:
    # The context manager closes the file once it has been parsed.
    with open(path) as json_file:
      events = json.load(json_file, object_hook=JsonEvent)
  except BaseException:
    # Identify the offending file, then let the error propagate.
    print(f"Exception processing {path}")
    raise
  metrics: list[Tuple[str, str, metric.Expression]] = []
  for event in events:
    event.topic = topic
    if event.metric_name and '-' not in event.metric_name:
      metrics.append((event.pmu, event.metric_name, event.metric_expr))
  updates = metric.RewriteMetricsInTermsOfOthers(metrics)
  if updates:
    for event in events:
      if event.metric_name in updates:
        # print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n'
        #       f'to\n"{updates[event.metric_name]}"')
        event.metric_expr = updates[event.metric_name]

  return events
458
def preprocess_arch_std_files(archpath: str) -> None:
  """Load the architecture standard events found directly in archpath.

  Each event from every .json file is recorded in _arch_std_events
  under its lower-cased event name and, when it has one, under its
  lower-cased metric name.
  """
  for entry in os.scandir(archpath):
    if not entry.is_file() or not entry.name.endswith('.json'):
      continue
    for std_event in read_json_events(entry.path, topic=''):
      for key in (std_event.name, std_event.metric_name):
        if key:
          _arch_std_events[key.lower()] = std_event
468          _arch_std_events[event.metric_name.lower()] = event
469
470
def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
  """Queue the contents of a JSON file for the tables being built.

  Entries with an event name go to _pending_events and entries with a
  metric name go to _pending_metrics; an entry with both is queued on
  both lists.
  """
  for parsed in read_json_events(item.path, topic):
    for present, pending in ((parsed.name, _pending_events),
                             (parsed.metric_name, _pending_metrics)):
      if present:
        pending.append(parsed)
477      _pending_metrics.append(e)
478
479
def print_pending_events() -> None:
  """Optionally close events table.

  Does nothing when no events are pending. Otherwise the current table
  name is recorded in _sys_event_tables (names ending in '_sys') or
  _event_tables, one compact_pmu_event array per PMU is written to
  _args.output_file followed by the pmu_table_entry array indexing
  them, and the pending list is reset.
  """
  # Only _pending_events is rebound here; the other module level names
  # are merely read or appended to and need no global declaration.
  global _pending_events

  def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
    def fix_none(s: Optional[str]) -> str:
      if s is None:
        return ''
      return s

    return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
            fix_none(j.metric_name))

  if not _pending_events:
    return

  if _pending_events_tblname.endswith('_sys'):
    _sys_event_tables.append(_pending_events_tblname)
  else:
    _event_tables.append(_pending_events_tblname)

  first = True
  last_pmu = None
  last_name = None
  pmus = set()
  for event in sorted(_pending_events, key=event_cmp_key):
    if last_pmu and last_pmu == event.pmu:
      assert event.name != last_name, f"Duplicate event: {last_pmu}/{last_name}/ in {_pending_events_tblname}"
    if event.pmu != last_pmu:
      # A new PMU starts a new array, closing the previous one if any.
      if not first:
        _args.output_file.write('};\n')
      # Commas aren't valid in C identifiers.
      pmu_name = event.pmu.replace(',', '_')
      _args.output_file.write(
          f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
      first = False
      last_pmu = event.pmu
      pmus.add((event.pmu, pmu_name))

    _args.output_file.write(event.to_c_string(metric=False))
    last_name = event.name
  _pending_events = []

  _args.output_file.write(f"""
}};

const struct pmu_table_entry {_pending_events_tblname}[] = {{
""")
  for (pmu, tbl_pmu) in sorted(pmus):
    # The PMU name is looked up in the big string with its terminator.
    pmu_name = f"{pmu}\\000"
    _args.output_file.write(f"""{{
     .entries = {_pending_events_tblname}_{tbl_pmu},
     .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
}},
""")
  _args.output_file.write('};\n\n')
539
def print_pending_metrics() -> None:
  """Optionally close metrics table.

  Does nothing when no metrics are pending. Otherwise the current table
  name is recorded in _sys_metric_tables (names ending in '_sys') or
  _metric_tables, one array per PMU is written to _args.output_file
  followed by the pmu_table_entry array indexing them, and the pending
  list is reset.
  """
  # Only _pending_metrics is rebound here; the other module level names
  # are merely read or appended to and need no global declaration.
  global _pending_metrics

  def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
    def fix_none(s: Optional[str]) -> str:
      if s is None:
        return ''
      return s

    return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name))

  if not _pending_metrics:
    return

  if _pending_metrics_tblname.endswith('_sys'):
    _sys_metric_tables.append(_pending_metrics_tblname)
  else:
    _metric_tables.append(_pending_metrics_tblname)

  first = True
  last_pmu = None
  pmus = set()
  # The loop variable is deliberately not called 'metric' so that it
  # doesn't hide the imported metric module.
  for pending in sorted(_pending_metrics, key=metric_cmp_key):
    if pending.pmu != last_pmu:
      # A new PMU starts a new array, closing the previous one if any.
      if not first:
        _args.output_file.write('};\n')
      # Commas aren't valid in C identifiers.
      pmu_name = pending.pmu.replace(',', '_')
      _args.output_file.write(
          f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
      first = False
      last_pmu = pending.pmu
      pmus.add((pending.pmu, pmu_name))

    _args.output_file.write(pending.to_c_string(metric=True))
  _pending_metrics = []

  _args.output_file.write(f"""
}};

const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
""")
  for (pmu, tbl_pmu) in sorted(pmus):
    # The PMU name is looked up in the big string with its terminator.
    pmu_name = f"{pmu}\\000"
    _args.output_file.write(f"""{{
     .entries = {_pending_metrics_tblname}_{tbl_pmu},
     .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
}},
""")
  _args.output_file.write('};\n\n')
594
def get_topic(topic: str) -> str:
  """Derive a topic from a JSON file name.

  Any file name ending in 'metrics.json' has the topic 'metrics'.
  Otherwise the topic is the file name without its '.json' suffix and
  with dashes replaced by spaces.
  """
  is_metrics_file = topic.endswith('metrics.json')
  if not is_metrics_file:
    base_name = removesuffix(topic, '.json')
    return base_name.replace('-', ' ')
  return 'metrics'
599
def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Add the strings of one JSON file to the shared big string.

  Directories, files that aren't .json, and files at the base level or
  more than 4 levels deep are ignored. A metricgroups.json file
  contributes its group names and descriptions, which are also
  recorded in _metricgroups. Any other file contributes the PMU name
  and encoded string of each of its events and metrics.

  Args:
    parents: Names of the directories above item.
    item: The directory entry being visited.
  """

  if item.is_dir():
    return

  # base dir or too deep
  level = len(parents)
  if level == 0 or level > 4:
    return

  # Ignore other directories. If the file name does not have a .json
  # extension, ignore it. It could be a readme.txt for instance.
  if not item.is_file() or not item.name.endswith('.json'):
    return

  if item.name == 'metricgroups.json':
    # The context manager closes the file once it has been parsed.
    with open(item.path) as metricgroups_file:
      metricgroup_descriptions = json.load(metricgroups_file)
    for mgroup in metricgroup_descriptions:
      assert len(mgroup) > 1, parents
      description = f"{metricgroup_descriptions[mgroup]}\\000"
      mgroup = f"{mgroup}\\000"
      _bcs.add(mgroup, metric=True)
      _bcs.add(description, metric=True)
      _metricgroups[mgroup] = description
    return

  topic = get_topic(item.name)
  for event in read_json_events(item.path, topic):
    pmu_name = f"{event.pmu}\\000"
    if event.name:
      _bcs.add(pmu_name, metric=False)
      _bcs.add(event.build_c_string(metric=False), metric=False)
    if event.metric_name:
      _bcs.add(pmu_name, metric=True)
      _bcs.add(event.build_c_string(metric=True), metric=True)
635
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Process a JSON file during the main walk.

  A directory with no sub-directory other than 'sys' flushes the
  pending events and metrics and starts new tables named after it.
  A .json file other than metricgroups.json, between 1 and 4 levels
  deep, has its contents queued for the current tables. Everything
  else is ignored.

  Args:
    parents: Names of the directories above item.
    item: The directory entry being visited.
  """
  global _pending_events_tblname
  global _pending_metrics_tblname

  def is_leaf_dir_ignoring_sys(path: str) -> bool:
    # The context manager closes the directory iterator even though
    # any() may stop before it is exhausted.
    with os.scandir(path) as entries:
      return not any(entry.is_dir() and entry.name != 'sys' for entry in entries)

  # Model directories are leaves (ignoring possible sys
  # directories). The FTW will walk into the directory next. Flush
  # pending events and metrics and update the table names for the new
  # model directory.
  if item.is_dir() and is_leaf_dir_ignoring_sys(item.path):
    print_pending_events()
    print_pending_metrics()

    _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
    _pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)

    if item.name == 'sys':
      _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
    return

  # base dir or too deep
  level = len(parents)
  if level == 0 or level > 4:
    return

  # Ignore other directories. If the file name does not have a .json
  # extension, ignore it. It could be a readme.txt for instance.
  if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json':
    return

  add_events_table_entries(item, get_topic(item.name))
672
673
def print_mapping_table(archs: Sequence[str]) -> None:
  """Read the mapfile and generate the struct from cpuid string to event table.

  The struct declarations are written first, followed by one
  pmu_events_map entry per usable mapfile.csv row of each architecture
  (the 'test' architecture gets a fixed entry instead) and finally the
  all-zero entry that ends the array.
  """
  out = _args.output_file
  out.write("""
/* Struct used to make the PMU event table implementation opaque to callers. */
struct pmu_events_table {
        const struct pmu_table_entry *pmus;
        uint32_t num_pmus;
};

/* Struct used to make the PMU metric table implementation opaque to callers. */
struct pmu_metrics_table {
        const struct pmu_table_entry *pmus;
        uint32_t num_pmus;
};

/*
 * Map a CPU to its table of PMU events. The CPU is identified by the
 * cpuid field, which is an arch-specific identifier for the CPU.
 * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
 * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
 *
 * The  cpuid can contain any character other than the comma.
 */
struct pmu_events_map {
        const char *arch;
        const char *cpuid;
        struct pmu_events_table event_table;
        struct pmu_metrics_table metric_table;
};

/*
 * Global table mapping each known CPU for the architecture to its
 * table of PMU events.
 */
const struct pmu_events_map pmu_events_map[] = {
""")
  for arch in archs:
    if arch == 'test':
      out.write("""{
\t.arch = "testarch",
\t.cpuid = "testcpu",
\t.event_table = {
\t\t.pmus = pmu_events__test_soc_cpu,
\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
\t},
\t.metric_table = {
\t\t.pmus = pmu_metrics__test_soc_cpu,
\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
\t}
},
""")
      continue

    with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile:
      for row_index, row in enumerate(csv.reader(csvfile)):
        # Skip the first row, empty rows and any row beginning with #.
        if row_index == 0 or len(row) == 0 or row[0].startswith('#'):
          continue

        # Tables that were never generated are represented by NULL.
        event_tblname = file_name_to_table_name('pmu_events_', [], row[2].replace('/', '_'))
        event_size = f'ARRAY_SIZE({event_tblname})'
        if event_tblname not in _event_tables:
          event_tblname = 'NULL'
          event_size = '0'

        metric_tblname = file_name_to_table_name('pmu_metrics_', [], row[2].replace('/', '_'))
        metric_size = f'ARRAY_SIZE({metric_tblname})'
        if metric_tblname not in _metric_tables:
          metric_tblname = 'NULL'
          metric_size = '0'

        # Nothing to map to when neither table exists.
        if event_size == '0' and metric_size == '0':
          continue

        # Backslashes must be doubled inside the C string literal.
        cpuid = row[0].replace('\\', '\\\\')
        out.write(f"""{{
\t.arch = "{arch}",
\t.cpuid = "{cpuid}",
\t.event_table = {{
\t\t.pmus = {event_tblname},
\t\t.num_pmus = {event_size}
\t}},
\t.metric_table = {{
\t\t.pmus = {metric_tblname},
\t\t.num_pmus = {metric_size}
\t}}
}},
""")

  out.write("""{
\t.arch = 0,
\t.cpuid = 0,
\t.event_table = { 0, 0 },
\t.metric_table = { 0, 0 },
}
};
""")
770
771
772def print_system_mapping_table() -> None:
773  """C struct mapping table array for tables from /sys directories."""
774  _args.output_file.write("""
775struct pmu_sys_events {
776\tconst char *name;
777\tstruct pmu_events_table event_table;
778\tstruct pmu_metrics_table metric_table;
779};
780
781static const struct pmu_sys_events pmu_sys_event_tables[] = {
782""")
783  printed_metric_tables = []
784  for tblname in _sys_event_tables:
785    _args.output_file.write(f"""\t{{
786\t\t.event_table = {{
787\t\t\t.pmus = {tblname},
788\t\t\t.num_pmus = ARRAY_SIZE({tblname})
789\t\t}},""")
790    metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
791    if metric_tblname in _sys_metric_tables:
792      _args.output_file.write(f"""
793\t\t.metric_table = {{
794\t\t\t.pmus = {metric_tblname},
795\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
796\t\t}},""")
797      printed_metric_tables.append(metric_tblname)
798    _args.output_file.write(f"""
799\t\t.name = \"{tblname}\",
800\t}},
801""")
802  for tblname in _sys_metric_tables:
803    if tblname in printed_metric_tables:
804      continue
805    _args.output_file.write(f"""\t{{
806\t\t.metric_table = {{
807\t\t\t.pmus = {tblname},
808\t\t\t.num_pmus = ARRAY_SIZE({tblname})
809\t\t}},
810\t\t.name = \"{tblname}\",
811\t}},
812""")
813  _args.output_file.write("""\t{
814\t\t.event_table = { 0, 0 },
815\t\t.metric_table = { 0, 0 },
816\t},
817};
818
819static void decompress_event(int offset, struct pmu_event *pe)
820{
821\tconst char *p = &big_c_string[offset];
822""")
823  for attr in _json_event_attributes:
824    _args.output_file.write(f'\n\tpe->{attr} = ')
825    if attr in _json_enum_attributes:
826      _args.output_file.write("*p - '0';\n")
827    else:
828      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
829    if attr == _json_event_attributes[-1]:
830      continue
831    if attr in _json_enum_attributes:
832      _args.output_file.write('\tp++;')
833    else:
834      _args.output_file.write('\twhile (*p++);')
835  _args.output_file.write("""}
836
837static void decompress_metric(int offset, struct pmu_metric *pm)
838{
839\tconst char *p = &big_c_string[offset];
840""")
841  for attr in _json_metric_attributes:
842    _args.output_file.write(f'\n\tpm->{attr} = ')
843    if attr in _json_enum_attributes:
844      _args.output_file.write("*p - '0';\n")
845    else:
846      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
847    if attr == _json_metric_attributes[-1]:
848      continue
849    if attr in _json_enum_attributes:
850      _args.output_file.write('\tp++;')
851    else:
852      _args.output_file.write('\twhile (*p++);')
853  _args.output_file.write("""}
854
855static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
856                                                const struct pmu_table_entry *pmu,
857                                                pmu_event_iter_fn fn,
858                                                void *data)
859{
860        int ret;
861        struct pmu_event pe = {
862                .pmu = &big_c_string[pmu->pmu_name.offset],
863        };
864
865        for (uint32_t i = 0; i < pmu->num_entries; i++) {
866                decompress_event(pmu->entries[i].offset, &pe);
867                if (!pe.name)
868                        continue;
869                ret = fn(&pe, table, data);
870                if (ret)
871                        return ret;
872        }
873        return 0;
874 }
875
876static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
877                                            const struct pmu_table_entry *pmu,
878                                            const char *name,
879                                            pmu_event_iter_fn fn,
880                                            void *data)
881{
882        struct pmu_event pe = {
883                .pmu = &big_c_string[pmu->pmu_name.offset],
884        };
885        int low = 0, high = pmu->num_entries - 1;
886
887        while (low <= high) {
888                int cmp, mid = (low + high) / 2;
889
890                decompress_event(pmu->entries[mid].offset, &pe);
891
892                if (!pe.name && !name)
893                        goto do_call;
894
895                if (!pe.name && name) {
896                        low = mid + 1;
897                        continue;
898                }
899                if (pe.name && !name) {
900                        high = mid - 1;
901                        continue;
902                }
903
904                cmp = strcasecmp(pe.name, name);
905                if (cmp < 0) {
906                        low = mid + 1;
907                        continue;
908                }
909                if (cmp > 0) {
910                        high = mid - 1;
911                        continue;
912                }
913  do_call:
914                return fn ? fn(&pe, table, data) : 0;
915        }
916        return PMU_EVENTS__NOT_FOUND;
917}
918
919int pmu_events_table__for_each_event(const struct pmu_events_table *table,
920                                    struct perf_pmu *pmu,
921                                    pmu_event_iter_fn fn,
922                                    void *data)
923{
924        for (size_t i = 0; i < table->num_pmus; i++) {
925                const struct pmu_table_entry *table_pmu = &table->pmus[i];
926                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
927                int ret;
928
929                if (pmu && !pmu__name_match(pmu, pmu_name))
930                        continue;
931
932                ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
933                if (pmu || ret)
934                        return ret;
935        }
936        return 0;
937}
938
939int pmu_events_table__find_event(const struct pmu_events_table *table,
940                                 struct perf_pmu *pmu,
941                                 const char *name,
942                                 pmu_event_iter_fn fn,
943                                 void *data)
944{
945        for (size_t i = 0; i < table->num_pmus; i++) {
946                const struct pmu_table_entry *table_pmu = &table->pmus[i];
947                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
948                int ret;
949
950                if (!pmu__name_match(pmu, pmu_name))
951                        continue;
952
953                ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
954                if (ret != PMU_EVENTS__NOT_FOUND)
955                        return ret;
956        }
957        return PMU_EVENTS__NOT_FOUND;
958}
959
960size_t pmu_events_table__num_events(const struct pmu_events_table *table,
961                                    struct perf_pmu *pmu)
962{
963        size_t count = 0;
964
965        for (size_t i = 0; i < table->num_pmus; i++) {
966                const struct pmu_table_entry *table_pmu = &table->pmus[i];
967                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
968
969                if (pmu__name_match(pmu, pmu_name))
970                        count += table_pmu->num_entries;
971        }
972        return count;
973}
974
975static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
976                                                const struct pmu_table_entry *pmu,
977                                                pmu_metric_iter_fn fn,
978                                                void *data)
979{
980        int ret;
981        struct pmu_metric pm = {
982                .pmu = &big_c_string[pmu->pmu_name.offset],
983        };
984
985        for (uint32_t i = 0; i < pmu->num_entries; i++) {
986                decompress_metric(pmu->entries[i].offset, &pm);
987                if (!pm.metric_expr)
988                        continue;
989                ret = fn(&pm, table, data);
990                if (ret)
991                        return ret;
992        }
993        return 0;
994}
995
996int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
997                                     pmu_metric_iter_fn fn,
998                                     void *data)
999{
1000        for (size_t i = 0; i < table->num_pmus; i++) {
1001                int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
1002                                                                 fn, data);
1003
1004                if (ret)
1005                        return ret;
1006        }
1007        return 0;
1008}
1009
1010static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
1011{
1012        static struct {
1013                const struct pmu_events_map *map;
1014                struct perf_pmu *pmu;
1015        } last_result;
1016        static struct {
1017                const struct pmu_events_map *map;
1018                char *cpuid;
1019        } last_map_search;
1020        static bool has_last_result, has_last_map_search;
1021        const struct pmu_events_map *map = NULL;
1022        char *cpuid = NULL;
1023        size_t i;
1024
1025        if (has_last_result && last_result.pmu == pmu)
1026                return last_result.map;
1027
1028        cpuid = perf_pmu__getcpuid(pmu);
1029
1030        /*
1031         * On some platforms which uses cpus map, cpuid can be NULL for
1032         * PMUs other than CORE PMUs.
1033         */
1034        if (!cpuid)
1035                goto out_update_last_result;
1036
1037        if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
1038                map = last_map_search.map;
1039                free(cpuid);
1040        } else {
1041                i = 0;
1042                for (;;) {
1043                        map = &pmu_events_map[i++];
1044
1045                        if (!map->arch) {
1046                                map = NULL;
1047                                break;
1048                        }
1049
1050                        if (!strcmp_cpuid_str(map->cpuid, cpuid))
1051                                break;
1052               }
1053               free(last_map_search.cpuid);
1054               last_map_search.cpuid = cpuid;
1055               last_map_search.map = map;
1056               has_last_map_search = true;
1057        }
1058out_update_last_result:
1059        last_result.pmu = pmu;
1060        last_result.map = map;
1061        has_last_result = true;
1062        return map;
1063}
1064
1065const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
1066{
1067        const struct pmu_events_map *map = map_for_pmu(pmu);
1068
1069        if (!map)
1070                return NULL;
1071
1072        if (!pmu)
1073                return &map->event_table;
1074
1075        for (size_t i = 0; i < map->event_table.num_pmus; i++) {
1076                const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
1077                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1078
1079                if (pmu__name_match(pmu, pmu_name))
1080                         return &map->event_table;
1081        }
1082        return NULL;
1083}
1084
1085const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
1086{
1087        const struct pmu_events_map *map = map_for_pmu(pmu);
1088
1089        if (!map)
1090                return NULL;
1091
1092        if (!pmu)
1093                return &map->metric_table;
1094
1095        for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
1096                const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
1097                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1098
1099                if (pmu__name_match(pmu, pmu_name))
1100                           return &map->metric_table;
1101        }
1102        return NULL;
1103}
1104
1105const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
1106{
1107        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1108             tables->arch;
1109             tables++) {
1110                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1111                        return &tables->event_table;
1112        }
1113        return NULL;
1114}
1115
1116const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
1117{
1118        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1119             tables->arch;
1120             tables++) {
1121                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1122                        return &tables->metric_table;
1123        }
1124        return NULL;
1125}
1126
1127int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
1128{
1129        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1130             tables->arch;
1131             tables++) {
1132                int ret = pmu_events_table__for_each_event(&tables->event_table,
1133                                                           /*pmu=*/ NULL, fn, data);
1134
1135                if (ret)
1136                        return ret;
1137        }
1138        return 0;
1139}
1140
1141int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
1142{
1143        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1144             tables->arch;
1145             tables++) {
1146                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1147
1148                if (ret)
1149                        return ret;
1150        }
1151        return 0;
1152}
1153
1154const struct pmu_events_table *find_sys_events_table(const char *name)
1155{
1156        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1157             tables->name;
1158             tables++) {
1159                if (!strcmp(tables->name, name))
1160                        return &tables->event_table;
1161        }
1162        return NULL;
1163}
1164
1165int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
1166{
1167        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1168             tables->name;
1169             tables++) {
1170                int ret = pmu_events_table__for_each_event(&tables->event_table,
1171                                                           /*pmu=*/ NULL, fn, data);
1172
1173                if (ret)
1174                        return ret;
1175        }
1176        return 0;
1177}
1178
1179int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
1180{
1181        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1182             tables->name;
1183             tables++) {
1184                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1185
1186                if (ret)
1187                        return ret;
1188        }
1189        return 0;
1190}
1191""")
1192
def print_metricgroups() -> None:
  """Emit the metricgroups table and the describe_metricgroup() C function.

  One table row is written per entry of _metricgroups, in sorted order of
  the group name. A row holds two _bcs.offsets values: that of the group
  name and that of its description. The C function written afterwards
  binary-searches the table by group name and yields the description, or
  NULL if the group is absent.
  """
  out = _args.output_file
  out.write("""
static const int metricgroups[][2] = {
""")
  for group, text in sorted(_metricgroups.items()):
    # The trailing C comment keeps the generated row human readable.
    row = f'\t{{ {_bcs.offsets[group]}, {_bcs.offsets[text]} }}, /* {group} => {text} */\n'
    out.write(row)
  out.write("""
};

const char *describe_metricgroup(const char *group)
{
        int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;

        while (low <= high) {
                int mid = (low + high) / 2;
                const char *mgroup = &big_c_string[metricgroups[mid][0]];
                int cmp = strcmp(mgroup, group);

                if (cmp == 0) {
                        return &big_c_string[metricgroups[mid][1]];
                } else if (cmp < 0) {
                        low = mid + 1;
                } else {
                        high = mid - 1;
                }
        }
        return NULL;
}
""")
1225
def main() -> None:
  """Parse the command line and write the generated C file.

  Steps, in order: write the file header and shared struct declarations,
  pick the architecture directories below starting_dir, run the
  preprocessing pass over each of them, compute the shared big string and
  write it out, run the processing pass (flushing pending events and
  metrics after every architecture), then write the mapping tables and the
  metric group table.

  Raises:
    IOError: fewer than two directories were selected and the requested
      architecture is not 'none'.
  """
  global _args

  def dir_path(path: str) -> str:
    """argparse type callback: accept path only if it is a directory."""
    if not os.path.isdir(path):
      raise argparse.ArgumentTypeError(f"'{path}' is not a valid directory")
    return path

  def ftw(path: str, parents: Sequence[str],
          action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
    """Walk path in name order, calling action on each entry, C ftw style.

    When a model other than 'all' was requested, directories found at the
    depth given by the number of '/' in the first requested model are
    skipped unless their relative path is one of the requested models or
    contains 'test'.
    """
    wanted_models = _args.model.split(',')
    model_depth = wanted_models[0].count('/')
    filtering = _args.model != 'all'
    for entry in sorted(os.scandir(path), key=lambda dirent: dirent.name):
      if filtering and entry.is_dir() and len(parents) == model_depth:
        # This is the directory level at which models are named.
        rel_path = '/'.join([*parents, entry.name])
        if 'test' not in rel_path and rel_path not in wanted_models:
          continue
      action(parents, entry)
      if entry.is_dir():
        ftw(entry.path, parents + [entry.name], action)

  parser = argparse.ArgumentParser()
  parser.add_argument('arch', help='Architecture name like x86')
  parser.add_argument('model', help='''Select a model such as skylake to
reduce the code size.  Normally set to "all". For architectures like
ARM64 with an implementor/model, the model must include the implementor
such as "arm/cortex-a34".''',
                      default='all')
  parser.add_argument(
      'starting_dir',
      type=dir_path,
      help='Root of tree containing architecture directories containing json files'
  )
  parser.add_argument(
      'output_file',
      type=argparse.FileType('w', encoding='utf-8'),
      nargs='?',
      default=sys.stdout)
  _args = parser.parse_args()
  out = _args.output_file

  out.write(f"""
/* SPDX-License-Identifier: GPL-2.0 */
/* THIS FILE WAS AUTOGENERATED BY jevents.py arch={_args.arch} model={_args.model} ! */
""")
  out.write("""
#include <pmu-events/pmu-events.h>
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>

struct compact_pmu_event {
        int offset;
};

struct pmu_table_entry {
        const struct compact_pmu_event *entries;
        uint32_t num_entries;
        struct compact_pmu_event pmu_name;
};

""")

  # Select the requested architecture (or every one for 'all'); the 'test'
  # directory is always selected as well.
  requested = _args.arch
  archs = sorted(
      entry.name
      for entry in os.scandir(_args.starting_dir)
      if entry.is_dir() and (requested == 'all' or entry.name in (requested, 'test')))

  if requested != 'none' and len(archs) < 2:
    raise IOError(f"Missing architecture directory '{requested}'")

  arch_dirs = [f'{_args.starting_dir}/{name}' for name in archs]

  # First pass over every architecture, completed before _bcs.compute().
  for arch_dir in arch_dirs:
    preprocess_arch_std_files(arch_dir)
    ftw(arch_dir, [], preprocess_one_file)

  _bcs.compute()
  out.write('static const char *const big_c_string =\n')
  out.writelines(_bcs.big_string)
  out.write(';\n\n')

  # Second pass: process the files, flushing what is still pending once
  # each architecture has been walked.
  for arch_dir in arch_dirs:
    ftw(arch_dir, [], process_one_file)
    print_pending_events()
    print_pending_metrics()

  print_mapping_table(archs)
  print_system_mapping_table()
  print_metricgroups()
1318
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
1321