xref: /linux/tools/perf/pmu-events/jevents.py (revision e3b2949e3fa2fd8c19cd5fbb0424d38f70a70e9c)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
3"""Convert directories of JSON events to C code."""
4import argparse
5import csv
6from functools import lru_cache
7import json
8import metric
9import os
10import sys
11from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
12import collections
13
14# Global command line arguments.
15_args = None
16# List of regular event tables.
17_event_tables = []
18# List of event tables generated from "/sys" directories.
19_sys_event_tables = []
20# List of regular metric tables.
21_metric_tables = []
22# List of metric tables generated from "/sys" directories.
23_sys_metric_tables = []
24# Mapping between sys event table names and sys metric table names.
25_sys_event_table_to_metric_table_mapping = {}
26# Map from an event name to an architecture standard
27# JsonEvent. Architecture standard events are in json files in the top
28# f'{_args.starting_dir}/{_args.arch}' directory.
29_arch_std_events = {}
30# Events to write out when the table is closed
31_pending_events = []
32# Name of events table to be written out
33_pending_events_tblname = None
34# Metrics to write out when the table is closed
35_pending_metrics = []
36# Name of metrics table to be written out
37_pending_metrics_tblname = None
38# Global BigCString shared by all structures.
39_bcs = None
40# Map from the name of a metric group to a description of the group.
41_metricgroups = {}
42# Order in which JsonEvent attributes will be visited.
43_json_event_attributes = [
44    # cmp_sevent related attributes.
45    'name', 'topic', 'desc',
46    # Seems useful, put it early.
47    'event',
48    # Short things in alphabetical order.
49    'compat', 'deprecated', 'perpkg', 'unit',
50    # Longer things (the last won't be iterated over during decompress).
51    'long_desc'
52]
53
54# Attributes that are in pmu_metric rather than pmu_event.
55_json_metric_attributes = [
56    'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
57    'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
58    'default_metricgroup_name', 'aggr_mode', 'event_grouping'
59]
60# Attributes that are bools or enum int values, encoded as '0', '1',...
61_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
62
63def removesuffix(s: str, suffix: str) -> str:
64  """Remove the suffix from a string
65
66  The removesuffix function is added to str in Python 3.9. We aim for 3.6
67  compatibility and so provide our own function here.
68  """
69  return s[0:-len(suffix)] if s.endswith(suffix) else s
70
71
72def file_name_to_table_name(prefix: str, parents: Sequence[str],
73                            dirname: str) -> str:
74  """Generate a C table name from directory names."""
75  tblname = prefix
76  for p in parents:
77    tblname += '_' + p
78  tblname += '_' + dirname
79  return tblname.replace('-', '_')
80
81
82def c_len(s: str) -> int:
83  """Return the length of s a C string
84
85  This doesn't handle all escape characters properly. It first assumes
86  every \\ is an escape and then adjusts for having over counted escaped
87  \\. The code uses \000 rather than \0 as the terminator because an
88  adjacent digit would be folded into the escape (i.e. "\0" + "5" is not
89  a terminator followed by the digit 5 but the single escape \05). The
90  code adjusts for \000 but not properly for all octal, hex or unicode
91  values.
92  """
93  try:
94    utf = s.encode(encoding='utf-8', errors='strict')
95  except:
96    print(f'broken string {s}')
97    raise
98  return len(utf) - utf.count(b'\\') + utf.count(b'\\\\') - (utf.count(b'\\000') * 2)
99
100class BigCString:
101  """A class to hold many strings concatenated together.
102
103  Generating a large number of stand-alone C strings creates a large
104  number of relocations in position independent code. The BigCString
105  is a helper for this case. It builds a single string which within it
106  are all the other C strings (to avoid memory issues the string
107  itself is held as a list of strings). The offsets within the big
108  string are recorded and when stored to disk these don't need
109  relocation. To reduce the size of the string further, identical
110  strings are merged. If a longer string ends with the same value as a
111  shorter string, these entries are also merged.
112  """
113  strings: Set[str]
114  big_string: Sequence[str]
115  offsets: Dict[str, int]
116  insert_number: int
117  insert_point: Dict[str, int]
118  metrics: Set[str]
119
120  def __init__(self):
121    self.strings = set()
122    self.insert_number = 0
123    self.insert_point = {}
124    self.metrics = set()
125
126  def add(self, s: str, metric: bool) -> None:
127    """Called to add to the big string."""
128    if s not in self.strings:
129      self.strings.add(s)
130      self.insert_point[s] = self.insert_number
131      self.insert_number += 1
132      if metric:
133        self.metrics.add(s)
134
135  def compute(self) -> None:
136    """Called once all strings are added to compute the string and offsets."""
137
138    folded_strings = {}
139    # Determine if two strings can be folded, i.e. let one string use the
140    # end of another. First reverse all strings and sort them.
141    sorted_reversed_strings = sorted([x[::-1] for x in self.strings])
142
143    # Strings 'xyz' and 'yz' will now be [ 'zy', 'zyx' ]. Scan forward
144    # for each string to see if there is a better candidate to fold it
145    # into; in the example rather than using 'yz' we can use 'xyz' at
146    # an offset of 1. We record which string can be folded into which
147    # in folded_strings, we don't need to record the offset as it is
148    # trivially computed from the string lengths.
149    for pos,s in enumerate(sorted_reversed_strings):
150      best_pos = pos
151      for check_pos in range(pos + 1, len(sorted_reversed_strings)):
152        if sorted_reversed_strings[check_pos].startswith(s):
153          best_pos = check_pos
154        else:
155          break
156      if pos != best_pos:
157        folded_strings[s[::-1]] = sorted_reversed_strings[best_pos][::-1]
158
159    # Compute reverse mappings for debugging.
160    fold_into_strings = collections.defaultdict(set)
161    for key, val in folded_strings.items():
162      if key != val:
163        fold_into_strings[val].add(key)
164
165    # big_string_offset is the current location within the C string
166    # being appended to - comments, etc. don't count. big_string is
167    # the string contents represented as a list. Strings are immutable
168    # in Python and so appending to one causes memory issues, while
169    # lists are mutable.
170    big_string_offset = 0
171    self.big_string = []
172    self.offsets = {}
173
174    def string_cmp_key(s: str) -> Tuple[bool, int, str]:
175      return (s in self.metrics, self.insert_point[s], s)
176
177    # Emit all strings that aren't folded in a sorted manner.
178    for s in sorted(self.strings, key=string_cmp_key):
179      if s not in folded_strings:
180        self.offsets[s] = big_string_offset
181        self.big_string.append(f'/* offset={big_string_offset} */ "')
182        self.big_string.append(s)
183        self.big_string.append('"')
184        if s in fold_into_strings:
185          self.big_string.append(' /* also: ' + ', '.join(fold_into_strings[s]) + ' */')
186        self.big_string.append('\n')
187        big_string_offset += c_len(s)
188        continue
189
190    # Compute the offsets of the folded strings.
191    for s in folded_strings.keys():
192      assert s not in self.offsets
193      folded_s = folded_strings[s]
194      self.offsets[s] = self.offsets[folded_s] + c_len(folded_s) - c_len(s)
195
196_bcs = BigCString()
197
198class JsonEvent:
199  """Representation of an event loaded from a json file dictionary."""
200
201  def __init__(self, jd: dict):
202    """Constructor passed the dictionary of parsed json values."""
203
204    def llx(x: int) -> str:
205      """Convert an int to a string similar to a printf modifier of %#llx."""
206      return str(x) if x >= 0 and x < 10 else hex(x)
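      # For example, llx(5) == '5' while llx(60) == '0x3c'.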
207
208    def fixdesc(s: str) -> str:
209      """Fix formatting issue for the desc string."""
210      if s is None:
211        return None
212      return removesuffix(removesuffix(removesuffix(s, '.  '),
213                                       '. '), '.').replace('\n', '\\n').replace(
214                                           '\"', '\\"').replace('\r', '\\r')
215
216    def convert_aggr_mode(aggr_mode: str) -> Optional[str]:
217      """Returns the aggr_mode_class enum value associated with the JSON string."""
218      if not aggr_mode:
219        return None
220      aggr_mode_to_enum = {
221          'PerChip': '1',
222          'PerCore': '2',
223      }
224      return aggr_mode_to_enum[aggr_mode]
225
226    def convert_metric_constraint(metric_constraint: str) -> Optional[str]:
227      """Returns the metric_event_groups enum value associated with the JSON string."""
228      if not metric_constraint:
229        return None
230      metric_constraint_to_enum = {
231          'NO_GROUP_EVENTS': '1',
232          'NO_GROUP_EVENTS_NMI': '2',
233          'NO_NMI_WATCHDOG': '2',
234          'NO_GROUP_EVENTS_SMT': '3',
235      }
236      return metric_constraint_to_enum[metric_constraint]
237
238    def lookup_msr(num: str) -> Optional[str]:
239      """Converts the msr number, or first in a list to the appropriate event field."""
240      if not num:
241        return None
242      msrmap = {
243          0x3F6: 'ldlat=',
244          0x1A6: 'offcore_rsp=',
245          0x1A7: 'offcore_rsp=',
246          0x3F7: 'frontend=',
247      }
248      return msrmap[int(num.split(',', 1)[0], 0)]
249
250    def real_event(name: str, event: str) -> Optional[str]:
251      """Convert well known event names to an event string otherwise use the event argument."""
252      fixed = {
253          'inst_retired.any': 'event=0xc0,period=2000003',
254          'inst_retired.any_p': 'event=0xc0,period=2000003',
255          'cpu_clk_unhalted.ref': 'event=0x0,umask=0x03,period=2000003',
256          'cpu_clk_unhalted.thread': 'event=0x3c,period=2000003',
257          'cpu_clk_unhalted.core': 'event=0x3c,period=2000003',
258          'cpu_clk_unhalted.thread_any': 'event=0x3c,any=1,period=2000003',
259      }
260      if not name:
261        return None
262      if name.lower() in fixed:
263        return fixed[name.lower()]
264      return event
265
266    def unit_to_pmu(unit: str) -> Optional[str]:
267      """Convert a JSON Unit to Linux PMU name."""
268      if not unit:
269        return 'default_core'
270      # Comment brought over from jevents.c:
271      # it's not realistic to keep adding these, we need something more scalable ...
272      table = {
273          'CBO': 'uncore_cbox',
274          'QPI LL': 'uncore_qpi',
275          'SBO': 'uncore_sbox',
276          'iMPH-U': 'uncore_arb',
277          'CPU-M-CF': 'cpum_cf',
278          'CPU-M-SF': 'cpum_sf',
279          'PAI-CRYPTO' : 'pai_crypto',
280          'PAI-EXT' : 'pai_ext',
281          'UPI LL': 'uncore_upi',
282          'hisi_sicl,cpa': 'hisi_sicl,cpa',
283          'hisi_sccl,ddrc': 'hisi_sccl,ddrc',
284          'hisi_sccl,hha': 'hisi_sccl,hha',
285          'hisi_sccl,l3c': 'hisi_sccl,l3c',
286          'imx8_ddr': 'imx8_ddr',
287          'imx9_ddr': 'imx9_ddr',
288          'L3PMC': 'amd_l3',
289          'DFPMC': 'amd_df',
290          'UMCPMC': 'amd_umc',
291          'cpu_core': 'cpu_core',
292          'cpu_atom': 'cpu_atom',
293          'ali_drw': 'ali_drw',
294          'arm_cmn': 'arm_cmn',
295          'tool': 'tool',
296      }
297      return table[unit] if unit in table else f'uncore_{unit.lower()}'
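      # For example, unit_to_pmu('CBO') == 'uncore_cbox', while an unlisted
      # unit such as a hypothetical 'IMC' falls through to 'uncore_imc'.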
298
299    def is_zero(val: str) -> bool:
300      try:
301        if val.startswith('0x'):
302          return int(val, 16) == 0
303        else:
304          return int(val) == 0
305      except ValueError:
306        return False
307
308    def canonicalize_value(val: str) -> str:
309      try:
310        if val.startswith('0x'):
311          return llx(int(val, 16))
312        return str(int(val))
313      except ValueError:
314        return val
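    # Hedged examples: canonicalize_value('0x003C') == '0x3c',
    # canonicalize_value('007') == '7', and a non-numeric value such as a
    # hypothetical 'config1=3' is returned unchanged.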
315
316    eventcode = 0
317    if 'EventCode' in jd:
318      eventcode = int(jd['EventCode'].split(',', 1)[0], 0)
319    if 'ExtSel' in jd:
320      eventcode |= int(jd['ExtSel']) << 8
321    configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
322    eventidcode = int(jd['EventidCode'], 0) if 'EventidCode' in jd else None
323    self.name = jd['EventName'].lower() if 'EventName' in jd else None
324    self.topic = ''
325    self.compat = jd.get('Compat')
326    self.desc = fixdesc(jd.get('BriefDescription'))
327    self.long_desc = fixdesc(jd.get('PublicDescription'))
328    precise = jd.get('PEBS')
329    msr = lookup_msr(jd.get('MSRIndex'))
330    msrval = jd.get('MSRValue')
331    extra_desc = ''
332    if 'Data_LA' in jd:
333      extra_desc += '  Supports address when precise'
334      if 'Errata' in jd:
335        extra_desc += '.'
336    if 'Errata' in jd:
337      extra_desc += '  Spec update: ' + jd['Errata']
338    self.pmu = unit_to_pmu(jd.get('Unit'))
339    filter = jd.get('Filter')
340    self.unit = jd.get('ScaleUnit')
341    self.perpkg = jd.get('PerPkg')
342    self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode'))
343    self.deprecated = jd.get('Deprecated')
344    self.metric_name = jd.get('MetricName')
345    self.metric_group = jd.get('MetricGroup')
346    self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
347    self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
348    self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
349    self.metric_expr = None
350    if 'MetricExpr' in jd:
351      self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
352    # Note, the metric formula for the threshold isn't parsed as the &
353    # and > have incorrect precedence.
354    self.metric_threshold = jd.get('MetricThreshold')
355
356    arch_std = jd.get('ArchStdEvent')
357    if precise and self.desc and '(Precise Event)' not in self.desc:
358      extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise '
359                                                                 'event)')
360    event = None
361    if configcode is not None:
362      event = f'config={llx(configcode)}'
363    elif eventidcode is not None:
364      event = f'eventid={llx(eventidcode)}'
365    else:
366      event = f'event={llx(eventcode)}'
367    event_fields = [
368        ('AnyThread', 'any='),
369        ('PortMask', 'ch_mask='),
370        ('CounterMask', 'cmask='),
371        ('EdgeDetect', 'edge='),
372        ('FCMask', 'fc_mask='),
373        ('Invert', 'inv='),
374        ('SampleAfterValue', 'period='),
375        ('UMask', 'umask='),
376        ('NodeType', 'type='),
377        ('RdWrMask', 'rdwrmask='),
378        ('EnAllCores', 'enallcores='),
379        ('EnAllSlices', 'enallslices='),
380        ('SliceId', 'sliceid='),
381        ('ThreadMask', 'threadmask='),
382    ]
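    # Hedged example: a hypothetical JSON entry with EventCode '0x3C' and
    # UMask '0x01' produces 'event=0x3c,umask=1' once the loop below has
    # appended each non-zero field.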
383    for key, value in event_fields:
384      if key in jd and not is_zero(jd[key]):
385        event += f',{value}{canonicalize_value(jd[key])}'
386    if filter:
387      event += f',{filter}'
388    if msr:
389      event += f',{msr}{msrval}'
390    if self.desc and extra_desc:
391      self.desc += extra_desc
392    if self.long_desc and extra_desc:
393      self.long_desc += extra_desc
394    if arch_std:
395      if arch_std.lower() in _arch_std_events:
396        event = _arch_std_events[arch_std.lower()].event
397        # Copy from the architecture standard event to self for undefined fields.
398        for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
399          if hasattr(self, attr) and not getattr(self, attr):
400            setattr(self, attr, value)
401      else:
402        raise argparse.ArgumentTypeError(f'Cannot find arch std event: {arch_std}')
403
404    self.event = real_event(self.name, event)
405
406  def __repr__(self) -> str:
407    """String representation primarily for debugging."""
408    s = '{\n'
409    for attr, value in self.__dict__.items():
410      if value:
411        s += f'\t{attr} = {value},\n'
412    return s + '}'
413
414  def build_c_string(self, metric: bool) -> str:
415    s = ''
416    for attr in _json_metric_attributes if metric else _json_event_attributes:
417      x = getattr(self, attr)
418      if metric and x and attr == 'metric_expr':
419        # Convert parsed metric expressions into a string. Slashes
420        # must be doubled in the file.
421        x = x.ToPerfJson().replace('\\', '\\\\')
422      if metric and x and attr == 'metric_threshold':
423        x = x.replace('\\', '\\\\')
424      if attr in _json_enum_attributes:
425        s += x if x else '0'
426      else:
427        s += f'{x}\\000' if x else '\\000'
428    return s
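  # A hedged sketch of the packed layout built above: string attributes are
  # concatenated with '\000' separators, e.g. 'name\000topic\000desc\000...',
  # while enum attributes contribute a single character such as '0' or '1'.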
429
430  def to_c_string(self, metric: bool) -> str:
431    """Representation of the event as a C struct initializer."""
432
433    s = self.build_c_string(metric)
434    return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
435
436
437@lru_cache(maxsize=None)
438def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
439  """Read json events from the specified file."""
440  try:
441    events = json.load(open(path), object_hook=JsonEvent)
442  except BaseException:
443    print(f"Exception processing {path}")
444    raise
445  metrics: list[Tuple[str, str, metric.Expression]] = []
446  for event in events:
447    event.topic = topic
448    if event.metric_name and '-' not in event.metric_name:
449      metrics.append((event.pmu, event.metric_name, event.metric_expr))
450  updates = metric.RewriteMetricsInTermsOfOthers(metrics)
451  if updates:
452    for event in events:
453      if event.metric_name in updates:
454        # print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n'
455        #       f'to\n"{updates[event.metric_name]}"')
456        event.metric_expr = updates[event.metric_name]
457
458  return events
459
460def preprocess_arch_std_files(archpath: str) -> None:
461  """Read in all architecture standard events."""
462  global _arch_std_events
463  for item in os.scandir(archpath):
464    if item.is_file() and item.name.endswith('.json'):
465      for event in read_json_events(item.path, topic=''):
466        if event.name:
467          _arch_std_events[event.name.lower()] = event
468        if event.metric_name:
469          _arch_std_events[event.metric_name.lower()] = event
470
471
472def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
473  """Add contents of file to _pending_events table."""
474  for e in read_json_events(item.path, topic):
475    if e.name:
476      _pending_events.append(e)
477    if e.metric_name:
478      _pending_metrics.append(e)
479
480
481def print_pending_events() -> None:
482  """Optionally close events table."""
483
484  def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
485    def fix_none(s: Optional[str]) -> str:
486      if s is None:
487        return ''
488      return s
489
490    return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
491            fix_none(j.metric_name))
492
493  global _pending_events
494  if not _pending_events:
495    return
496
497  global _pending_events_tblname
498  if _pending_events_tblname.endswith('_sys'):
499    global _sys_event_tables
500    _sys_event_tables.append(_pending_events_tblname)
501  else:
502    global _event_tables
503    _event_tables.append(_pending_events_tblname)
504
505  first = True
506  last_pmu = None
507  last_name = None
508  pmus = set()
509  for event in sorted(_pending_events, key=event_cmp_key):
510    if last_pmu and last_pmu == event.pmu:
511      assert event.name != last_name, f"Duplicate event: {last_pmu}/{last_name}/ in {_pending_events_tblname}"
512    if event.pmu != last_pmu:
513      if not first:
514        _args.output_file.write('};\n')
515      pmu_name = event.pmu.replace(',', '_')
516      _args.output_file.write(
517          f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
518      first = False
519      last_pmu = event.pmu
520      pmus.add((event.pmu, pmu_name))
521
522    _args.output_file.write(event.to_c_string(metric=False))
523    last_name = event.name
524  _pending_events = []
525
526  _args.output_file.write(f"""
527}};
528
529const struct pmu_table_entry {_pending_events_tblname}[] = {{
530""")
531  for (pmu, tbl_pmu) in sorted(pmus):
532    pmu_name = f"{pmu}\\000"
533    _args.output_file.write(f"""{{
534     .entries = {_pending_events_tblname}_{tbl_pmu},
535     .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
536     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
537}},
538""")
539  _args.output_file.write('};\n\n')
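# A hedged sketch of the output for a hypothetical single-PMU table named
# 'pmu_events__test_soc_cpu' (offsets are illustrative only):
#
#   static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = {
#   { 100 }, /* event_one\000topic\000desc\000event=0x1\000... */
#   };
#   const struct pmu_table_entry pmu_events__test_soc_cpu[] = {
#   {
#        .entries = pmu_events__test_soc_cpu_default_core,
#        .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core),
#        .pmu_name = { 50 /* default_core\000 */ },
#   },
#   };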
540
541def print_pending_metrics() -> None:
542  """Optionally close metrics table."""
543
544  def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
545    def fix_none(s: Optional[str]) -> str:
546      if s is None:
547        return ''
548      return s
549
550    return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name))
551
552  global _pending_metrics
553  if not _pending_metrics:
554    return
555
556  global _pending_metrics_tblname
557  if _pending_metrics_tblname.endswith('_sys'):
558    global _sys_metric_tables
559    _sys_metric_tables.append(_pending_metrics_tblname)
560  else:
561    global _metric_tables
562    _metric_tables.append(_pending_metrics_tblname)
563
564  first = True
565  last_pmu = None
566  pmus = set()
567  for metric in sorted(_pending_metrics, key=metric_cmp_key):
568    if metric.pmu != last_pmu:
569      if not first:
570        _args.output_file.write('};\n')
571      pmu_name = metric.pmu.replace(',', '_')
572      _args.output_file.write(
573          f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
574      first = False
575      last_pmu = metric.pmu
576      pmus.add((metric.pmu, pmu_name))
577
578    _args.output_file.write(metric.to_c_string(metric=True))
579  _pending_metrics = []
580
581  _args.output_file.write(f"""
582}};
583
584const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
585""")
586  for (pmu, tbl_pmu) in sorted(pmus):
587    pmu_name = f"{pmu}\\000"
588    _args.output_file.write(f"""{{
589     .entries = {_pending_metrics_tblname}_{tbl_pmu},
590     .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
591     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
592}},
593""")
594  _args.output_file.write('};\n\n')
595
596def get_topic(topic: str) -> str:
597  if topic.endswith('metrics.json'):
598    return 'metrics'
599  return removesuffix(topic, '.json').replace('-', ' ')
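  # For example, get_topic('floating-point.json') == 'floating point' and
  # any file name ending in 'metrics.json' maps to the topic 'metrics'.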
600
601def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
602
603  if item.is_dir():
604    return
605
606  # base dir or too deep
607  level = len(parents)
608  if level == 0 or level > 4:
609    return
610
611  # Ignore other directories. If the file name does not have a .json
612  # extension, ignore it. It could be a readme.txt for instance.
613  if not item.is_file() or not item.name.endswith('.json'):
614    return
615
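  # metricgroups.json maps metric group names to descriptions; a hypothetical
  # entry could look like:
  #   { "Backend": "Metrics related to backend-bound bottlenecks" }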
616  if item.name == 'metricgroups.json':
617    metricgroup_descriptions = json.load(open(item.path))
618    for mgroup in metricgroup_descriptions:
619      assert len(mgroup) > 1, parents
620      description = f"{metricgroup_descriptions[mgroup]}\\000"
621      mgroup = f"{mgroup}\\000"
622      _bcs.add(mgroup, metric=True)
623      _bcs.add(description, metric=True)
624      _metricgroups[mgroup] = description
625    return
626
627  topic = get_topic(item.name)
628  for event in read_json_events(item.path, topic):
629    pmu_name = f"{event.pmu}\\000"
630    if event.name:
631      _bcs.add(pmu_name, metric=False)
632      _bcs.add(event.build_c_string(metric=False), metric=False)
633    if event.metric_name:
634      _bcs.add(pmu_name, metric=True)
635      _bcs.add(event.build_c_string(metric=True), metric=True)
636
637def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
638  """Process a JSON file during the main walk."""
639  def is_leaf_dir_ignoring_sys(path: str) -> bool:
640    for item in os.scandir(path):
641      if item.is_dir() and item.name != 'sys':
642        return False
643    return True
644
645  # Model directories are leaves (ignoring possible sys
646  # directories). The FTW will walk into the directory next. Flush
647  # pending events and metrics and update the table names for the new
648  # model directory.
649  if item.is_dir() and is_leaf_dir_ignoring_sys(item.path):
650    print_pending_events()
651    print_pending_metrics()
652
653    global _pending_events_tblname
654    _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
655    global _pending_metrics_tblname
656    _pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)
657
658    if item.name == 'sys':
659      _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
660    return
661
662  # base dir or too deep
663  level = len(parents)
664  if level == 0 or level > 4:
665    return
666
667  # Ignore other directories. If the file name does not have a .json
668  # extension, ignore it. It could be a readme.txt for instance.
669  if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json':
670    return
671
672  add_events_table_entries(item, get_topic(item.name))
673
674
675def print_mapping_table(archs: Sequence[str]) -> None:
676  """Read the mapfile and generate the struct from cpuid string to event table."""
677  _args.output_file.write("""
678/* Struct used to make the PMU event table implementation opaque to callers. */
679struct pmu_events_table {
680        const struct pmu_table_entry *pmus;
681        uint32_t num_pmus;
682};
683
684/* Struct used to make the PMU metric table implementation opaque to callers. */
685struct pmu_metrics_table {
686        const struct pmu_table_entry *pmus;
687        uint32_t num_pmus;
688};
689
690/*
691 * Map a CPU to its table of PMU events. The CPU is identified by the
692 * cpuid field, which is an arch-specific identifier for the CPU.
693 * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
694 * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c.
695 *
696 * The cpuid can contain any character other than the comma.
697 */
698struct pmu_events_map {
699        const char *arch;
700        const char *cpuid;
701        struct pmu_events_table event_table;
702        struct pmu_metrics_table metric_table;
703};
704
705/*
706 * Global table mapping each known CPU for the architecture to its
707 * table of PMU events.
708 */
709const struct pmu_events_map pmu_events_map[] = {
710""")
711  for arch in archs:
712    if arch == 'test':
713      _args.output_file.write("""{
714\t.arch = "testarch",
715\t.cpuid = "testcpu",
716\t.event_table = {
717\t\t.pmus = pmu_events__test_soc_cpu,
718\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
719\t},
720\t.metric_table = {
721\t\t.pmus = pmu_metrics__test_soc_cpu,
722\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
723\t}
724},
725""")
726    elif arch == 'common':
727      _args.output_file.write("""{
728\t.arch = "common",
729\t.cpuid = "common",
730\t.event_table = {
731\t\t.pmus = pmu_events__common,
732\t\t.num_pmus = ARRAY_SIZE(pmu_events__common),
733\t},
734\t.metric_table = {},
735},
736""")
737    else:
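      # A mapfile.csv row is expected to look roughly like the hypothetical
      #   GenuineIntel-6-8E,v24,skylake,core
      # where row[0] is the cpuid pattern and row[2] names the model table.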
738      with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile:
739        table = csv.reader(csvfile)
740        first = True
741        for row in table:
742          # Skip the first row or any row beginning with #.
743          if not first and len(row) > 0 and not row[0].startswith('#'):
744            event_tblname = file_name_to_table_name('pmu_events_', [], row[2].replace('/', '_'))
745            if event_tblname in _event_tables:
746              event_size = f'ARRAY_SIZE({event_tblname})'
747            else:
748              event_tblname = 'NULL'
749              event_size = '0'
750            metric_tblname = file_name_to_table_name('pmu_metrics_', [], row[2].replace('/', '_'))
751            if metric_tblname in _metric_tables:
752              metric_size = f'ARRAY_SIZE({metric_tblname})'
753            else:
754              metric_tblname = 'NULL'
755              metric_size = '0'
756            if event_size == '0' and metric_size == '0':
757              continue
758            cpuid = row[0].replace('\\', '\\\\')
759            _args.output_file.write(f"""{{
760\t.arch = "{arch}",
761\t.cpuid = "{cpuid}",
762\t.event_table = {{
763\t\t.pmus = {event_tblname},
764\t\t.num_pmus = {event_size}
765\t}},
766\t.metric_table = {{
767\t\t.pmus = {metric_tblname},
768\t\t.num_pmus = {metric_size}
769\t}}
770}},
771""")
772          first = False
773
774  _args.output_file.write("""{
775\t.arch = 0,
776\t.cpuid = 0,
777\t.event_table = { 0, 0 },
778\t.metric_table = { 0, 0 },
779}
780};
781""")
782
783
784def print_system_mapping_table() -> None:
785  """C struct mapping table array for tables from /sys directories."""
786  _args.output_file.write("""
787struct pmu_sys_events {
788\tconst char *name;
789\tstruct pmu_events_table event_table;
790\tstruct pmu_metrics_table metric_table;
791};
792
793static const struct pmu_sys_events pmu_sys_event_tables[] = {
794""")
795  printed_metric_tables = []
796  for tblname in _sys_event_tables:
797    _args.output_file.write(f"""\t{{
798\t\t.event_table = {{
799\t\t\t.pmus = {tblname},
800\t\t\t.num_pmus = ARRAY_SIZE({tblname})
801\t\t}},""")
802    metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
803    if metric_tblname in _sys_metric_tables:
804      _args.output_file.write(f"""
805\t\t.metric_table = {{
806\t\t\t.pmus = {metric_tblname},
807\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
808\t\t}},""")
809      printed_metric_tables.append(metric_tblname)
810    _args.output_file.write(f"""
811\t\t.name = \"{tblname}\",
812\t}},
813""")
814  for tblname in _sys_metric_tables:
815    if tblname in printed_metric_tables:
816      continue
817    _args.output_file.write(f"""\t{{
818\t\t.metric_table = {{
819\t\t\t.pmus = {tblname},
820\t\t\t.num_pmus = ARRAY_SIZE({tblname})
821\t\t}},
822\t\t.name = \"{tblname}\",
823\t}},
824""")
825  _args.output_file.write("""\t{
826\t\t.event_table = { 0, 0 },
827\t\t.metric_table = { 0, 0 },
828\t},
829};
830
831static void decompress_event(int offset, struct pmu_event *pe)
832{
833\tconst char *p = &big_c_string[offset];
834""")
835  for attr in _json_event_attributes:
836    _args.output_file.write(f'\n\tpe->{attr} = ')
837    if attr in _json_enum_attributes:
838      _args.output_file.write("*p - '0';\n")
839    else:
840      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
841    if attr == _json_event_attributes[-1]:
842      continue
843    if attr in _json_enum_attributes:
844      _args.output_file.write('\tp++;')
845    else:
846      _args.output_file.write('\twhile (*p++);')
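  # A hedged sketch of the C this loop generates for one string attribute:
  #   pe->name = (*p == '\0' ? NULL : p);
  #   while (*p++);
  # whereas an enum attribute becomes "*p - '0';" followed by "p++;".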
847  _args.output_file.write("""}
848
849static void decompress_metric(int offset, struct pmu_metric *pm)
850{
851\tconst char *p = &big_c_string[offset];
852""")
853  for attr in _json_metric_attributes:
854    _args.output_file.write(f'\n\tpm->{attr} = ')
855    if attr in _json_enum_attributes:
856      _args.output_file.write("*p - '0';\n")
857    else:
858      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
859    if attr == _json_metric_attributes[-1]:
860      continue
861    if attr in _json_enum_attributes:
862      _args.output_file.write('\tp++;')
863    else:
864      _args.output_file.write('\twhile (*p++);')
865  _args.output_file.write("""}
866
867static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
868                                                const struct pmu_table_entry *pmu,
869                                                pmu_event_iter_fn fn,
870                                                void *data)
871{
872        int ret;
873        struct pmu_event pe = {
874                .pmu = &big_c_string[pmu->pmu_name.offset],
875        };
876
877        for (uint32_t i = 0; i < pmu->num_entries; i++) {
878                decompress_event(pmu->entries[i].offset, &pe);
879                if (!pe.name)
880                        continue;
881                ret = fn(&pe, table, data);
882                if (ret)
883                        return ret;
884        }
885        return 0;
886}
887
888static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
889                                            const struct pmu_table_entry *pmu,
890                                            const char *name,
891                                            pmu_event_iter_fn fn,
892                                            void *data)
893{
894        struct pmu_event pe = {
895                .pmu = &big_c_string[pmu->pmu_name.offset],
896        };
897        int low = 0, high = pmu->num_entries - 1;
898
899        while (low <= high) {
900                int cmp, mid = (low + high) / 2;
901
902                decompress_event(pmu->entries[mid].offset, &pe);
903
904                if (!pe.name && !name)
905                        goto do_call;
906
907                if (!pe.name && name) {
908                        low = mid + 1;
909                        continue;
910                }
911                if (pe.name && !name) {
912                        high = mid - 1;
913                        continue;
914                }
915
916                cmp = strcasecmp(pe.name, name);
917                if (cmp < 0) {
918                        low = mid + 1;
919                        continue;
920                }
921                if (cmp > 0) {
922                        high = mid - 1;
923                        continue;
924                }
925  do_call:
926                return fn ? fn(&pe, table, data) : 0;
927        }
928        return PMU_EVENTS__NOT_FOUND;
929}
930
931int pmu_events_table__for_each_event(const struct pmu_events_table *table,
932                                    struct perf_pmu *pmu,
933                                    pmu_event_iter_fn fn,
934                                    void *data)
935{
936        for (size_t i = 0; i < table->num_pmus; i++) {
937                const struct pmu_table_entry *table_pmu = &table->pmus[i];
938                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
939                int ret;
940
941                if (pmu && !pmu__name_match(pmu, pmu_name))
942                        continue;
943
944                ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
945                if (ret)
946                        return ret;
947        }
948        return 0;
949}
950
951int pmu_events_table__find_event(const struct pmu_events_table *table,
952                                 struct perf_pmu *pmu,
953                                 const char *name,
954                                 pmu_event_iter_fn fn,
955                                 void *data)
956{
957        for (size_t i = 0; i < table->num_pmus; i++) {
958                const struct pmu_table_entry *table_pmu = &table->pmus[i];
959                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
960                int ret;
961
962                if (!pmu__name_match(pmu, pmu_name))
963                        continue;
964
965                ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
966                if (ret != PMU_EVENTS__NOT_FOUND)
967                        return ret;
968        }
969        return PMU_EVENTS__NOT_FOUND;
970}
971
972size_t pmu_events_table__num_events(const struct pmu_events_table *table,
973                                    struct perf_pmu *pmu)
974{
975        size_t count = 0;
976
977        for (size_t i = 0; i < table->num_pmus; i++) {
978                const struct pmu_table_entry *table_pmu = &table->pmus[i];
979                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
980
981                if (pmu__name_match(pmu, pmu_name))
982                        count += table_pmu->num_entries;
983        }
984        return count;
985}
986
987static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
988                                                const struct pmu_table_entry *pmu,
989                                                pmu_metric_iter_fn fn,
990                                                void *data)
991{
992        int ret;
993        struct pmu_metric pm = {
994                .pmu = &big_c_string[pmu->pmu_name.offset],
995        };
996
997        for (uint32_t i = 0; i < pmu->num_entries; i++) {
998                decompress_metric(pmu->entries[i].offset, &pm);
999                if (!pm.metric_expr)
1000                        continue;
1001                ret = fn(&pm, table, data);
1002                if (ret)
1003                        return ret;
1004        }
1005        return 0;
1006}
1007
1008int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
1009                                     pmu_metric_iter_fn fn,
1010                                     void *data)
1011{
1012        for (size_t i = 0; i < table->num_pmus; i++) {
1013                int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
1014                                                                 fn, data);
1015
1016                if (ret)
1017                        return ret;
1018        }
1019        return 0;
1020}
1021
1022static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
1023{
1024        static struct {
1025                const struct pmu_events_map *map;
1026                struct perf_pmu *pmu;
1027        } last_result;
1028        static struct {
1029                const struct pmu_events_map *map;
1030                char *cpuid;
1031        } last_map_search;
1032        static bool has_last_result, has_last_map_search;
1033        const struct pmu_events_map *map = NULL;
1034        char *cpuid = NULL;
1035        size_t i;
1036
1037        if (has_last_result && last_result.pmu == pmu)
1038                return last_result.map;
1039
1040        cpuid = perf_pmu__getcpuid(pmu);
1041
1042        /*
1043         * On some platforms which use a cpus map, the cpuid can be NULL for
1044         * PMUs other than CORE PMUs.
1045         */
1046        if (!cpuid)
1047                goto out_update_last_result;
1048
1049        if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
1050                map = last_map_search.map;
1051                free(cpuid);
1052        } else {
1053                i = 0;
1054                for (;;) {
1055                        map = &pmu_events_map[i++];
1056
1057                        if (!map->arch) {
1058                                map = NULL;
1059                                break;
1060                        }
1061
1062                        if (!strcmp_cpuid_str(map->cpuid, cpuid))
1063                                break;
1064                }
1065                free(last_map_search.cpuid);
1066                last_map_search.cpuid = cpuid;
1067                last_map_search.map = map;
1068                has_last_map_search = true;
1069        }
1070out_update_last_result:
1071        last_result.pmu = pmu;
1072        last_result.map = map;
1073        has_last_result = true;
1074        return map;
1075}
1076
1077const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
1078{
1079        const struct pmu_events_map *map = map_for_pmu(pmu);
1080
1081        if (!map)
1082                return NULL;
1083
1084        if (!pmu)
1085                return &map->event_table;
1086
1087        for (size_t i = 0; i < map->event_table.num_pmus; i++) {
1088                const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
1089                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1090
1091                if (pmu__name_match(pmu, pmu_name))
1092                         return &map->event_table;
1093        }
1094        return NULL;
1095}
1096
1097const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
1098{
1099        const struct pmu_events_map *map = map_for_pmu(pmu);
1100
1101        if (!map)
1102                return NULL;
1103
1104        if (!pmu)
1105                return &map->metric_table;
1106
1107        for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
1108                const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
1109                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1110
1111                if (pmu__name_match(pmu, pmu_name))
1112                           return &map->metric_table;
1113        }
1114        return NULL;
1115}
1116
1117const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
1118{
1119        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1120             tables->arch;
1121             tables++) {
1122                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1123                        return &tables->event_table;
1124        }
1125        return NULL;
1126}
1127
1128const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
1129{
1130        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1131             tables->arch;
1132             tables++) {
1133                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1134                        return &tables->metric_table;
1135        }
1136        return NULL;
1137}
1138
1139int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
1140{
1141        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1142             tables->arch;
1143             tables++) {
1144                int ret = pmu_events_table__for_each_event(&tables->event_table,
1145                                                           /*pmu=*/ NULL, fn, data);
1146
1147                if (ret)
1148                        return ret;
1149        }
1150        return 0;
1151}
1152
1153int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
1154{
1155        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1156             tables->arch;
1157             tables++) {
1158                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1159
1160                if (ret)
1161                        return ret;
1162        }
1163        return 0;
1164}
1165
1166const struct pmu_events_table *find_sys_events_table(const char *name)
1167{
1168        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1169             tables->name;
1170             tables++) {
1171                if (!strcmp(tables->name, name))
1172                        return &tables->event_table;
1173        }
1174        return NULL;
1175}
1176
1177int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
1178{
1179        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1180             tables->name;
1181             tables++) {
1182                int ret = pmu_events_table__for_each_event(&tables->event_table,
1183                                                           /*pmu=*/ NULL, fn, data);
1184
1185                if (ret)
1186                        return ret;
1187        }
1188        return 0;
1189}
1190
1191int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
1192{
1193        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1194             tables->name;
1195             tables++) {
1196                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1197
1198                if (ret)
1199                        return ret;
1200        }
1201        return 0;
1202}
1203""")
1204
1205def print_metricgroups() -> None:
1206  _args.output_file.write("""
1207static const int metricgroups[][2] = {
1208""")
1209  for mgroup in sorted(_metricgroups):
1210    description = _metricgroups[mgroup]
1211    _args.output_file.write(
1212        f'\t{{ {_bcs.offsets[mgroup]}, {_bcs.offsets[description]} }}, /* {mgroup} => {description} */\n'
1213    )
1214  _args.output_file.write("""
1215};
1216
1217const char *describe_metricgroup(const char *group)
1218{
1219        int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;
1220
1221        while (low <= high) {
1222                int mid = (low + high) / 2;
1223                const char *mgroup = &big_c_string[metricgroups[mid][0]];
1224                int cmp = strcmp(mgroup, group);
1225
1226                if (cmp == 0) {
1227                        return &big_c_string[metricgroups[mid][1]];
1228                } else if (cmp < 0) {
1229                        low = mid + 1;
1230                } else {
1231                        high = mid - 1;
1232                }
1233        }
1234        return NULL;
1235}
1236""")
1237
1238def main() -> None:
1239  global _args
1240
1241  def dir_path(path: str) -> str:
1242    """Validate path is a directory for argparse."""
1243    if os.path.isdir(path):
1244      return path
1245    raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory')
1246
1247  def ftw(path: str, parents: Sequence[str],
1248          action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
1249    """Replicate the directory/file walking behavior of C's file tree walk."""
1250    for item in sorted(os.scandir(path), key=lambda e: e.name):
1251      if _args.model != 'all' and item.is_dir():
1252        # Check if the model matches one in _args.model.
1253        if len(parents) == _args.model.split(',')[0].count('/'):
1254          # We're testing the correct directory.
1255          item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name
1256          if 'test' not in item_path and 'common' not in item_path and item_path not in _args.model.split(','):
1257            continue
1258      action(parents, item)
1259      if item.is_dir():
1260        ftw(item.path, parents + [item.name], action)
1261
1262  ap = argparse.ArgumentParser()
1263  ap.add_argument('arch', help='Architecture name like x86')
1264  ap.add_argument('model', help='''Select a model such as skylake to
1265reduce the code size.  Normally set to "all". For architectures like
1266ARM64 with an implementor/model, the model must include the implementor
1267such as "arm/cortex-a34".''',
1268                  default='all')
1269  ap.add_argument(
1270      'starting_dir',
1271      type=dir_path,
1272      help='Root of tree containing architecture directories containing json files'
1273  )
1274  ap.add_argument(
1275      'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout)
1276  _args = ap.parse_args()
1277
1278  _args.output_file.write(f"""
1279/* SPDX-License-Identifier: GPL-2.0 */
1280/* THIS FILE WAS AUTOGENERATED BY jevents.py arch={_args.arch} model={_args.model} ! */
1281""")
1282  _args.output_file.write("""
1283#include <pmu-events/pmu-events.h>
1284#include "util/header.h"
1285#include "util/pmu.h"
1286#include <string.h>
1287#include <stddef.h>
1288
1289struct compact_pmu_event {
1290        int offset;
1291};
1292
1293struct pmu_table_entry {
1294        const struct compact_pmu_event *entries;
1295        uint32_t num_entries;
1296        struct compact_pmu_event pmu_name;
1297};
1298
1299""")
1300  archs = []
1301  for item in os.scandir(_args.starting_dir):
1302    if not item.is_dir():
1303      continue
1304    if item.name == _args.arch or _args.arch == 'all' or item.name == 'test' or item.name == 'common':
1305      archs.append(item.name)
1306
1307  if len(archs) < 2 and _args.arch != 'none':
1308    raise IOError(f'Missing architecture directory \'{_args.arch}\'')
1309
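  # First pass: walk every architecture to collect all strings into the
  # shared BigCString so that offsets are known before any table is printed.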
1310  archs.sort()
1311  for arch in archs:
1312    arch_path = f'{_args.starting_dir}/{arch}'
1313    preprocess_arch_std_files(arch_path)
1314    ftw(arch_path, [], preprocess_one_file)
1315
1316  _bcs.compute()
1317  _args.output_file.write('static const char *const big_c_string =\n')
1318  for s in _bcs.big_string:
1319    _args.output_file.write(s)
1320  _args.output_file.write(';\n\n')
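  # Second pass: emit the per-model event and metric tables, then the
  # mapping tables that tie cpuids and /sys names to them.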
1321  for arch in archs:
1322    arch_path = f'{_args.starting_dir}/{arch}'
1323    ftw(arch_path, [], process_one_file)
1324    print_pending_events()
1325    print_pending_metrics()
1326
1327  print_mapping_table(archs)
1328  print_system_mapping_table()
1329  print_metricgroups()
1330
1331if __name__ == '__main__':
1332  main()
1333