xref: /linux/tools/perf/pmu-events/jevents.py (revision 0e7eb23668948585f3f0ea8c6249338f33fde872)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
3"""Convert directories of JSON events to C code."""
4import argparse
5import csv
6from functools import lru_cache
7import json
8import metric
9import os
10import sys
11from typing import (Callable, Dict, List, Optional, Sequence, Set, Tuple)
12import collections
13
14# Global command line arguments.
15_args = None
16# List of regular event tables.
17_event_tables = []
18# List of event tables generated from "/sys" directories.
19_sys_event_tables = []
20# List of regular metric tables.
21_metric_tables = []
22# List of metric tables generated from "/sys" directories.
23_sys_metric_tables = []
24# Mapping between sys event table names and sys metric table names.
25_sys_event_table_to_metric_table_mapping = {}
26# Map from an event name to an architecture standard
27# JsonEvent. Architecture standard events are in json files in the top
28# f'{_args.starting_dir}/{_args.arch}' directory.
29_arch_std_events = {}
30# Events to write out when the table is closed
31_pending_events = []
32# Name of events table to be written out
33_pending_events_tblname = None
34# Metrics to write out when the table is closed
35_pending_metrics = []
36# Name of metrics table to be written out
37_pending_metrics_tblname = None
38# Global BigCString shared by all structures.
39_bcs = None
40# Map from the name of a metric group to a description of the group.
41_metricgroups = {}
42# Order in which specific JsonEvent attributes will be visited.
43_json_event_attributes = [
44    # cmp_sevent related attributes.
45    'name', 'topic', 'desc',
46    # Seems useful, put it early.
47    'event',
48    # Short things in alphabetical order.
49    'compat', 'deprecated', 'perpkg', 'unit',
50    # Longer things (the last won't be iterated over during decompress).
51    'long_desc'
52]
53
54# Attributes that are in pmu_metric rather than pmu_event.
55_json_metric_attributes = [
56    'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
57    'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
58    'default_metricgroup_name', 'aggr_mode', 'event_grouping'
59]
60# Attributes that are bools or enum int values, encoded as '0', '1',...
61_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
62
63def removesuffix(s: str, suffix: str) -> str:
64  """Remove the suffix from a string
65
66  The removesuffix function is added to str in Python 3.9. We aim for 3.6
67  compatibility and so provide our own function here.
68  """
69  return s[0:-len(suffix)] if s.endswith(suffix) else s
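# Illustrative examples (not in the original source), assuming the behaviour
# documented above:
#   removesuffix('pipeline.json', '.json')  -> 'pipeline'
#   removesuffix('cache', '.json')          -> 'cache'  (no suffix, unchanged)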
70
71
72def file_name_to_table_name(prefix: str, parents: Sequence[str],
73                            dirname: str) -> str:
74  """Generate a C table name from directory names."""
75  tblname = prefix
76  for p in parents:
77    tblname += '_' + p
78  tblname += '_' + dirname
79  return tblname.replace('-', '_')
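# Illustrative examples (hypothetical arguments, not from the original source):
#   file_name_to_table_name('pmu_events_', [], 'test_soc_cpu')
#       -> 'pmu_events__test_soc_cpu'
#   file_name_to_table_name('pmu_metrics_', ['arm64', 'arm'], 'cortex-a53')
#       -> 'pmu_metrics__arm64_arm_cortex_a53'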
80
81
82def c_len(s: str) -> int:
83  """Return the length of s a C string
84
85  This doesn't handle all escape characters properly. It first assumes
86  all \\ are for escaping, then adjusts for having over-counted
87  \\. The code uses \000 rather than \0 as a terminator because an
88  adjacent digit would be folded into the escape (i.e. "\0" + "5" isn't
89  a terminator followed by the digit 5, it is the single escape
90  \05). The code adjusts for \000 but not properly for all octal, hex
91  or unicode values.
92  """
93  try:
94    utf = s.encode(encoding='utf-8', errors='strict')
95  except:
96    print(f'broken string {s}')
97    raise
98  return len(utf) - utf.count(b'\\') + utf.count(b'\\\\') - (utf.count(b'\\000') * 2)
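# Illustrative examples (not in the original source): the argument is a
# Python string that already contains C escape sequences.
#   c_len('cache')      -> 5
#   c_len('desc\\000')  -> 5   (the terminator '\000' counts as one byte)
#   c_len('a\\\\b')     -> 3   (the escaped backslash '\\' counts as one byte)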
99
100class BigCString:
101  """A class to hold many strings concatenated together.
102
103  Generating a large number of stand-alone C strings creates a large
104  number of relocations in position independent code. The BigCString
105  is a helper for this case. It builds a single string that contains
106  all the other C strings (to avoid memory issues the string itself is
107  held as a list of strings). The offsets within the big string are
108  recorded and, when stored to disk, these don't need relocation. To
109  reduce the size of the string further, identical strings are merged.
110  If a longer string ends with the same value as a shorter string,
111  these entries are also merged.
112  """
113  strings: Set[str]
114  big_string: Sequence[str]
115  offsets: Dict[str, int]
116  insert_number: int
117  insert_point: Dict[str, int]
118  metrics: Set[str]
119
120  def __init__(self):
121    self.strings = set()
122    self.insert_number = 0
123    self.insert_point = {}
124    self.metrics = set()
125
126  def add(self, s: str, metric: bool) -> None:
127    """Called to add to the big string."""
128    if s not in self.strings:
129      self.strings.add(s)
130      self.insert_point[s] = self.insert_number
131      self.insert_number += 1
132      if metric:
133        self.metrics.add(s)
134
135  def compute(self) -> None:
136    """Called once all strings are added to compute the string and offsets."""
137
138    folded_strings = {}
139    # Determine if two strings can be folded, i.e. let one string use
140    # the end of another. First reverse all strings and sort them.
141    sorted_reversed_strings = sorted([x[::-1] for x in self.strings])
142
143    # Strings 'xyz' and 'yz' will now be [ 'zy', 'zyx' ]. Scan forward
144    # for each string to see if there is a better candidate to fold it
145    # into; in the example, rather than using 'yz' we can use 'xyz' at
146    # an offset of 1. We record which string can be folded into which
147    # in folded_strings; we don't need to record the offset as it is
148    # trivially computed from the string lengths.
149    for pos,s in enumerate(sorted_reversed_strings):
150      best_pos = pos
151      for check_pos in range(pos + 1, len(sorted_reversed_strings)):
152        if sorted_reversed_strings[check_pos].startswith(s):
153          best_pos = check_pos
154        else:
155          break
156      if pos != best_pos:
157        folded_strings[s[::-1]] = sorted_reversed_strings[best_pos][::-1]
158
159    # Compute reverse mappings for debugging.
160    fold_into_strings = collections.defaultdict(set)
161    for key, val in folded_strings.items():
162      if key != val:
163        fold_into_strings[val].add(key)
164
165    # big_string_offset is the current location within the C string
166    # being appended to - comments, etc. don't count. big_string is
167    # the string contents represented as a list. Strings are immutable
168    # in Python and so appending to one causes memory issues, while
169    # lists are mutable.
170    big_string_offset = 0
171    self.big_string = []
172    self.offsets = {}
173
174    def string_cmp_key(s: str) -> Tuple[bool, int, str]:
175      return (s in self.metrics, self.insert_point[s], s)
176
177    # Emit all strings that aren't folded in a sorted manner.
178    for s in sorted(self.strings, key=string_cmp_key):
179      if s not in folded_strings:
180        self.offsets[s] = big_string_offset
181        self.big_string.append(f'/* offset={big_string_offset} */ "')
182        self.big_string.append(s)
183        self.big_string.append('"')
184        if s in fold_into_strings:
185          self.big_string.append(' /* also: ' + ', '.join(fold_into_strings[s]) + ' */')
186        self.big_string.append('\n')
187        big_string_offset += c_len(s)
188        continue
189
190    # Compute the offsets of the folded strings.
191    for s in folded_strings.keys():
192      assert s not in self.offsets
193      folded_s = folded_strings[s]
194      self.offsets[s] = self.offsets[folded_s] + c_len(folded_s) - c_len(s)
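  # Illustrative example (not in the original source): if add() was called
  # with both 'uncore_cbox' and 'cbox', only "uncore_cbox" is emitted into
  # the big string and offsets['cbox'] is computed as
  # offsets['uncore_cbox'] + c_len('uncore_cbox') - c_len('cbox'), i.e. the
  # shorter string reuses the tail of the longer one.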
195
196_bcs = BigCString()
197
198class JsonEvent:
199  """Representation of an event loaded from a json file dictionary."""
200
201  def __init__(self, jd: dict):
202    """Constructor passed the dictionary of parsed json values."""
203
204    def llx(x: int) -> str:
205      """Convert an int to a string similar to a printf modifier of %#llx."""
206      return str(x) if x >= 0 and x < 10 else hex(x)
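    # For example (illustrative): llx(3) -> '3', llx(0x3c) -> '0x3c'.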
207
208    def fixdesc(s: str) -> str:
209      """Fix formatting issue for the desc string."""
210      if s is None:
211        return None
212      return removesuffix(removesuffix(removesuffix(s, '.  '),
213                                       '. '), '.').replace('\n', '\\n').replace(
214                                           '\"', '\\"').replace('\r', '\\r')
215
216    def convert_aggr_mode(aggr_mode: str) -> Optional[str]:
217      """Returns the aggr_mode_class enum value associated with the JSON string."""
218      if not aggr_mode:
219        return None
220      aggr_mode_to_enum = {
221          'PerChip': '1',
222          'PerCore': '2',
223      }
224      return aggr_mode_to_enum[aggr_mode]
225
226    def convert_metric_constraint(metric_constraint: str) -> Optional[str]:
227      """Returns the metric_event_groups enum value associated with the JSON string."""
228      if not metric_constraint:
229        return None
230      metric_constraint_to_enum = {
231          'NO_GROUP_EVENTS': '1',
232          'NO_GROUP_EVENTS_NMI': '2',
233          'NO_NMI_WATCHDOG': '2',
234          'NO_GROUP_EVENTS_SMT': '3',
235      }
236      return metric_constraint_to_enum[metric_constraint]
237
238    def lookup_msr(num: str) -> Optional[str]:
239      """Converts the msr number, or first in a list to the appropriate event field."""
240      if not num:
241        return None
242      msrmap = {
243          0x3F6: 'ldlat=',
244          0x1A6: 'offcore_rsp=',
245          0x1A7: 'offcore_rsp=',
246          0x3F7: 'frontend=',
247      }
248      return msrmap[int(num.split(',', 1)[0], 0)]
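    # Illustrative examples (hypothetical MSRIndex values):
    #   lookup_msr('0x3F6')        -> 'ldlat='
    #   lookup_msr('0x1a6,0x1a7')  -> 'offcore_rsp='  (only the first number is used)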
249
250    def real_event(name: str, event: str) -> Optional[str]:
251      """Convert well known event names to an event string otherwise use the event argument."""
252      fixed = {
253          'inst_retired.any': 'event=0xc0,period=2000003',
254          'inst_retired.any_p': 'event=0xc0,period=2000003',
255          'cpu_clk_unhalted.ref': 'event=0x0,umask=0x03,period=2000003',
256          'cpu_clk_unhalted.thread': 'event=0x3c,period=2000003',
257          'cpu_clk_unhalted.core': 'event=0x3c,period=2000003',
258          'cpu_clk_unhalted.thread_any': 'event=0x3c,any=1,period=2000003',
259      }
260      if not name:
261        return None
262      if name.lower() in fixed:
263        return fixed[name.lower()]
264      return event
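    # Illustrative examples (the second argument is a hypothetical encoding):
    #   real_event('inst_retired.any', 'event=0xc0,umask=0x1') -> 'event=0xc0,period=2000003'
    #   real_event('some_other_event', 'event=0x12')           -> 'event=0x12'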
265
266    def unit_to_pmu(unit: str) -> Optional[str]:
267      """Convert a JSON Unit to Linux PMU name."""
268      if not unit:
269        return 'default_core'
270      # Comment brought over from jevents.c:
271      # it's not realistic to keep adding these, we need something more scalable ...
272      table = {
273          'CBO': 'uncore_cbox',
274          'QPI LL': 'uncore_qpi',
275          'SBO': 'uncore_sbox',
276          'iMPH-U': 'uncore_arb',
277          'CPU-M-CF': 'cpum_cf',
278          'CPU-M-SF': 'cpum_sf',
279          'PAI-CRYPTO' : 'pai_crypto',
280          'PAI-EXT' : 'pai_ext',
281          'UPI LL': 'uncore_upi',
282          'hisi_sicl,cpa': 'hisi_sicl,cpa',
283          'hisi_sccl,ddrc': 'hisi_sccl,ddrc',
284          'hisi_sccl,hha': 'hisi_sccl,hha',
285          'hisi_sccl,l3c': 'hisi_sccl,l3c',
286          'imx8_ddr': 'imx8_ddr',
287          'imx9_ddr': 'imx9_ddr',
288          'L3PMC': 'amd_l3',
289          'DFPMC': 'amd_df',
290          'UMCPMC': 'amd_umc',
291          'cpu_core': 'cpu_core',
292          'cpu_atom': 'cpu_atom',
293          'ali_drw': 'ali_drw',
294          'arm_cmn': 'arm_cmn',
295      }
296      return table[unit] if unit in table else f'uncore_{unit.lower()}'
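    # Illustrative examples (not in the original source):
    #   unit_to_pmu('CBO')  -> 'uncore_cbox'
    #   unit_to_pmu('iMC')  -> 'uncore_imc'    (fallback: 'uncore_' + lower-cased unit)
    #   unit_to_pmu(None)   -> 'default_core'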
297
298    def is_zero(val: str) -> bool:
299      try:
300        if val.startswith('0x'):
301          return int(val, 16) == 0
302        else:
303          return int(val) == 0
304      except ValueError:
305        return False
306
307    def canonicalize_value(val: str) -> str:
308      try:
309        if val.startswith('0x'):
310          return llx(int(val, 16))
311        return str(int(val))
312      except ValueError:
313        return val
314
315    eventcode = 0
316    if 'EventCode' in jd:
317      eventcode = int(jd['EventCode'].split(',', 1)[0], 0)
318    if 'ExtSel' in jd:
319      eventcode |= int(jd['ExtSel']) << 8
320    configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
321    eventidcode = int(jd['EventidCode'], 0) if 'EventidCode' in jd else None
322    self.name = jd['EventName'].lower() if 'EventName' in jd else None
323    self.topic = ''
324    self.compat = jd.get('Compat')
325    self.desc = fixdesc(jd.get('BriefDescription'))
326    self.long_desc = fixdesc(jd.get('PublicDescription'))
327    precise = jd.get('PEBS')
328    msr = lookup_msr(jd.get('MSRIndex'))
329    msrval = jd.get('MSRValue')
330    extra_desc = ''
331    if 'Data_LA' in jd:
332      extra_desc += '  Supports address when precise'
333      if 'Errata' in jd:
334        extra_desc += '.'
335    if 'Errata' in jd:
336      extra_desc += '  Spec update: ' + jd['Errata']
337    self.pmu = unit_to_pmu(jd.get('Unit'))
338    filter = jd.get('Filter')
339    self.unit = jd.get('ScaleUnit')
340    self.perpkg = jd.get('PerPkg')
341    self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode'))
342    self.deprecated = jd.get('Deprecated')
343    self.metric_name = jd.get('MetricName')
344    self.metric_group = jd.get('MetricGroup')
345    self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
346    self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
347    self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
348    self.metric_expr = None
349    if 'MetricExpr' in jd:
350      self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
351    # Note, the metric formula for the threshold isn't parsed as the &
352    # and > have incorrect precedence.
353    self.metric_threshold = jd.get('MetricThreshold')
354
355    arch_std = jd.get('ArchStdEvent')
356    if precise and self.desc and '(Precise Event)' not in self.desc:
357      extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise '
358                                                                 'event)')
359    event = None
360    if configcode is not None:
361      event = f'config={llx(configcode)}'
362    elif eventidcode is not None:
363      event = f'eventid={llx(eventidcode)}'
364    else:
365      event = f'event={llx(eventcode)}'
366    event_fields = [
367        ('AnyThread', 'any='),
368        ('PortMask', 'ch_mask='),
369        ('CounterMask', 'cmask='),
370        ('EdgeDetect', 'edge='),
371        ('FCMask', 'fc_mask='),
372        ('Invert', 'inv='),
373        ('SampleAfterValue', 'period='),
374        ('UMask', 'umask='),
375        ('NodeType', 'type='),
376        ('RdWrMask', 'rdwrmask='),
377        ('EnAllCores', 'enallcores='),
378        ('EnAllSlices', 'enallslices='),
379        ('SliceId', 'sliceid='),
380        ('ThreadMask', 'threadmask='),
381    ]
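    # Illustrative sketch (hypothetical JSON, not from the original source):
    # {"EventCode": "0x3c", "UMask": "0x01", "CounterMask": "1"} produces
    # event == 'event=0x3c,cmask=1,umask=1' after the loop below; fields are
    # appended in event_fields order and zero-valued fields are skipped.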
382    for key, value in event_fields:
383      if key in jd and not is_zero(jd[key]):
384        event += f',{value}{canonicalize_value(jd[key])}'
385    if filter:
386      event += f',{filter}'
387    if msr:
388      event += f',{msr}{msrval}'
389    if self.desc and extra_desc:
390      self.desc += extra_desc
391    if self.long_desc and extra_desc:
392      self.long_desc += extra_desc
393    if arch_std:
394      if arch_std.lower() in _arch_std_events:
395        event = _arch_std_events[arch_std.lower()].event
396        # Copy from the architecture standard event to self for undefined fields.
397        for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
398          if hasattr(self, attr) and not getattr(self, attr):
399            setattr(self, attr, value)
400      else:
401        raise argparse.ArgumentTypeError(f'Cannot find arch std event: {arch_std}')
402
403    self.event = real_event(self.name, event)
404
405  def __repr__(self) -> str:
406    """String representation primarily for debugging."""
407    s = '{\n'
408    for attr, value in self.__dict__.items():
409      if value:
410        s += f'\t{attr} = {value},\n'
411    return s + '}'
412
413  def build_c_string(self, metric: bool) -> str:
    """Encode the event or metric attributes as the compact string stored in BigCString."""
414    s = ''
415    for attr in _json_metric_attributes if metric else _json_event_attributes:
416      x = getattr(self, attr)
417      if metric and x and attr == 'metric_expr':
418        # Convert parsed metric expressions into a string. Slashes
419        # must be doubled in the file.
420        x = x.ToPerfJson().replace('\\', '\\\\')
421      if metric and x and attr == 'metric_threshold':
422        x = x.replace('\\', '\\\\')
423      if attr in _json_enum_attributes:
424        s += x if x else '0'
425      else:
426        s += f'{x}\\000' if x else '\\000'
427    return s
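  # Illustrative sketch (not in the original source): for an event whose name
  # is 'br_inst_retired' and topic is 'branch', the encoded string begins
  # 'br_inst_retired\000branch\000...'. Attributes listed in
  # _json_enum_attributes are emitted as a single character ('0', '1', ...)
  # with no '\000' terminator; decompress_event()/decompress_metric() in the
  # generated C walk the attributes back in the same order.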
428
429  def to_c_string(self, metric: bool) -> str:
430    """Representation of the event as a C struct initializer."""
431
432    s = self.build_c_string(metric)
433    return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
434
435
436@lru_cache(maxsize=None)
437def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
438  """Read json events from the specified file."""
439  try:
440    events = json.load(open(path), object_hook=JsonEvent)
441  except BaseException as err:
442    print(f"Exception processing {path}")
443    raise
444  metrics: List[Tuple[str, str, metric.Expression]] = []
445  for event in events:
446    event.topic = topic
447    if event.metric_name and '-' not in event.metric_name:
448      metrics.append((event.pmu, event.metric_name, event.metric_expr))
449  updates = metric.RewriteMetricsInTermsOfOthers(metrics)
450  if updates:
451    for event in events:
452      if event.metric_name in updates:
453        # print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n'
454        #       f'to\n"{updates[event.metric_name]}"')
455        event.metric_expr = updates[event.metric_name]
456
457  return events
458
459def preprocess_arch_std_files(archpath: str) -> None:
460  """Read in all architecture standard events."""
461  global _arch_std_events
462  for item in os.scandir(archpath):
463    if item.is_file() and item.name.endswith('.json'):
464      for event in read_json_events(item.path, topic=''):
465        if event.name:
466          _arch_std_events[event.name.lower()] = event
467        if event.metric_name:
468          _arch_std_events[event.metric_name.lower()] = event
469
470
471def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
472  """Add contents of file to _pending_events table."""
473  for e in read_json_events(item.path, topic):
474    if e.name:
475      _pending_events.append(e)
476    if e.metric_name:
477      _pending_metrics.append(e)
478
479
480def print_pending_events() -> None:
481  """Optionally close events table."""
482
483  def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
484    def fix_none(s: Optional[str]) -> str:
485      if s is None:
486        return ''
487      return s
488
489    return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
490            fix_none(j.metric_name))
491
492  global _pending_events
493  if not _pending_events:
494    return
495
496  global _pending_events_tblname
497  if _pending_events_tblname.endswith('_sys'):
498    global _sys_event_tables
499    _sys_event_tables.append(_pending_events_tblname)
500  else:
501    global _event_tables
502    _event_tables.append(_pending_events_tblname)
503
504  first = True
505  last_pmu = None
506  last_name = None
507  pmus = set()
508  for event in sorted(_pending_events, key=event_cmp_key):
509    if last_pmu and last_pmu == event.pmu:
510      assert event.name != last_name, f"Duplicate event: {last_pmu}/{last_name}/ in {_pending_events_tblname}"
511    if event.pmu != last_pmu:
512      if not first:
513        _args.output_file.write('};\n')
514      pmu_name = event.pmu.replace(',', '_')
515      _args.output_file.write(
516          f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
517      first = False
518      last_pmu = event.pmu
519      pmus.add((event.pmu, pmu_name))
520
521    _args.output_file.write(event.to_c_string(metric=False))
522    last_name = event.name
523  _pending_events = []
524
525  _args.output_file.write(f"""
526}};
527
528const struct pmu_table_entry {_pending_events_tblname}[] = {{
529""")
530  for (pmu, tbl_pmu) in sorted(pmus):
531    pmu_name = f"{pmu}\\000"
532    _args.output_file.write(f"""{{
533     .entries = {_pending_events_tblname}_{tbl_pmu},
534     .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
535     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
536}},
537""")
538  _args.output_file.write('};\n\n')
539
540def print_pending_metrics() -> None:
541  """Optionally close metrics table."""
542
543  def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
544    def fix_none(s: Optional[str]) -> str:
545      if s is None:
546        return ''
547      return s
548
549    return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name))
550
551  global _pending_metrics
552  if not _pending_metrics:
553    return
554
555  global _pending_metrics_tblname
556  if _pending_metrics_tblname.endswith('_sys'):
557    global _sys_metric_tables
558    _sys_metric_tables.append(_pending_metrics_tblname)
559  else:
560    global _metric_tables
561    _metric_tables.append(_pending_metrics_tblname)
562
563  first = True
564  last_pmu = None
565  pmus = set()
566  for metric in sorted(_pending_metrics, key=metric_cmp_key):
567    if metric.pmu != last_pmu:
568      if not first:
569        _args.output_file.write('};\n')
570      pmu_name = metric.pmu.replace(',', '_')
571      _args.output_file.write(
572          f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
573      first = False
574      last_pmu = metric.pmu
575      pmus.add((metric.pmu, pmu_name))
576
577    _args.output_file.write(metric.to_c_string(metric=True))
578  _pending_metrics = []
579
580  _args.output_file.write(f"""
581}};
582
583const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
584""")
585  for (pmu, tbl_pmu) in sorted(pmus):
586    pmu_name = f"{pmu}\\000"
587    _args.output_file.write(f"""{{
588     .entries = {_pending_metrics_tblname}_{tbl_pmu},
589     .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
590     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
591}},
592""")
593  _args.output_file.write('};\n\n')
594
595def get_topic(topic: str) -> str:
596  if topic.endswith('metrics.json'):
597    return 'metrics'
598  return removesuffix(topic, '.json').replace('-', ' ')
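# Illustrative examples (hypothetical file names):
#   get_topic('floating-point.json')  -> 'floating point'
#   get_topic('uncore-memory.json')   -> 'uncore memory'
#   get_topic('tma-metrics.json')     -> 'metrics'   (any name ending in metrics.json)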
599
600def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
601  """First pass: add a JSON file's strings to the global BigCString."""
602  if item.is_dir():
603    return
604
605  # base dir or too deep
606  level = len(parents)
607  if level == 0 or level > 4:
608    return
609
610  # Ignore other directories. If the file name does not have a .json
611  # extension, ignore it. It could be a readme.txt for instance.
612  if not item.is_file() or not item.name.endswith('.json'):
613    return
614
615  if item.name == 'metricgroups.json':
616    metricgroup_descriptions = json.load(open(item.path))
617    for mgroup in metricgroup_descriptions:
618      assert len(mgroup) > 1, parents
619      description = f"{metricgroup_descriptions[mgroup]}\\000"
620      mgroup = f"{mgroup}\\000"
621      _bcs.add(mgroup, metric=True)
622      _bcs.add(description, metric=True)
623      _metricgroups[mgroup] = description
624    return
625
626  topic = get_topic(item.name)
627  for event in read_json_events(item.path, topic):
628    pmu_name = f"{event.pmu}\\000"
629    if event.name:
630      _bcs.add(pmu_name, metric=False)
631      _bcs.add(event.build_c_string(metric=False), metric=False)
632    if event.metric_name:
633      _bcs.add(pmu_name, metric=True)
634      _bcs.add(event.build_c_string(metric=True), metric=True)
635
636def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
637  """Process a JSON file during the main walk."""
638  def is_leaf_dir(path: str) -> bool:
639    for item in os.scandir(path):
640      if item.is_dir():
641        return False
642    return True
643
644  # model directory, reset topic
645  if item.is_dir() and is_leaf_dir(item.path):
646    print_pending_events()
647    print_pending_metrics()
648
649    global _pending_events_tblname
650    _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
651    global _pending_metrics_tblname
652    _pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)
653
654    if item.name == 'sys':
655      _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
656    return
657
658  # base dir or too deep
659  level = len(parents)
660  if level == 0 or level > 4:
661    return
662
663  # Ignore other directories. If the file name does not have a .json
664  # extension, ignore it. It could be a readme.txt for instance.
665  if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json':
666    return
667
668  add_events_table_entries(item, get_topic(item.name))
669
670
671def print_mapping_table(archs: Sequence[str]) -> None:
672  """Read the mapfile and generate the struct from cpuid string to event table."""
673  _args.output_file.write("""
674/* Struct used to make the PMU event table implementation opaque to callers. */
675struct pmu_events_table {
676        const struct pmu_table_entry *pmus;
677        uint32_t num_pmus;
678};
679
680/* Struct used to make the PMU metric table implementation opaque to callers. */
681struct pmu_metrics_table {
682        const struct pmu_table_entry *pmus;
683        uint32_t num_pmus;
684};
685
686/*
687 * Map a CPU to its table of PMU events. The CPU is identified by the
688 * cpuid field, which is an arch-specific identifier for the CPU.
689 * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
690 * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c.
691 *
692 * The cpuid can contain any character other than the comma.
693 */
694struct pmu_events_map {
695        const char *arch;
696        const char *cpuid;
697        struct pmu_events_table event_table;
698        struct pmu_metrics_table metric_table;
699};
700
701/*
702 * Global table mapping each known CPU for the architecture to its
703 * table of PMU events.
704 */
705const struct pmu_events_map pmu_events_map[] = {
706""")
707  for arch in archs:
708    if arch == 'test':
709      _args.output_file.write("""{
710\t.arch = "testarch",
711\t.cpuid = "testcpu",
712\t.event_table = {
713\t\t.pmus = pmu_events__test_soc_cpu,
714\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
715\t},
716\t.metric_table = {
717\t\t.pmus = pmu_metrics__test_soc_cpu,
718\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
719\t}
720},
721""")
722    else:
723      with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile:
724        table = csv.reader(csvfile)
725        first = True
726        for row in table:
727          # Skip the first row or any row beginning with #.
728          if not first and len(row) > 0 and not row[0].startswith('#'):
729            event_tblname = file_name_to_table_name('pmu_events_', [], row[2].replace('/', '_'))
730            if event_tblname in _event_tables:
731              event_size = f'ARRAY_SIZE({event_tblname})'
732            else:
733              event_tblname = 'NULL'
734              event_size = '0'
735            metric_tblname = file_name_to_table_name('pmu_metrics_', [], row[2].replace('/', '_'))
736            if metric_tblname in _metric_tables:
737              metric_size = f'ARRAY_SIZE({metric_tblname})'
738            else:
739              metric_tblname = 'NULL'
740              metric_size = '0'
741            if event_size == '0' and metric_size == '0':
742              continue
743            cpuid = row[0].replace('\\', '\\\\')
744            _args.output_file.write(f"""{{
745\t.arch = "{arch}",
746\t.cpuid = "{cpuid}",
747\t.event_table = {{
748\t\t.pmus = {event_tblname},
749\t\t.num_pmus = {event_size}
750\t}},
751\t.metric_table = {{
752\t\t.pmus = {metric_tblname},
753\t\t.num_pmus = {metric_size}
754\t}}
755}},
756""")
757          first = False
758
759  _args.output_file.write("""{
760\t.arch = 0,
761\t.cpuid = 0,
762\t.event_table = { 0, 0 },
763\t.metric_table = { 0, 0 },
764}
765};
766""")
767
768
769def print_system_mapping_table() -> None:
770  """C struct mapping table array for tables from /sys directories."""
771  _args.output_file.write("""
772struct pmu_sys_events {
773\tconst char *name;
774\tstruct pmu_events_table event_table;
775\tstruct pmu_metrics_table metric_table;
776};
777
778static const struct pmu_sys_events pmu_sys_event_tables[] = {
779""")
780  printed_metric_tables = []
781  for tblname in _sys_event_tables:
782    _args.output_file.write(f"""\t{{
783\t\t.event_table = {{
784\t\t\t.pmus = {tblname},
785\t\t\t.num_pmus = ARRAY_SIZE({tblname})
786\t\t}},""")
787    metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
788    if metric_tblname in _sys_metric_tables:
789      _args.output_file.write(f"""
790\t\t.metric_table = {{
791\t\t\t.pmus = {metric_tblname},
792\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
793\t\t}},""")
794      printed_metric_tables.append(metric_tblname)
795    _args.output_file.write(f"""
796\t\t.name = \"{tblname}\",
797\t}},
798""")
799  for tblname in _sys_metric_tables:
800    if tblname in printed_metric_tables:
801      continue
802    _args.output_file.write(f"""\t{{
803\t\t.metric_table = {{
804\t\t\t.pmus = {tblname},
805\t\t\t.num_pmus = ARRAY_SIZE({tblname})
806\t\t}},
807\t\t.name = \"{tblname}\",
808\t}},
809""")
810  _args.output_file.write("""\t{
811\t\t.event_table = { 0, 0 },
812\t\t.metric_table = { 0, 0 },
813\t},
814};
815
816static void decompress_event(int offset, struct pmu_event *pe)
817{
818\tconst char *p = &big_c_string[offset];
819""")
820  for attr in _json_event_attributes:
821    _args.output_file.write(f'\n\tpe->{attr} = ')
822    if attr in _json_enum_attributes:
823      _args.output_file.write("*p - '0';\n")
824    else:
825      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
826    if attr == _json_event_attributes[-1]:
827      continue
828    if attr in _json_enum_attributes:
829      _args.output_file.write('\tp++;')
830    else:
831      _args.output_file.write('\twhile (*p++);')
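  # For the current _json_event_attributes the loop above emits C roughly of
  # the form (illustrative, abbreviated):
  #
  #   pe->name = (*p == '\0' ? NULL : p);
  #   while (*p++);
  #   pe->topic = (*p == '\0' ? NULL : p);
  #   while (*p++);
  #   ...
  #   pe->deprecated = *p - '0';
  #   p++;
  #   ...
  #   pe->long_desc = (*p == '\0' ? NULL : p);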
832  _args.output_file.write("""}
833
834static void decompress_metric(int offset, struct pmu_metric *pm)
835{
836\tconst char *p = &big_c_string[offset];
837""")
838  for attr in _json_metric_attributes:
839    _args.output_file.write(f'\n\tpm->{attr} = ')
840    if attr in _json_enum_attributes:
841      _args.output_file.write("*p - '0';\n")
842    else:
843      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
844    if attr == _json_metric_attributes[-1]:
845      continue
846    if attr in _json_enum_attributes:
847      _args.output_file.write('\tp++;')
848    else:
849      _args.output_file.write('\twhile (*p++);')
850  _args.output_file.write("""}
851
852static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
853                                                const struct pmu_table_entry *pmu,
854                                                pmu_event_iter_fn fn,
855                                                void *data)
856{
857        int ret;
858        struct pmu_event pe = {
859                .pmu = &big_c_string[pmu->pmu_name.offset],
860        };
861
862        for (uint32_t i = 0; i < pmu->num_entries; i++) {
863                decompress_event(pmu->entries[i].offset, &pe);
864                if (!pe.name)
865                        continue;
866                ret = fn(&pe, table, data);
867                if (ret)
868                        return ret;
869        }
870        return 0;
871}
872
873static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
874                                            const struct pmu_table_entry *pmu,
875                                            const char *name,
876                                            pmu_event_iter_fn fn,
877                                            void *data)
878{
879        struct pmu_event pe = {
880                .pmu = &big_c_string[pmu->pmu_name.offset],
881        };
882        int low = 0, high = pmu->num_entries - 1;
883
884        while (low <= high) {
885                int cmp, mid = (low + high) / 2;
886
887                decompress_event(pmu->entries[mid].offset, &pe);
888
889                if (!pe.name && !name)
890                        goto do_call;
891
892                if (!pe.name && name) {
893                        low = mid + 1;
894                        continue;
895                }
896                if (pe.name && !name) {
897                        high = mid - 1;
898                        continue;
899                }
900
901                cmp = strcasecmp(pe.name, name);
902                if (cmp < 0) {
903                        low = mid + 1;
904                        continue;
905                }
906                if (cmp > 0) {
907                        high = mid - 1;
908                        continue;
909                }
910  do_call:
911                return fn ? fn(&pe, table, data) : 0;
912        }
913        return PMU_EVENTS__NOT_FOUND;
914}
915
916int pmu_events_table__for_each_event(const struct pmu_events_table *table,
917                                    struct perf_pmu *pmu,
918                                    pmu_event_iter_fn fn,
919                                    void *data)
920{
921        for (size_t i = 0; i < table->num_pmus; i++) {
922                const struct pmu_table_entry *table_pmu = &table->pmus[i];
923                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
924                int ret;
925
926                if (pmu && !pmu__name_match(pmu, pmu_name))
927                        continue;
928
929                ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
930                if (pmu || ret)
931                        return ret;
932        }
933        return 0;
934}
935
936int pmu_events_table__find_event(const struct pmu_events_table *table,
937                                 struct perf_pmu *pmu,
938                                 const char *name,
939                                 pmu_event_iter_fn fn,
940                                 void *data)
941{
942        for (size_t i = 0; i < table->num_pmus; i++) {
943                const struct pmu_table_entry *table_pmu = &table->pmus[i];
944                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
945                int ret;
946
947                if (!pmu__name_match(pmu, pmu_name))
948                        continue;
949
950                ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
951                if (ret != PMU_EVENTS__NOT_FOUND)
952                        return ret;
953        }
954        return PMU_EVENTS__NOT_FOUND;
955}
956
957size_t pmu_events_table__num_events(const struct pmu_events_table *table,
958                                    struct perf_pmu *pmu)
959{
960        size_t count = 0;
961
962        for (size_t i = 0; i < table->num_pmus; i++) {
963                const struct pmu_table_entry *table_pmu = &table->pmus[i];
964                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
965
966                if (pmu__name_match(pmu, pmu_name))
967                        count += table_pmu->num_entries;
968        }
969        return count;
970}
971
972static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
973                                                const struct pmu_table_entry *pmu,
974                                                pmu_metric_iter_fn fn,
975                                                void *data)
976{
977        int ret;
978        struct pmu_metric pm = {
979                .pmu = &big_c_string[pmu->pmu_name.offset],
980        };
981
982        for (uint32_t i = 0; i < pmu->num_entries; i++) {
983                decompress_metric(pmu->entries[i].offset, &pm);
984                if (!pm.metric_expr)
985                        continue;
986                ret = fn(&pm, table, data);
987                if (ret)
988                        return ret;
989        }
990        return 0;
991}
992
993int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
994                                     pmu_metric_iter_fn fn,
995                                     void *data)
996{
997        for (size_t i = 0; i < table->num_pmus; i++) {
998                int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
999                                                                 fn, data);
1000
1001                if (ret)
1002                        return ret;
1003        }
1004        return 0;
1005}
1006
1007static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
1008{
1009        static struct {
1010                const struct pmu_events_map *map;
1011                struct perf_pmu *pmu;
1012        } last_result;
1013        static struct {
1014                const struct pmu_events_map *map;
1015                char *cpuid;
1016        } last_map_search;
1017        static bool has_last_result, has_last_map_search;
1018        const struct pmu_events_map *map = NULL;
1019        char *cpuid = NULL;
1020        size_t i;
1021
1022        if (has_last_result && last_result.pmu == pmu)
1023                return last_result.map;
1024
1025        cpuid = perf_pmu__getcpuid(pmu);
1026
1027        /*
1028         * On some platforms which use a cpus map, cpuid can be NULL for
1029         * PMUs other than core PMUs.
1030         */
1031        if (!cpuid)
1032                goto out_update_last_result;
1033
1034        if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
1035                map = last_map_search.map;
1036                free(cpuid);
1037        } else {
1038                i = 0;
1039                for (;;) {
1040                        map = &pmu_events_map[i++];
1041
1042                        if (!map->arch) {
1043                                map = NULL;
1044                                break;
1045                        }
1046
1047                        if (!strcmp_cpuid_str(map->cpuid, cpuid))
1048                                break;
1049                }
1050                free(last_map_search.cpuid);
1051                last_map_search.cpuid = cpuid;
1052                last_map_search.map = map;
1053                has_last_map_search = true;
1054        }
1055out_update_last_result:
1056        last_result.pmu = pmu;
1057        last_result.map = map;
1058        has_last_result = true;
1059        return map;
1060}
1061
1062const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
1063{
1064        const struct pmu_events_map *map = map_for_pmu(pmu);
1065
1066        if (!map)
1067                return NULL;
1068
1069        if (!pmu)
1070                return &map->event_table;
1071
1072        for (size_t i = 0; i < map->event_table.num_pmus; i++) {
1073                const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
1074                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1075
1076                if (pmu__name_match(pmu, pmu_name))
1077                        return &map->event_table;
1078        }
1079        return NULL;
1080}
1081
1082const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
1083{
1084        const struct pmu_events_map *map = map_for_pmu(pmu);
1085
1086        if (!map)
1087                return NULL;
1088
1089        if (!pmu)
1090                return &map->metric_table;
1091
1092        for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
1093                const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
1094                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
1095
1096                if (pmu__name_match(pmu, pmu_name))
1097                        return &map->metric_table;
1098        }
1099        return NULL;
1100}
1101
1102const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
1103{
1104        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1105             tables->arch;
1106             tables++) {
1107                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1108                        return &tables->event_table;
1109        }
1110        return NULL;
1111}
1112
1113const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
1114{
1115        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1116             tables->arch;
1117             tables++) {
1118                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
1119                        return &tables->metric_table;
1120        }
1121        return NULL;
1122}
1123
1124int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
1125{
1126        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1127             tables->arch;
1128             tables++) {
1129                int ret = pmu_events_table__for_each_event(&tables->event_table,
1130                                                           /*pmu=*/ NULL, fn, data);
1131
1132                if (ret)
1133                        return ret;
1134        }
1135        return 0;
1136}
1137
1138int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
1139{
1140        for (const struct pmu_events_map *tables = &pmu_events_map[0];
1141             tables->arch;
1142             tables++) {
1143                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1144
1145                if (ret)
1146                        return ret;
1147        }
1148        return 0;
1149}
1150
1151const struct pmu_events_table *find_sys_events_table(const char *name)
1152{
1153        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1154             tables->name;
1155             tables++) {
1156                if (!strcmp(tables->name, name))
1157                        return &tables->event_table;
1158        }
1159        return NULL;
1160}
1161
1162int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
1163{
1164        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1165             tables->name;
1166             tables++) {
1167                int ret = pmu_events_table__for_each_event(&tables->event_table,
1168                                                           /*pmu=*/ NULL, fn, data);
1169
1170                if (ret)
1171                        return ret;
1172        }
1173        return 0;
1174}
1175
1176int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
1177{
1178        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
1179             tables->name;
1180             tables++) {
1181                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
1182
1183                if (ret)
1184                        return ret;
1185        }
1186        return 0;
1187}
1188""")
1189
1190def print_metricgroups() -> None:
1191  _args.output_file.write("""
1192static const int metricgroups[][2] = {
1193""")
1194  for mgroup in sorted(_metricgroups):
1195    description = _metricgroups[mgroup]
1196    _args.output_file.write(
1197        f'\t{{ {_bcs.offsets[mgroup]}, {_bcs.offsets[description]} }}, /* {mgroup} => {description} */\n'
1198    )
1199  _args.output_file.write("""
1200};
1201
1202const char *describe_metricgroup(const char *group)
1203{
1204        int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;
1205
1206        while (low <= high) {
1207                int mid = (low + high) / 2;
1208                const char *mgroup = &big_c_string[metricgroups[mid][0]];
1209                int cmp = strcmp(mgroup, group);
1210
1211                if (cmp == 0) {
1212                        return &big_c_string[metricgroups[mid][1]];
1213                } else if (cmp < 0) {
1214                        low = mid + 1;
1215                } else {
1216                        high = mid - 1;
1217                }
1218        }
1219        return NULL;
1220}
1221""")
1222
1223def main() -> None:
1224  global _args
1225
1226  def dir_path(path: str) -> str:
1227    """Validate path is a directory for argparse."""
1228    if os.path.isdir(path):
1229      return path
1230    raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory')
1231
1232  def ftw(path: str, parents: Sequence[str],
1233          action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
1234    """Replicate the directory/file walking behavior of C's file tree walk."""
1235    for item in sorted(os.scandir(path), key=lambda e: e.name):
1236      if _args.model != 'all' and item.is_dir():
1237        # Check if the model matches one in _args.model.
1238        if len(parents) == _args.model.split(',')[0].count('/'):
1239          # We're testing the correct directory.
1240          item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name
1241          if 'test' not in item_path and item_path not in _args.model.split(','):
1242            continue
1243      action(parents, item)
1244      if item.is_dir():
1245        ftw(item.path, parents + [item.name], action)
1246
1247  ap = argparse.ArgumentParser()
1248  ap.add_argument('arch', help='Architecture name like x86')
1249  ap.add_argument('model', help='''Select a model such as skylake to
1250reduce the code size.  Normally set to "all". For architectures like
1251ARM64 with an implementor/model, the model must include the implementor
1252such as "arm/cortex-a34".''',
1253                  default='all')
1254  ap.add_argument(
1255      'starting_dir',
1256      type=dir_path,
1257      help='Root of tree containing architecture directories containing json files'
1258  )
1259  ap.add_argument(
1260      'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout)
1261  _args = ap.parse_args()
1262
1263  _args.output_file.write(f"""
1264/* SPDX-License-Identifier: GPL-2.0 */
1265/* THIS FILE WAS AUTOGENERATED BY jevents.py arch={_args.arch} model={_args.model} ! */
1266""")
1267  _args.output_file.write("""
1268#include <pmu-events/pmu-events.h>
1269#include "util/header.h"
1270#include "util/pmu.h"
1271#include <string.h>
1272#include <stddef.h>
1273
1274struct compact_pmu_event {
1275        int offset;
1276};
1277
1278struct pmu_table_entry {
1279        const struct compact_pmu_event *entries;
1280        uint32_t num_entries;
1281        struct compact_pmu_event pmu_name;
1282};
1283
1284""")
1285  archs = []
1286  for item in os.scandir(_args.starting_dir):
1287    if not item.is_dir():
1288      continue
1289    if item.name == _args.arch or _args.arch == 'all' or item.name == 'test':
1290      archs.append(item.name)
1291
1292  if len(archs) < 2 and _args.arch != 'none':
1293    raise IOError(f'Missing architecture directory \'{_args.arch}\'')
1294
1295  archs.sort()
1296  for arch in archs:
1297    arch_path = f'{_args.starting_dir}/{arch}'
1298    preprocess_arch_std_files(arch_path)
1299    ftw(arch_path, [], preprocess_one_file)
1300
1301  _bcs.compute()
1302  _args.output_file.write('static const char *const big_c_string =\n')
1303  for s in _bcs.big_string:
1304    _args.output_file.write(s)
1305  _args.output_file.write(';\n\n')
1306  for arch in archs:
1307    arch_path = f'{_args.starting_dir}/{arch}'
1308    ftw(arch_path, [], process_one_file)
1309    print_pending_events()
1310    print_pending_metrics()
1311
1312  print_mapping_table(archs)
1313  print_system_mapping_table()
1314  print_metricgroups()
1315
1316if __name__ == '__main__':
1317  main()
1318