xref: /linux/tools/lib/python/abi/abi_regex.py (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
1#!/usr/bin/env python3
2# xxpylint: disable=R0903
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Convert ABI what into regular expressions
8"""
9
10import re
11import sys
12
13from pprint import pformat
14
15from abi.abi_parser import AbiParser
16from abi.helpers import AbiDebug
17
class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    There are some optimizations here to allow a quick symbol search:
    instead of placing all symbols altogether and doing a linear search,
    which is very time consuming, create a tree with one depth,
    grouping similar symbols altogether.

    Yet, sometimes a full search will be needed, so we have a special branch
    on such group tree where other symbols are placed.
    """

    #: Escape only ASCII visible characters.
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    #: Special group for other nodes.
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    #
    # The substitutions are applied in order by parse_abi(), so the order
    # matters. Some characters are temporarily replaced by placeholders so
    # that later rules (wildcards, escaping) don't touch them:
    #   \xf4 / \xf5 -> "[" / "]"      \xf6 -> escaped dot
    #   \xf7        -> "+"            \xff -> "\d+"
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."),  "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"),  "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"),  "\xff"),
        (re.compile(r"\[0:\d+\]"),  "\xff"),
        (re.compile(r"\[(\d+)\]"),  "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The backslashes must be doubled: in a non-raw string, "\1" and
        # "\2" are octal escapes (control characters 0x01 and 0x02), not
        # references to the captured digits.
        (re.compile(r"\[(\d)\-(\d)\]"),  "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"),  "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        # What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols, then undo the escaping for the
        # characters that are meant to keep their regex meaning.
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        # NOTE(review): the parentheses in the search pattern are a regex
        # group, so this matches "sqrt" plus the rest of the string and
        # replaces it all with "sqrt(.*)" - confirm this is intended.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        # NOTE(review): consecutive ".*" are removed rather than collapsed
        # into a single ".*" - confirm this is intended.
        (re.compile(r"(?:\.\*){2,}"),  ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    #: Regex to check if the symbol name has a number on it.
    re_has_num = re.compile(r"\\d")

    #: Symbol name after escape_chars that are considered a devnode basename.
    re_symbol_name =  re.compile(r"(\w|\\[\.\-\:])+$")

    #: List of popular group names to be skipped to minimize regex group size
    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
    skip_names = {"devices", "hwmon"}
113
114    def regex_append(self, what, new):
115        """
116        Get a search group for a subset of regular expressions.
117
118        As ABI may have thousands of symbols, using a for to search all
119        regular expressions is at least O(n^2). When there are wildcards,
120        the complexity increases substantially, eventually becoming exponential.
121
122        To avoid spending too much time on them, use a logic to split
123        them into groups. The smaller the group, the better, as it would
124        mean that searches will be confined to a small number of regular
125        expressions.
126
127        The conversion to a regex subset is tricky, as we need something
128        that can be easily obtained from the sysfs symbol and from the
129        regular expression. So, we need to discard nodes that have
130        wildcards.
131
132        If it can't obtain a subgroup, place the regular expression inside
133        a special group (self.leave_others).
134        """
135
136        search_group = None
137
138        for search_group in reversed(new.split("/")):
139            if not search_group or search_group in self.skip_names:
140                continue
141            if self.re_symbol_name.match(search_group):
142                break
143
144        if not search_group:
145            search_group = self.leave_others
146
147        if self.debug & AbiDebug.SUBGROUP_MAP:
148            self.log.debug("%s: mapped as %s", what, search_group)
149
150        try:
151            if search_group not in self.regex_group:
152                self.regex_group[search_group] = []
153
154            self.regex_group[search_group].append(re.compile(new))
155            if self.search_string:
156                if what.find(self.search_string) >= 0:
157                    print(f"What: {what}")
158        except re.PatternError:
159            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
160                             "           '%s'", what, new)
161
162    def get_regexes(self, what):
163        """
164        Given an ABI devnode, return a list of all regular expressions that
165        may match it, based on the sub-groups created by regex_append().
166        """
167
168        re_list = []
169
170        patches = what.split("/")
171        patches.reverse()
172        patches.append(self.leave_others)
173
174        for search_group in patches:
175            if search_group in self.regex_group:
176                re_list += self.regex_group[search_group]
177
178        return re_list
179
180    def __init__(self, *args, **kwargs):
181        """
182        Override init method to get verbose argument
183        """
184
185        self.regex_group = None
186        self.search_string = None
187        self.re_string = None
188
189        if "search_string" in kwargs:
190            self.search_string = kwargs.get("search_string")
191            del kwargs["search_string"]
192
193            if self.search_string:
194
195                try:
196                    self.re_string = re.compile(self.search_string)
197                except re.PatternError as e:
198                    msg = f"{self.search_string} is not a valid regular expression"
199                    raise ValueError(msg) from e
200
201        super().__init__(*args, **kwargs)
202
203    def parse_abi(self, *args, **kwargs):
204
205        super().parse_abi(*args, **kwargs)
206
207        self.regex_group = {}
208
209        print("Converting ABI What fields into regexes...", file=sys.stderr)
210
211        for t in sorted(self.data.items(), key=lambda x: x[0]):
212            v = t[1]
213            if v.get("type") == "File":
214                continue
215
216            v["regex"] = []
217
218            for what in v.get("what", []):
219                if not what.startswith("/sys"):
220                    continue
221
222                new = what
223                for r, s in self.re_whats:
224                    try:
225                        new = r.sub(s, new)
226                    except re.PatternError as e:
227                        # Help debugging troubles with new regexes
228                        raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
229
230                v["regex"].append(new)
231
232                if self.debug & AbiDebug.REGEX:
233                    self.log.debug("%-90s <== %s", new, what)
234
235                # Store regex into a subgroup to speedup searches
236                self.regex_append(what, new)
237
238        if self.debug & AbiDebug.SUBGROUP_DICT:
239            self.log.debug("%s", pformat(self.regex_group))
240
241        if self.debug & AbiDebug.SUBGROUP_SIZE:
242            biggestd_keys = sorted(self.regex_group.keys(),
243                                   key= lambda k: len(self.regex_group[k]),
244                                   reverse=True)
245
246            print("Top regex subgroups:", file=sys.stderr)
247            for k in biggestd_keys[:10]:
248                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
249