#!/usr/bin/env python3
# xxpylint: disable=R0903
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Convert ABI what into regular expressions
"""

import re
import sys

from pprint import pformat

from abi.abi_parser import AbiParser
from abi.helpers import AbiDebug

class AbiRegex(AbiParser):
    """
    Extends AbiParser to search ABI nodes with regular expressions.

    There are some optimizations here to allow a quick symbol search:
    instead of placing all symbols together and doing a linear search,
    which is very time consuming, create a tree with one depth level,
    grouping similar symbols together.

    Yet, sometimes a full search will be needed, so there is a special
    branch on such group tree where the other symbols are placed.
    """

    #: Escape only ASCII visible characters.
    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"

    #: Special group for other nodes.
    leave_others = "others"

    # Tuples with regular expressions to be compiled and replacement data.
    # They are applied in order by parse_abi(); placeholder characters
    # (\xf4, \xf5, \xf6, \xf7, \xff) protect already-converted pieces from
    # the later rules and are turned back into regex syntax at the end.
    re_whats = [
        # Drop escape characters that might exist
        (re.compile("\\\\"), ""),

        # Temporarily escape dot characters
        (re.compile(r"\."), "\xf6"),

        # Temporarily change [0-9]+ type of patterns
        (re.compile(r"\[0\-9\]\+"), "\xff"),

        # Temporarily change [\d+-\d+] type of patterns
        (re.compile(r"\[0\-\d+\]"), "\xff"),
        (re.compile(r"\[0:\d+\]"), "\xff"),
        (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),

        # Temporarily change [0-9] type of patterns.
        # The backslashes must be doubled: in a non-raw string, "\1" and
        # "\2" are octal escapes (chr(1) and chr(2)), not backreferences.
        (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),

        # Handle multiple option patterns
        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),

        # Handle wildcards
        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
        (re.compile(r"/\*/"), "/.*/"),
        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),

        (re.compile(r"XX+"), "\\\\w\xf7"),
        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),

        # Recover [0-9] type of patterns
        (re.compile(r"\xf4"), "["),
        (re.compile(r"\xf5"), "]"),

        # Remove duplicated spaces
        (re.compile(r"\s+"), r" "),

        # Special case: drop comparison as in:
        # What: foo = <something>
        # (this happens on a few IIO definitions)
        (re.compile(r"\s*\=.*$"), ""),

        # Escape all other symbols
        (re.compile(escape_symbols), r"\\\1"),
        (re.compile(r"\\\\"), r"\\"),
        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),

        (re.compile(r"\xff"), r"\\d+"),

        # Special case: IIO ABI which has a parenthesis.
        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),

        # Simplify regexes with multiple .*
        (re.compile(r"(?:\.\*){2,}"), ""),

        # Recover dot characters
        (re.compile(r"\xf6"), "\\."),
        # Recover plus characters
        (re.compile(r"\xf7"), "+"),
    ]

    #: Regex to check if the symbol name has a number on it.
    re_has_num = re.compile(r"\\d")

    #: Symbol name after escape_chars that are considered a devnode basename.
    re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")

    #: List of popular group names to be skipped to minimize regex group size
    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
    skip_names = {"devices", "hwmon"}

    def regex_append(self, what, new):
        """
        Store a regular expression into a search group.

        As ABI may have thousands of symbols, using a for to search all
        regular expressions is at least O(n^2). When there are wildcards,
        the complexity increases substantially, eventually becoming
        exponential.

        To avoid spending too much time on them, use a logic to split
        them into groups. The smaller the group, the better, as it would
        mean that searches will be confined to a small number of regular
        expressions.

        The conversion to a regex subset is tricky, as we need something
        that can be easily obtained from the sysfs symbol and from the
        regular expression. So, we need to discard nodes that have
        wildcards.

        If it can't obtain a subgroup, place the regular expression inside
        a special group (self.leave_others).

        Arguments:
            what: original ``What:`` string, used for logging and for the
                  optional search_string report.
            new:  regular expression generated from ``what``.

        Regexes that fail to compile are logged as a warning and skipped.
        """

        # Walk the path from the basename up to the root, picking the
        # first component that is a plain name (no wildcards) and is not
        # in the skip list.
        for search_group in reversed(new.split("/")):
            if not search_group or search_group in self.skip_names:
                continue
            if self.re_symbol_name.match(search_group):
                break
        else:
            # No usable component was found: the loop variable would hold
            # whatever was visited last, so set the fallback explicitly.
            search_group = self.leave_others

        if self.debug & AbiDebug.SUBGROUP_MAP:
            self.log.debug("%s: mapped as %s", what, search_group)

        # re.error is used instead of re.PatternError because the latter
        # only exists on Python >= 3.13, where both names are the same class.
        try:
            regex = re.compile(new)
        except re.error:
            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
                             " '%s'", what, new)
            return

        self.regex_group.setdefault(search_group, []).append(regex)

        if self.search_string and self.search_string in what:
            print(f"What: {what}")

    def get_regexes(self, what):
        """
        Given an ABI devnode, return a list of all regular expressions that
        may match it, based on the sub-groups created by regex_append().

        The groups are looked up from the devnode basename up to the root,
        followed by the special self.leave_others group. It requires
        parse_abi() to be called first, as it is what creates the groups.
        """

        re_list = []

        groups = what.split("/")
        groups.reverse()
        groups.append(self.leave_others)

        for search_group in groups:
            re_list += self.regex_group.get(search_group, [])

        return re_list

    def __init__(self, *args, **kwargs):
        """
        Override init method to get the search_string argument.

        The optional ``search_string`` keyword is removed from kwargs before
        calling the parent constructor. When not empty, it must be a valid
        regular expression; otherwise ValueError is raised.
        """

        self.regex_group = None
        self.re_string = None
        self.search_string = kwargs.pop("search_string", None)

        if self.search_string:
            # re.error is the name that works on every Python 3 version;
            # re.PatternError only exists on Python >= 3.13.
            try:
                self.re_string = re.compile(self.search_string)
            except re.error as e:
                msg = f"{self.search_string} is not a valid regular expression"
                raise ValueError(msg) from e

        super().__init__(*args, **kwargs)

    def parse_abi(self, *args, **kwargs):
        """
        Parse the ABI using the parent class and convert each ``What:``
        field that starts with /sys into a regular expression.

        The generated expressions are stored at the "regex" key of each
        ABI entry and are also placed on search groups via regex_append().
        """

        super().parse_abi(*args, **kwargs)

        self.regex_group = {}

        print("Converting ABI What fields into regexes...", file=sys.stderr)

        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            if v.get("type") == "File":
                continue

            v["regex"] = []

            for what in v.get("what", []):
                if not what.startswith("/sys"):
                    continue

                new = what
                for r, s in self.re_whats:
                    try:
                        new = r.sub(s, new)
                    except re.error as e:
                        # Help debugging troubles with new regexes
                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e

                v["regex"].append(new)

                if self.debug & AbiDebug.REGEX:
                    self.log.debug("%-90s <== %s", new, what)

                # Store regex into a subgroup to speedup searches
                self.regex_append(what, new)

        if self.debug & AbiDebug.SUBGROUP_DICT:
            self.log.debug("%s", pformat(self.regex_group))

        if self.debug & AbiDebug.SUBGROUP_SIZE:
            biggest_keys = sorted(self.regex_group.keys(),
                                  key=lambda k: len(self.regex_group[k]),
                                  reverse=True)

            print("Top regex subgroups:", file=sys.stderr)
            for k in biggest_keys[:10]:
                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)