#!/usr/bin/env python3
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""

from argparse import Namespace
import logging
import os
import re

from pprint import pformat
from random import randrange, seed

# Import Python modules

from helpers import AbiDebug, ABI_DIR


class AbiParser:
    """Main class to parse ABI files"""

    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """
        Stores arguments for the class and initialize class vars.

        ``directory`` is the default root used by parse_abi(); ``logger``
        defaults to the "get_abi" logger; ``show_warnings`` controls whether
        warn() emits anything; ``debug`` is tested as a bitmask against
        AbiDebug values.
        """

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        if not logger:
            self.log = logging.getLogger("get_abi")
        else:
            self.log = logger

        # key -> parsed entry (dict with "what", "file", "line_no", tags...)
        self.data = {}
        # "What:" symbol -> {"file": {fname: [line numbers]}, "xref": key}
        self.what_symbols = {}
        # file path (starting at ABI_DIR when found) -> file reference label
        self.file_refs = {}
        # "What:" symbol -> key at self.data
        self.what_refs = {}

        # Ignore files that contain such suffixes
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Regular expressions used on parser
        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used on print
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """
        Displays a parse error if warning is enabled.

        The message is prefixed with the file name and line number stored
        at ``fdata``. When ``extra`` is given, it is appended on a new,
        tab-indented line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """
        Create a reference table describing where each 'what' is located.

        Records that ``what`` is defined at ``fname`` (optionally at line
        ``ln``) and, when ``xref`` is given, the key used for it at
        self.data.
        """

        if what not in self.what_symbols:
            self.what_symbols[what] = {"file": {}}

        if fname not in self.what_symbols[what]["file"]:
            self.what_symbols[what]["file"][fname] = []

        if ln and ln not in self.what_symbols[what]["file"][fname]:
            self.what_symbols[what]["file"][fname].append(ln)

        if xref:
            self.what_symbols[what]["xref"] = xref

    def _parse_line(self, fdata, line):
        """
        Parse a single line of an ABI file.

        This is a state machine: ``fdata`` carries the per-file state
        (current tag, current key, list of "What:" symbols, indentation of
        the description, ...) between calls and is updated in place,
        together with self.data, self.what_refs and self.what_symbols.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # NOTE(review): this reports the current tag, not the
                    # unknown one found at this line ("new") - confirm
                    # whether that is intended.
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines belong to the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    # A new entry starts here: flush the previous one first
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    while fdata.key in self.data:
                        # Pick a pseudo-random letter from A-Z / a-z
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "line_no": fdata.ln,
                        }

                    # From now on, fdata.what aliases the stored list
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is a local that is only set on the
                # "what" branch above, which always returns; this branch
                # looks unreachable - confirm before relying on "type".
                if new_what:
                    fdata.label = ""

                    self.data[fdata.key]["type"] = fdata.ftype

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    # Debug levels are bitmasks, as on the other checks
                    if self.debug & AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        sep = sep.replace(":", " ")

                        # Rebuild the line with the tag replaced by spaces,
                        # in order to get the indentation of the first line
                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    fdata.space = ""

            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_file(self, fname, path, basename):
        """
        Parse a single file.

        ``fname`` is the file to open; ``path`` and ``basename`` are used to
        build the file reference label and the title of the per-file entry
        stored at self.data.
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        # Use a fresh instance, to avoid storing state at the Namespace
        # class itself.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        # Entry describing the file itself
        fdata.nametag = {}
        fdata.nametag["what"] = [f"File {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

        if "description" in fdata.nametag:
            fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

        # Flush the last entry of the file
        if fdata.key:
            if "description" not in self.data.get(fdata.key, {}):
                self.warn(fdata, f"{fdata.key} doesn't have a description")

            for w in fdata.what:
                self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def _parse_abi(self, root=None):
        """
        Internal function to parse documentation ABI recursively.

        Walks ``root`` (self.directory when not given), skipping README,
        hidden files and files ending with one of self.ignore_suffixes.
        """

        if not root:
            root = self.directory

        with os.scandir(root) as obj:
            for entry in obj:
                name = os.path.join(root, entry.name)

                if entry.is_dir():
                    self._parse_abi(name)
                    continue

                if not entry.is_file():
                    continue

                basename = os.path.basename(name)

                if basename == "README":
                    continue

                if basename.startswith("."):
                    continue

                if basename.endswith(self.ignore_suffixes):
                    continue

                # Strip everything up to (and including) ABI_DIR
                path = self.re_abi_dir.sub("", os.path.dirname(name))

                self.parse_file(name, path, basename)

    def parse_abi(self, root=None):
        """Parse documentation ABI"""

        self._parse_abi(root)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def desc_txt(self, desc):
        """Print description as found inside ABI files"""

        desc = desc.strip(" \t\n")

        return desc + "\n\n"

    @staticmethod
    def _sub_symbol(text, symbol, replacement):
        """Replace ``symbol``, delimited by word boundaries, at ``text``"""

        # Escape the symbol, as it may contain regex metacharacters, and
        # use a callable to insert the replacement verbatim, without
        # backslash processing.
        pattern = r"\b" + re.escape(symbol) + r"\b"

        return re.sub(pattern, lambda _: replacement, text)

    def desc_rst(self, desc):
        """
        Enrich ReST output by creating cross-references.

        Returns ``desc`` without title markups and with references to
        Documentation/*.rst files, to parsed ABI files and to known
        "What:" symbols converted into ReST cross-references.
        """

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl's, as Perl automatically caches them at their
        # first usage. Here, we'll need to do the same, as otherwise the
        # performance penalty would be high

        out = []
        for d in desc.split("\n"):
            if d == "":
                out.append(d)
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            for m in self.re_abi.findall(d):
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                else:
                    new = self.re_escape.sub(r"\\\1", m[1])
                    d = self._sub_symbol(d, abi, f":ref:`{new} <{xref}>`")

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in [" ", "\t"]:
                for m in self.re_xref_node.findall(d):
                    # Finding ABI here is more complex due to wildcards
                    xref = self.what_refs.get(m)
                    if xref:
                        new = self.re_escape.sub(r"\\\1", m)
                        # NOTE(review): symbols start with "/", so the
                        # leading word boundary only matches when they are
                        # preceded by a word character - confirm whether
                        # that is intended.
                        d = self._sub_symbol(d, m, f":ref:`{new} <{xref}>`")

            out.append(d)

        return "\n".join(out) + "\n" + "\n\n"

    def doc(self, output_in_txt=False, show_file=True):
        """
        Generate the ABI documentation.

        Yields one ``(msg, file, line_no)`` tuple per entry at self.data,
        sorted by type and by "what". ``msg`` is the ReST text for the
        entry; the description is kept as-is when ``output_in_txt`` is set.
        Per-file entries are skipped when ``show_file`` is false.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Var")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if not show_file and wtype == "File":
                continue

            msg = ""

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    underline = "-" * len(part)
                    msg += f"{part}\n{underline}\n\n"

                msg += f".. _{key}:\n\n"

                # Build a new list: "names" is the list stored at self.data,
                # which must not be modified, as otherwise a second call
                # would escape the symbols twice.
                names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]

                max_len = max((len(name) for name in names), default=0)
                border = "+-" + "-" * max_len + "-+\n"

                # One-column ReST grid table, one row per symbol
                msg += border
                for name in names:
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                    msg += border
                msg += "\n"

            for ref in file_ref:
                if wtype == "File":
                    msg += f".. _{ref[1]}:\n\n"
                else:
                    base = os.path.basename(ref[0])
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

            if wtype == "File":
                underline = "-" * len(names[0])
                msg += f"{names[0]}\n{underline}\n\n"

            desc = v.get("description")
            if not desc and wtype != "File":
                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"

            if desc:
                if output_in_txt:
                    msg += self.desc_txt(desc)
                else:
                    msg += self.desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                msg += "Has the following ABI:\n\n"

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    msg += f"- :ref:`{content} <{label}>`\n\n"

            users = v.get("users")
            if users and users.strip(" \t\n"):
                # Computed outside the f-string, as backslashes inside
                # f-string expressions require Python 3.12
                users_txt = users.strip("\n").replace("\n", "\n\t")
                msg += f"Users:\n\t{users_txt}\n\n"

            ln = v.get("line_no", 1)

            yield (msg, file_ref[0][0], ln)

    def check_issues(self):
        """Warn about duplicated ABI entries"""

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    line_list = ", ".join(str(x) for x in lines)
                    f.append(f"{fname} lines {line_list}")

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """
        Searches for ABI symbols.

        Prints, for each "What:" symbol matching the case-insensitive
        regular expression ``expr``, its tags, files and description.
        Entries are visited sorted by their key at self.data.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if not regex.search(what):
                    continue

                found_keys += 1

                kernelversion = v.get("kernelversion", "").strip(" \t\n")
                date = v.get("date", "").strip(" \t\n")
                contact = v.get("contact", "").strip(" \t\n")
                users = v.get("users", "").strip(" \t\n")
                desc = v.get("description", "").strip(" \t\n")

                files = [f[0] for f in v.get("file", ())]

                title = str(found_keys) + ". " + what
                title_tag = "-" * len(title)

                print(f"\n{title}\n{title_tag}\n")

                if kernelversion:
                    print(f"Kernel version:\t\t{kernelversion}")

                if date:
                    print(f"Date:\t\t\t{date}")

                if contact:
                    print(f"Contact:\t\t{contact}")

                if users:
                    print(f"Users:\t\t\t{users}")

                # Singular only when there's exactly one file
                plural = "" if len(files) == 1 else "s"
                file_list = ", ".join(files)
                print(f"Defined on file{plural}:\t{file_list}")

                if desc:
                    desc = desc.strip("\n")
                    print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")