#!/usr/bin/env python3
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""

from argparse import Namespace
import logging
import os
import re

from pprint import pformat
from random import randrange, seed

# Import Python modules

from abi.helpers import AbiDebug, ABI_DIR


class AbiParser:
    """Main class to parse ABI files."""

    #: Valid tags at Documentation/ABI.
    TAGS = r"(what|where|date|kernelversion|contact|description|users)"

    #: ABI elements that will auto-generate cross-references.
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """
        Stores arguments for the class and initialize class vars.

        ``directory`` is the default root used by parse_abi(); ``logger``
        defaults to the "get_abi" logger; ``show_warnings`` gates warn();
        ``debug`` is tested against the AbiDebug values.
        """

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        if not logger:
            self.log = logging.getLogger("get_abi")
        else:
            self.log = logger

        # key -> parsed entry (one per symbol group and one per file)
        self.data = {}
        # "What:" text -> {"file": {fname: [line numbers]}, "xref": key}
        self.what_symbols = {}
        # file name relative to ABI_DIR -> ReST label of the file
        self.file_refs = {}
        # "What:" text -> key at self.data
        self.what_refs = {}

        # Ignore files that contain such suffixes
        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")

        # Regular expressions used on parser
        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used on print
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """
        Displays a parse error if warning is enabled.

        The message is prefixed with ``fdata.fname:fdata.ln``. ``extra``,
        when given, is appended on its own indented line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """
        Create a reference table describing where each 'what' is located.

        Records that ``what`` shows up at ``fname`` (and at line ``ln``,
        when given). ``xref``, when given, replaces the stored key.
        """

        entry = self.what_symbols.setdefault(what, {"file": {}})
        lines = entry["file"].setdefault(fname, [])

        if ln and ln not in lines:
            lines.append(ln)

        if xref:
            entry["xref"] = xref

    def _parse_line(self, fdata, line):
        """
        Parse a single line of an ABI file.

        ``fdata`` carries the per-file state of the state machine (current
        tag, current key, pending "What:" list, indentation of the
        description, ...) and is updated in place. Results are stored at
        self.data, self.what_refs and self.what_symbols.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # Report the tag that was rejected, not the (valid)
                    # tag that is currently being parsed
                    self.warn(fdata, f"tag '{new}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines share the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    # A new group starts: finish the previous one first
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    while fdata.key in self.data:
                        # Pick a letter from A-Z or a-z
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # From now on, fdata.what aliases the stored list
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is only set on the "What:" path
                # above, which always returns, so this branch looks
                # unreachable. Kept as-is; please confirm the intent.
                if new_what:
                    fdata.label = ""

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    # Debug values are tested as a bitmask everywhere else
                    if self.debug & AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        sep = sep.replace(":", " ")

                        # Rebuild the line with the tag replaced by spaces,
                        # in order to get the real indentation of the text
                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]
                else:
                    # Indentation got smaller: stop stripping it
                    fdata.space = ""

            self.data[fdata.key][fdata.tag] += content
            return

        # Continuation line of any other tag
        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_readme(self, nametag, fname):
        """
        Parse ABI README file.

        The file content is appended to ``nametag["description"]``. Lines
        starting with a valid ABI tag are prefixed with "\\n:".
        """

        nametag["what"] = ["Introduction"]
        nametag["path"] = "README"
        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                match = self.re_tag.match(line)
                if match:
                    new = match.group(1).lower()

                    match = self.re_valid.search(new)
                    if match:
                        nametag["description"] += "\n:" + line
                        continue

                nametag["description"] += line

    def parse_file(self, fname, path, basename):
        """
        Parse a single file.

        ``fname`` is the name used to open the file, ``path`` is its
        directory with the ABI_DIR prefix removed and ``basename`` is the
        file name without directories.
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        # It has to be an instance: setting attributes on the Namespace
        # class itself would share the state between all parsed files.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        # Entry describing the file itself
        fdata.nametag = {}
        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["path"] = fdata.ftype
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        if basename == "README":
            self.parse_readme(fdata.nametag, fname)
            return

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

        if "description" in fdata.nametag:
            fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

        # Finish the last symbol group of the file
        if fdata.key:
            if "description" not in self.data.get(fdata.key, {}):
                self.warn(fdata, f"{fdata.key} doesn't have a description")

            for w in fdata.what:
                self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def _parse_abi(self, root=None):
        """
        Internal function to parse documentation ABI recursively.

        Hidden files and files ending with one of self.ignore_suffixes
        are skipped. ``root`` defaults to self.directory.
        """

        if not root:
            root = self.directory

        with os.scandir(root) as obj:
            for entry in obj:
                name = os.path.join(root, entry.name)

                if entry.is_dir():
                    self._parse_abi(name)
                    continue

                if not entry.is_file():
                    continue

                basename = os.path.basename(name)

                if basename.startswith("."):
                    continue

                if basename.endswith(self.ignore_suffixes):
                    continue

                path = self.re_abi_dir.sub("", os.path.dirname(name))

                self.parse_file(name, path, basename)

    def parse_abi(self, root=None):
        """Parse documentation ABI, optionally dumping the parsed data."""

        self._parse_abi(root)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def desc_txt(self, desc):
        """Print description as found inside ABI files."""

        desc = desc.strip(" \t\n")

        return desc + "\n\n"

    def xref(self, fname):
        """
        Converts a Documentation/ABI + basename into a ReST cross-reference.

        Returns None if the file is unknown.
        """

        return self.file_refs.get(fname) or None

    def desc_rst(self, desc):
        """Enrich ReST output by creating cross-references."""

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl's, as Perl automatically caches them at their
        # first usage. Here, we need to do the same, as otherwise the
        # performance penalty would be high. That's why fixed expressions
        # are compiled at __init__().

        out = []
        for d in desc.split("\n"):
            if d == "":
                out.append("\n")
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            matches = self.re_abi.findall(d)
            for m in matches:
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                else:
                    new = self.re_escape.sub(r"\\\1", m[1])
                    ref = f":ref:`{new} <{xref}>`"

                    # The text comes from the ABI file: escape it on the
                    # pattern and use a callable, for the replacement to
                    # be inserted verbatim (no template processing).
                    d = re.sub(fr"\b{re.escape(abi)}\b",
                               lambda _m, ref=ref: ref, d)

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in [" ", "\t"]:
                matches = self.re_xref_node.findall(d)
                for m in matches:
                    # Finding ABI here is more complex due to wildcards
                    xref = self.what_refs.get(m)
                    if xref:
                        new = self.re_escape.sub(r"\\\1", m)
                        ref = f":ref:`{new} <{xref}>`"

                        # Symbols may contain regex metacharacters, which
                        # must be matched literally.
                        # NOTE(review): the symbol starts with "/", so the
                        # leading \b only matches right after a word
                        # character. Behavior kept as-is; please confirm.
                        d = re.sub(fr"\b{re.escape(m)}\b",
                                   lambda _m, ref=ref: ref, d)

            out.append(d + "\n")

        return "".join(out) + "\n\n"

    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
            filter_path=None):
        """
        Generate the ABI documentation.

        This is a generator: for each selected entry it yields a
        ``(text, file name, line number)`` tuple. ``show_symbols`` and
        ``show_file`` select which kind of entries are produced;
        ``filter_path`` restricts them to the ones whose "path" matches;
        ``output_in_txt`` keeps descriptions as found at the ABI files,
        instead of enriching them with ReST cross-references.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Symbol")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if wtype == "File":
                if not show_file:
                    continue
            else:
                if not show_symbols:
                    continue

            if filter_path:
                if v.get("path") != filter_path:
                    continue

            msg = ""

            # Name used at the output. For symbols, it is replaced below
            # by its escaped form.
            title = names[0]

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    msg += part + "\n" + "-" * len(part) + "\n\n"

                msg += f".. _{key}:\n\n"

                # Escape names on a local list: "names" is the very same
                # list stored at self.data, so changing it in place would
                # escape the names again at each call to this method.
                shown = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]
                max_len = max((len(name) for name in shown), default=0)
                title = shown[0]

                # One-column ReST grid table with all names of the group
                msg += "+-" + "-" * max_len + "-+\n"
                for name in shown:
                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
                    msg += "+-" + "-" * max_len + "-+\n"
                msg += "\n"

            for ref in file_ref:
                if wtype == "File":
                    msg += f".. _{ref[1]}:\n\n"
                else:
                    base = os.path.basename(ref[0])
                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"

            if wtype == "File":
                msg += title + "\n" + "-" * len(title) + "\n\n"

            desc = v.get("description")
            if not desc and wtype != "File":
                msg += f"DESCRIPTION MISSING for {title}\n\n"

            if desc:
                if output_in_txt:
                    msg += self.desc_txt(desc)
                else:
                    msg += self.desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                msg += "Has the following ABI:\n\n"

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    msg += f"- :ref:`{content} <{label}>`\n\n"

            users = v.get("users")
            if users and users.strip(" \t\n"):
                users = users.strip("\n").replace('\n', '\n\t')
                msg += f"Users:\n\t{users}\n\n"

            ln = v.get("line_no", 1)

            yield (msg, file_ref[0][0], ln)

    def check_issues(self):
        """Warn about duplicated ABI entries."""

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    m = fname + " lines "
                    m += ", ".join(str(x) for x in lines)
                    f.append(m)

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """
        Searches for ABI symbols.

        Prints, ordered by key, every symbol whose "What:" matches the
        case-insensitive regular expression ``expr``.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if not regex.search(what):
                    continue

                found_keys += 1

                kernelversion = v.get("kernelversion", "").strip(" \t\n")
                date = v.get("date", "").strip(" \t\n")
                contact = v.get("contact", "").strip(" \t\n")
                users = v.get("users", "").strip(" \t\n")
                desc = v.get("description", "").strip(" \t\n")

                files = [f[0] for f in v.get("file", ())]

                title = str(found_keys) + ". " + what
                title_tag = "-" * len(title)

                print(f"\n{title}\n{title_tag}\n")

                if kernelversion:
                    print(f"Kernel version:\t\t{kernelversion}")

                if date:
                    print(f"Date:\t\t\t{date}")

                if contact:
                    print(f"Contact:\t\t{contact}")

                if users:
                    print(f"Users:\t\t\t{users}")

                print("Defined on file(s):\t" + ", ".join(files))

                if desc:
                    print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")