#!/usr/bin/env python3
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0

"""
Parse ABI documentation and produce results from it.
"""

from argparse import Namespace
import logging
import os
import re

from glob import glob
from pprint import pformat
from random import randrange, seed

# Import Python modules

from helpers import AbiDebug, ABI_DIR


class AbiParser:
    """
    Main class to parse ABI files.

    After parse_abi() runs, the instance holds:

    - ``data``: dict indexed by a generated reference label. Each value is
      a dict with at least "what" (list of names), "file" (list of
      ``(file name, file label)`` tuples) and "line_no", plus one entry
      per tag found for the symbol. There is also one entry per parsed
      file, with "type" set to "File";
    - ``what_symbols``: dict indexed by the What content, describing the
      files/lines where it was declared;
    - ``file_refs``: maps a file name into its reference label;
    - ``what_refs``: maps a What content into its reference label.
    """

    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"

    def __init__(self, directory, logger=None,
                 enable_lineno=False, show_warnings=True, debug=0):
        """
        Stores arguments for the class and initialize class vars.

        :param directory: directory that will be recursively scanned by
                          parse_abi().
        :param logger: logger used for warnings/debug. When not set, the
                       "get_abi" logger is used.
        :param enable_lineno: stored at ``self.enable_lineno``.
        :param show_warnings: when False, warn() becomes a no-op.
        :param debug: debug level, tested against AbiDebug values.
        """

        self.directory = directory
        self.enable_lineno = enable_lineno
        self.show_warnings = show_warnings
        self.debug = debug

        if not logger:
            self.log = logging.getLogger("get_abi")
        else:
            self.log = logger

        self.data = {}
        self.what_symbols = {}
        self.file_refs = {}
        self.what_refs = {}

        # Regular expressions used on parser
        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
        self.re_valid = re.compile(self.TAGS)
        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
        self.re_whitespace = re.compile(r"^\s+")

        # Regular expressions used on print
        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
        self.re_xref_node = re.compile(self.XREF)

    def warn(self, fdata, msg, extra=None):
        """
        Displays a parse error if warning is enabled.

        The message is prefixed by ``fdata.fname`` and ``fdata.ln``. If
        ``extra`` is set, it is appended on a separate, indented line.
        """

        if not self.show_warnings:
            return

        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
        if extra:
            msg += "\n\t\t" + extra

        self.log.warning(msg)

    def add_symbol(self, what, fname, ln=None, xref=None):
        """
        Create a reference table describing where each 'what' is located.

        :param what: symbol name, used as key at ``self.what_symbols``.
        :param fname: file where the symbol was found.
        :param ln: optional line number. Stored only once per file.
        :param xref: optional reference label. When set, it overrides a
                     previously stored one.
        """

        if what not in self.what_symbols:
            self.what_symbols[what] = {"file": {}}

        if fname not in self.what_symbols[what]["file"]:
            self.what_symbols[what]["file"][fname] = []

        if ln and ln not in self.what_symbols[what]["file"][fname]:
            self.what_symbols[what]["file"][fname].append(ln)

        if xref:
            self.what_symbols[what]["xref"] = xref

    def _parse_line(self, fdata, line):
        """
        Parse a single line of an ABI file.

        This is one step of a per-file state machine: the state is kept at
        ``fdata`` (current tag, current key, What list, description
        indentation, ...) and the results are stored at ``self.data``,
        ``self.what_symbols`` and ``self.what_refs``.

        :param fdata: per-file state, as initialized by parse_file().
        :param line: the line to be parsed, as read from the file.
        """

        new_what = False
        new_tag = False
        content = None

        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive What lines describe the same entry
                    fdata.what.append(content.strip("\n"))
                else:
                    # A new entry starts: flush the previous one, if any
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    while fdata.key in self.data:
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "line_no": fdata.ln,
                        }

                    # From now on, fdata.what aliases the list stored at
                    # self.data, so appending to one updates the other
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is a local that is only set to
                # True on the "what" branch above, which always returns.
                # So, this block is currently unreachable. It looks like
                # the flag was meant to persist between lines - confirm
                # before changing it, as it would affect the produced
                # output ("type" would start to be filled).
                if new_what:
                    fdata.label = ""

                    self.data[fdata.key]["type"] = fdata.ftype

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    # Debug levels are tested as a bitmask everywhere else
                    if self.debug & AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        sep = sep.replace(":", " ")

                        # Rebuild the line with the tag replaced by spaces,
                        # in order to get the column where the text starts
                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        if fdata.tag == "description":
            content = line.expandtabs()

            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    fdata.space = ""

            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)

    def parse_file(self, fname, path, basename):
        """
        Parse a single file.

        :param fname: name of the file to be opened.
        :param path: directory of the file, relative to the ABI directory.
                     Its first component is stored as the file type.
        :param basename: file name without directories.
        """

        ref = f"abi_file_{path}_{basename}"
        ref = self.re_unprintable.sub("_", ref).strip("_")

        # Store per-file state into a namespace variable. This will be used
        # by the per-line parser state machine and by the warning function.
        #
        # This has to be an instance: setting attributes directly at the
        # Namespace class would share (and leak) state between files.
        fdata = Namespace()

        fdata.fname = fname
        fdata.name = basename

        pos = fname.find(ABI_DIR)
        if pos > 0:
            f = fname[pos:]
        else:
            f = fname

        fdata.file_ref = (f, ref)
        self.file_refs[f] = ref

        fdata.ln = 0
        fdata.what_ln = 0
        fdata.tag = ""
        fdata.label = ""
        fdata.what = []
        fdata.key = None
        fdata.xrefs = None
        fdata.space = None
        fdata.ftype = path.split("/")[0]

        fdata.nametag = {}
        fdata.nametag["what"] = [f"File {path}/{basename}"]
        fdata.nametag["type"] = "File"
        fdata.nametag["file"] = [fdata.file_ref]
        fdata.nametag["line_no"] = 1
        fdata.nametag["description"] = ""
        fdata.nametag["symbols"] = []

        self.data[ref] = fdata.nametag

        if self.debug & AbiDebug.WHAT_OPEN:
            self.log.debug("Opening file %s", fname)

        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
            for line in fp:
                fdata.ln += 1

                self._parse_line(fdata, line)

            if "description" in fdata.nametag:
                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")

            # Flush the last entry of the file
            if fdata.key:
                if "description" not in self.data.get(fdata.key, {}):
                    self.warn(fdata, f"{fdata.key} doesn't have a description")

                for w in fdata.what:
                    self.add_symbol(what=w, fname=fname, xref=fdata.key)

    def parse_abi(self):
        """
        Parse documentation ABI.

        Recursively walks ``self.directory``, parsing every file except
        README, hidden files and files ending with a backup/reject suffix.
        """

        ignore_suffixes = ("rej", "org", "orig", "bak", "~")
        re_abi = re.compile(r".*" + ABI_DIR)

        for fname in glob(os.path.join(self.directory, "**"), recursive=True):
            if os.path.isdir(fname):
                continue

            basename = os.path.basename(fname)

            if basename == "README":
                continue
            if basename.startswith(".") or basename.endswith(ignore_suffixes):
                continue

            path = re_abi.sub("", os.path.dirname(fname))

            self.parse_file(fname, path, basename)

        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
            self.log.debug(pformat(self.data))

    def print_desc_txt(self, desc):
        """Print description as found inside ABI files"""

        desc = desc.strip(" \t\n")

        print(desc + "\n")

    @staticmethod
    def _sub_word(target, repl, text):
        r"""
        Replace occurrences of ``target`` delimited by ``\b`` at ``text``.

        Both ``target`` and ``repl`` are handled as literal strings, so
        regex special characters and backslashes on them are harmless.
        """

        return re.sub(fr"\b{re.escape(target)}\b", lambda _: repl, text)

    def print_desc_rst(self, desc):
        """
        Enrich ReST output by creating cross-references.

        Prints ``desc`` with references to Documentation/*.rst files, to
        known ABI files and to known What symbols converted into ReST
        cross-references.
        """

        # Remove title markups from the description
        # Having titles inside ABI files will only work if extra
        # care would be taken in order to strictly follow the same
        # level order for each markup.
        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
        desc = desc.rstrip(" \t\n").lstrip("\n")

        # Python's regex performance for non-compiled expressions is a lot
        # worse than Perl, as Perl automatically caches them at their
        # first usage. Here, we'll need to do the same, as otherwise the
        # performance penalty would be high

        new_desc = ""
        for d in desc.split("\n"):
            if d == "":
                new_desc += "\n"
                continue

            # Use cross-references for doc files where needed
            d = self.re_doc.sub(r":doc:`/\1`", d)

            # Use cross-references for ABI generated docs where needed
            matches = self.re_abi.findall(d)
            for m in matches:
                abi = m[0] + m[1]

                xref = self.file_refs.get(abi)
                if not xref:
                    # This may happen if ABI is on a separate directory,
                    # like parsing ABI testing and symbol is at stable.
                    # The proper solution is to move this part of the code
                    # for it to be inside sphinx/kernel_abi.py
                    self.log.info("Didn't find ABI reference for '%s'", abi)
                else:
                    new = self.re_escape.sub(r"\\\1", m[1])
                    d = self._sub_word(abi, f":ref:`{new} <{xref}>`", d)

            # Seek for cross reference symbols like /sys/...
            # Need to be careful to avoid doing it on a code block
            if d[0] not in [" ", "\t"]:
                matches = self.re_xref_node.findall(d)
                for m in matches:
                    # Finding ABI here is more complex due to wildcards
                    xref = self.what_refs.get(m)
                    if xref:
                        new = self.re_escape.sub(r"\\\1", m)

                        # NOTE(review): as symbols start with "/", the
                        # leading \b only matches when the previous char
                        # is a word character - confirm if this is the
                        # intended behavior.
                        d = self._sub_word(m, f":ref:`{new} <{xref}>`", d)

            new_desc += d + "\n"

        print(new_desc + "\n")

    def print_data(self, enable_lineno, output_in_txt, show_file=False):
        """
        Print ABI at stdout.

        :param enable_lineno: when True, prints a ".. LINENO" comment
                              before each entry.
        :param output_in_txt: when True, descriptions are printed as-is.
                              Otherwise, they're enriched with ReST
                              cross-references.
        :param show_file: when True, also prints the per-file entries.
        """

        part = None
        for key, v in sorted(self.data.items(),
                             key=lambda x: (x[1].get("type", ""),
                                            x[1].get("what"))):

            wtype = v.get("type", "Var")
            file_ref = v.get("file")
            names = v.get("what", [""])

            if not show_file and wtype == "File":
                continue

            if enable_lineno:
                ln = v.get("line_no", 1)
                print(f".. LINENO {file_ref[0][0]}#{ln}\n")

            if wtype != "File":
                cur_part = names[0]
                if cur_part.find("/") >= 0:
                    match = self.re_what.match(cur_part)
                    if match:
                        symbol = match.group(1).rstrip("/")
                        cur_part = "Symbols under " + symbol

                if cur_part and cur_part != part:
                    part = cur_part
                    underline = "-" * len(part)
                    print(f"{part}\n{underline}\n")

                print(f".. _{key}:\n")

                # Use a new list for the escaped names: changing them in
                # place would corrupt the names stored at self.data
                names = ["**" + self.re_escape.sub(r"\\\1", name) + "**"
                         for name in names]

                max_len = max((len(name) for name in names), default=0)

                print("+-" + "-" * max_len + "-+")
                for name in names:
                    print(f"| {name}" + " " * (max_len - len(name)) + " |")
                    print("+-" + "-" * max_len + "-+")
                print()

            for ref in file_ref:
                if wtype == "File":
                    print(f".. _{ref[1]}:\n")
                else:
                    base = os.path.basename(ref[0])
                    print(f"Defined on file :ref:`{base} <{ref[1]}>`\n")

            if wtype == "File":
                underline = "-" * len(names[0])
                print(f"{names[0]}\n{underline}\n")

            desc = v.get("description")
            if not desc and wtype != "File":
                print(f"DESCRIPTION MISSING for {names[0]}\n")

            if desc:
                if output_in_txt:
                    self.print_desc_txt(desc)
                else:
                    self.print_desc_rst(desc)

            symbols = v.get("symbols")
            if symbols:
                print("Has the following ABI:\n")

                for w, label in symbols:
                    # Escape special chars from content
                    content = self.re_escape.sub(r"\\\1", w)

                    print(f"- :ref:`{content} <{label}>`\n")

            users = v.get("users")
            if users and users.strip(" \t\n"):
                users = users.strip("\n").replace("\n", "\n\t")
                print(f"Users:\n\t{users}\n")

    def check_issues(self):
        """Warn about duplicated ABI entries"""

        for what, v in self.what_symbols.items():
            files = v.get("file")
            if not files:
                # Should never happen if the parser works properly
                self.log.warning("%s doesn't have a file associated", what)
                continue

            if len(files) == 1:
                continue

            f = []
            for fname, lines in sorted(files.items()):
                if not lines:
                    f.append(f"{fname}")
                elif len(lines) == 1:
                    f.append(f"{fname}:{lines[0]}")
                else:
                    line_list = ", ".join(str(x) for x in lines)
                    f.append(f"{fname} lines {line_list}")

            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))

    def search_symbols(self, expr):
        """
        Searches for ABI symbols.

        Prints every symbol whose What matches the ``expr`` regular
        expression (case-insensitive), ordered by their reference labels.
        """

        regex = re.compile(expr, re.I)

        found_keys = 0
        for _, v in sorted(self.data.items(), key=lambda x: x[0]):
            wtype = v.get("type", "")
            if wtype == "File":
                continue

            for what in v.get("what", [""]):
                if not regex.search(what):
                    continue

                found_keys += 1

                kernelversion = v.get("kernelversion", "").strip(" \t\n")
                date = v.get("date", "").strip(" \t\n")
                contact = v.get("contact", "").strip(" \t\n")
                users = v.get("users", "").strip(" \t\n")
                desc = v.get("description", "").strip(" \t\n")

                files = [f[0] for f in v.get("file", ())]

                title = str(found_keys) + ". " + what
                title_tag = "-" * len(title)

                print(f"\n{title}\n{title_tag}\n")

                if kernelversion:
                    print(f"Kernel version:\t\t{kernelversion}")

                if date:
                    print(f"Date:\t\t\t{date}")

                if contact:
                    print(f"Contact:\t\t{contact}")

                if users:
                    print(f"Users:\t\t\t{users}")

                plural = "" if len(files) == 1 else "s"
                file_list = ", ".join(files)
                print(f"Defined on file{plural}:\t{file_list}")

                if desc:
                    print(f"\n{desc}\n")

        if not found_keys:
            print(f"Regular expression /{expr}/ not found.")