#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
# pylint: disable=R0912,R0915

"""
Parse a source file or header, creating ReStructured Text cross references.

It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.

It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols and creating cross-references for all of them.
It is also capable of distinguishing #define used for specifying a Linux
ioctl.

The optional rules file contains a set of rules like:

    ignore ioctl VIDIOC_ENUM_FMT
    replace ioctl VIDIOC_DQBUF vidioc_qbuf
    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
"""

import os
import re
import sys


class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols and creating cross-references for all of them.
    It is also capable of distinguishing #define used for specifying a Linux
    ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

    Replaces the reference generated for old_symbol with a new reference.
    The new_reference can be:

        - A simple symbol name;
        - A full Sphinx reference.

    On both cases, <type> can be:

        - ioctl: for defines that end with _IO*, e.g. ioctl definitions
        - define: for other defines
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # FIXME: the original code was written a long time before Sphinx C
    # domain got support for multiple namespaces. To avoid too much churn
    # at the existing hyperlinks, the code kept using "c:type" instead of
    # the right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); values greater than 1 also make
        parse_file() print each logical line after comment stripping.
        """
        self.debug = debug
        self.data = ""

        # One dictionary of "symbol -> reference text" per symbol type
        self.symbols = {}

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def store_type(self, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        The stored value is the reference text, wrapped by the prefix and
        suffix defined for symbol_type at DEF_SYMBOL_TYPES. When ref_name
        is not given, the lower-cased symbol is used as the link target.

        By default, underscores are replaced by "-" on ":ref" targets.

        Raises KeyError if symbol_type is not a key of DEF_SYMBOL_TYPES.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type:
            if symbol_type == "enum":
                # Enums link straight to the symbol, with no explicit target
                ref_link = f"{ref_type}:`{symbol}`"
            else:
                if not ref_name:
                    ref_name = symbol.lower()

                # c-type references don't support hash
                if ref_type == ":ref" and replace_underscores:
                    ref_name = ref_name.replace("_", "-")

                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str):
        """
        Reads a C source file and gets its identifiers.

        Every input line is appended to self.data via store_line(). The
        symbols found (ioctls, defines, typedefs, enums, enum symbols and
        structs) are registered at self.symbols via store_type().
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: a C line is continued when it
                # ends with a single backslash. Drop the backslash and keep
                # accumulating until the logical line is complete.
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        # Comment still open: carry it to the next line
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues with not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type("symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type("ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type("define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type("typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type("enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type("struct", match.group(1))
                        break

    def process_exceptions(self, fname: str):
        """
        Process exceptions file with rules to ignore or replace references.

        Does nothing when fname is empty. Blank lines and lines starting
        with "#" are skipped. Exits via sys.exit() on a malformed line or
        on an unknown symbol type.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Handle ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    c_type = match.group(1)
                    symbol = match.group(2)

                    if c_type not in self.DEF_SYMBOL_TYPES:
                        sys.exit(f"{name}:{ln}: {c_type} is invalid")

                    # Ignoring a symbol that was never found is not an error
                    self.symbols[c_type].pop(symbol, None)

                    continue

                # Handle replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if not match:
                    sys.exit(f"{name}:{ln}: invalid line: {line}")

                c_type, old, new = match.groups()

                if c_type not in self.DEF_SYMBOL_TYPES:
                    sys.exit(f"{name}:{ln}: {c_type} is invalid")

                reftype = None

                # Parse reference type when the type is specified

                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
                if match:
                    reftype = f":c:{match.group(1)}"
                    new = match.group(2)
                else:
                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
                    if match:
                        reftype = match.group(1)
                        new = match.group(2)

                # If the replacement rule doesn't have a type, get default
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                    if not reftype:
                        # NOTE(review): no entry at DEF_SYMBOL_TYPES defines
                        # "real_type"; this fallback currently yields None.
                        reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

                new_ref = f"{reftype}:`{old} <{new}>`"

                # Change self.symbols to use the replacement rule
                if old in self.symbols[c_type]:
                    self.symbols[c_type][old] = new_ref
                else:
                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, ref in sorted(refs.items()):
                print(f" {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file content as ReST text, with Sphinx special
        characters escaped and each known symbol replaced by its reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, replacement in ref_dict.items():
                # The text was already escaped above, so the symbol has to
                # be escaped the same way before being searched for.
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              fr'\1{replacement}\2', text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a TOC table pointing to each symbol from the header.

        Returns the ReST text, with one section per non-empty symbol type,
        sorted by the type description.
        """
        text = []

        # Add header
        text.append(".. contents:: Table of Contents")
        text.append(" :depth: 2")
        text.append(" :local:")
        text.append("")

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (v['description'], k) for k, v in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _, ref in sorted(refs.items()):
                text.append(f"* :{ref}:")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write a ReST document to file_out, titled after file_in basename.

        When toc is true, the body is the table of contents from gen_toc();
        otherwise it is the output of gen_output() inside a parsed-literal
        block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)