#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
# pylint: disable=R0912,R0915

"""
Parse a source file or header, creating ReStructured Text cross references.

It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.

It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols and creating cross-references for all of them.
It is also capable of distinguishing the #defines used for specifying a
Linux ioctl.

The optional rules file contains a set of rules like:

 ignore ioctl VIDIOC_ENUM_FMT
 replace ioctl VIDIOC_DQBUF vidioc_qbuf
 replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
"""

import os
import re
import sys


class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols and creating cross-references for all of them.
    It is also capable of distinguishing the #defines used for specifying
    a Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

     ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

     replace <type> <old_symbol> <new_reference>

    Replaces the reference generated for old_symbol with a new one.
    The new_reference can be:
     - A simple symbol name;
     - A full Sphinx reference.

    On both cases, <type> can be:
     - ioctl: for defines that end with _IO*, e.g. ioctl definitions
     - define: for other defines
     - symbol: for symbols defined within enums;
     - typedef: for typedefs;
     - enum: for the name of a non-anonymous enum;
     - struct: for structs.

    Examples:

     ignore define __LINUX_MEDIA_H
     ignore ioctl VIDIOC_ENUM_FMT
     replace ioctl VIDIOC_DQBUF vidioc_qbuf
     replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # FIXME: the original code was written a long time before the Sphinx C
    # domain gained multiple namespaces. To avoid too much churn at the
    # existing hyperlinks, the code kept using "c:type" instead of the
    # right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); values greater than 1 additionally
        make parse_file() print every pre-processed line.
        """
        self.debug = debug
        self.data = ""

        # One dictionary of "symbol -> reference markup" per symbol type
        self.symbols = {symbol_type: {} for symbol_type in self.DEF_SYMBOL_TYPES}

    def store_type(self, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        The stored value is the reference markup for the symbol, wrapped
        by the prefix/suffix defined for symbol_type at DEF_SYMBOL_TYPES.

        When ref_name is not given, the lower-cased symbol is used as the
        reference target. By default, underscores at ":ref" targets are
        replaced by "-".
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if not ref_type:
            ref_link = symbol
        elif symbol_type == "enum":
            ref_link = f"{ref_type}:`{symbol}`"
        else:
            if not ref_name:
                ref_name = symbol.lower()

            # c-type references don't support hash
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"

        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def parse_file(self, file_in: str):
        """
        Reads a C source file and gets its identifiers.

        Every line read is appended to self.data (see store_line()), and
        each symbol detected is recorded at self.symbols via store_type().
        """
        self.data = ""
        is_enum = False
        is_comment = False
        multiline = ""

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f):
                self.store_line(line)
                line = line.strip("\n")

                # Handle continuation lines: a single trailing backslash
                # splices this line with the next one, so accumulate the
                # line contents without the backslash itself.
                if line.endswith("\\"):
                    multiline += line[:-1]
                    continue

                if multiline:
                    line = multiline + line
                    multiline = ""

                # Handle comments. They can be multilined
                if not is_comment:
                    if re.search(r"/\*.*", line):
                        is_comment = True
                    else:
                        # Strip C99-style comments
                        line = re.sub(r"(//.*)", "", line)

                if is_comment:
                    if re.search(r".*\*/", line):
                        is_comment = False
                    else:
                        # Comment not closed yet: keep accumulating
                        multiline = line
                        continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments
                # With that, it can safely remove the entire comments,
                # and there's no need to use re.DOTALL for the logic below

                line = re.sub(r"(/\*.*\*/)", "", line)
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no + 1}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                    if match:
                        self.store_type("symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                if match:
                    self.store_type("ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                if match:
                    self.store_type("define", match.group(1))
                    continue

                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
                                 line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type("typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type("enum", match.group(1))
                        is_enum = True
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type("struct", match.group(1))
                        break

    def process_exceptions(self, fname: str):
        """
        Process exceptions file with rules to ignore or replace references.

        Does nothing when fname is empty. Blank lines and lines starting
        with "#" are skipped. Calls sys.exit() with a "file:line" message
        on a malformed line or on an unknown symbol type; a replace rule
        for a symbol that was not parsed only prints a warning.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Handle ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    c_type = match.group(1)
                    symbol = match.group(2)

                    if c_type not in self.DEF_SYMBOL_TYPES:
                        sys.exit(f"{name}:{ln}: {c_type} is invalid")

                    # Ignoring a symbol that was never parsed is not an error
                    self.symbols[c_type].pop(symbol, None)

                    continue

                # Handle replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if not match:
                    sys.exit(f"{name}:{ln}: invalid line: {line}")

                c_type, old, new = match.groups()

                if c_type not in self.DEF_SYMBOL_TYPES:
                    sys.exit(f"{name}:{ln}: {c_type} is invalid")

                reftype = None

                # Parse reference type when the type is specified

                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
                if match:
                    reftype = f":c:{match.group(1)}"
                    new = match.group(2)
                else:
                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
                    if match:
                        reftype = match.group(1)
                        new = match.group(2)

                # If the replacement rule doesn't have a type, get default
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                    if not reftype:
                        reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

                new_ref = f"{reftype}:`{old} <{new}>`"

                # Change self.symbols to use the replacement rule
                if old in self.symbols[c_type]:
                    self.symbols[c_type][old] = new_ref
                else:
                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.

        Nothing is printed unless self.debug is set.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, ref in sorted(refs.items()):
                print(f" {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the stored file contents as ReST text, with Sphinx special
        characters escaped and each known symbol replaced by its
        cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, replacement in ref_dict.items():
                # The text was already escaped, so escape the symbol the
                # same way before turning it into a regex
                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))

                # Use a callable instead of a template string, so that
                # backslashes inside the replacement are inserted verbatim
                # rather than being parsed as regex template escapes
                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                              lambda m, ref=replacement:
                              m.group(1) + ref + m.group(2),
                              text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a TOC table pointing to each symbol from the header.

        Returns the ReST text, with one section per non-empty symbol type,
        sorted by the type description.
        """
        text = []

        # Add header
        text.append(".. contents:: Table of Contents")
        text.append(" :depth: 2")
        text.append(" :local:")
        text.append("")

        # Sort symbol types per description
        symbol_descriptions = sorted((v['description'], k)
                                     for k, v in self.DEF_SYMBOL_TYPES.items())

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            # NOTE(review): ref already holds a complete role (plus the
            # "\ " prefix/suffix), so the extra ":" around it looks
            # unintended - confirm the desired list item markup.
            for _, ref in sorted(refs.items()):
                text.append(f"* :{ref}:")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the ReST output to file_out.

        The base name of file_in is used as the document title. When toc
        is true, the output of gen_toc() is written; otherwise the output
        of gen_output() is written inside a parsed-literal block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)