#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702

"""
kdoc_parser
===========

Read a C language source or header FILE and extract embedded
documentation comments
"""

import sys
import re
from pprint import pformat

from kdoc_re import NestedMatch, KernRe
from kdoc_item import KdocItem

#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# Let's declare them in lowercase outside any class to make easier to
# convert from the python script.
#
# As those are evaluated at the beginning, no need to cache them
#

# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# A "*/" comment terminator, anywhere in the line.
doc_end = KernRe(r'\*/', cache=False)
# The "*" that prefixes a comment line, with any surrounding whitespace.
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same prefix, but consuming at most one space after the "*".
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Comment prefix followed by a captured word.
# NOTE(review): relies on KernRe "+" joining the two patterns - confirm
# against kdoc_re.
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
# Case-insensitive and unanchored: the pattern carries no "^" or "$".
known_sections = KernRe(known_section_names, flags = re.I)
# Group 1 is the "@name" or section name; group 2 is the text after the
# colon. The "[^:]" rejects a second colon, i.e. "example::".
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Comment prefix followed by the rest of the line, captured.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline comments: "/**" alone on a line, an "@name: text" line, a "*/"
# alone on a line, and the single-line "/** @name: text */" form.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL[_GPL](name) and EXPORT_SYMBOL_NS[_GPL](name, "ns").
# In both patterns the symbol name is group 2.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name", optionally with ".member" / "->member" parts and a trailing
# "..."; group 1 is everything after the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
# Group 1 is the declaration keyword, group 2 the (possibly empty) name.
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * '
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
78# 79struct_attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 80 flags=re.I | re.S, cache=False) 81struct_args_pattern = r'([^,)]+)' 82 83struct_prefixes = [ 84 # Strip attributes 85 (struct_attribute, ' '), 86 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 87 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 88 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 89 (KernRe(r'\s*__packed\s*', re.S), ' '), 90 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 91 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 92 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 93 # 94 # Unwrap struct_group macros based on this definition: 95 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 96 # which has variants like: struct_group(NAME, MEMBERS...) 97 # Only MEMBERS arguments require documentation. 98 # 99 # Parsing them happens on two steps: 100 # 101 # 1. drop struct group arguments that aren't at MEMBERS, 102 # storing them as STRUCT_GROUP(MEMBERS) 103 # 104 # 2. remove STRUCT_GROUP() ancillary macro. 105 # 106 # The original logic used to remove STRUCT_GROUP() using an 107 # advanced regex: 108 # 109 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 110 # 111 # with two patterns that are incompatible with 112 # Python re module, as it has: 113 # 114 # - a recursive pattern: (?1) 115 # - an atomic grouping: (?>...) 116 # 117 # I tried a simpler version: but it didn't work either: 118 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 119 # 120 # As it doesn't properly match the end parenthesis on some cases. 121 # 122 # So, a better solution was crafted: there's now a NestedMatch 123 # class that ensures that delimiters after a search are properly 124 # matched. So, the implementation to drop STRUCT_GROUP() will be 125 # handled in separate. 
126 # 127 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 128 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 129 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 130 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 131 # 132 # Replace macros 133 # 134 # TODO: use NestedMatch for FOO($1, $2, ...) matches 135 # 136 # it is better to also move those to the NestedMatch logic, 137 # to ensure that parenthesis will be properly matched. 138 # 139 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), 140 r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 141 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), 142 r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 143 (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 144 re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 145 (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 146 re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 147 (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + 148 r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), 149 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + 150 struct_args_pattern + r'\)', re.S), r'\2 *\1'), 151 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + 152 struct_args_pattern + r'\)', re.S), r'\1 \2[]'), 153 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), 154 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), 155] 156# 157# Regexes here are guaranteed to have the end limiter matching 158# the start delimiter. Yet, right now, only one replace group 159# is allowed. 
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]


#
# A little helper to get rid of excess white space
#
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """Strip @s and replace each run of two or more whitespace chars by one space."""
    return multi_space.sub(' ', s.strip())

#
# Remove struct/enum members that have been marked "private".
#
def trim_private_members(text):
    """
    Return @text without the members placed inside "/* private: */"
    regions, and with all remaining C comments removed.
    """
    #
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    State machine enums
    """

    # Parser states
    NORMAL        = 0        # normal code
    NAME          = 1        # looking for function name
    DECLARATION   = 2        # We have seen a declaration which might not be done
    BODY          = 3        # the body of the comment
    SPECIAL_SECTION = 4      # doc section ending with a blank line
    PROTO         = 5        # scanning prototype
    DOCBLOCK      = 6        # documentation block
    INLINE_NAME   = 7        # gathering doc outside main block
    INLINE_TEXT   = 8        # reading the body of inline docs

    # Printable names, indexed by the state values above
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  # default section

class KernelEntry:
    """
    Data gathered while one kernel-doc comment and its declaration are
    being parsed: section texts, parameter descriptions and pending
    warnings.
    """

    def __init__(self, config, ln):
        """
        Create an empty entry.

        @config is stored as-is; its ``log`` attribute is used by
        emit_msg(). @ln is the line where the comment was found; the
        declaration start line is recorded as ``ln + 1``.
        """
        self.config = config

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

        # dump_section() reads both attributes; give them a value so that
        # it is safe to call even before the first begin_section().
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Append one line of @text to the section being accumulated."""
        self._contents.append(text)

    def contents(self):
        """Return the accumulated lines joined by newlines, plus a final newline."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, log_msg, warning=True):
        """
        Emit a message.

        Informational messages are logged right away. Warnings are only
        stored at self.warnings.
        """

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    #
    # Begin a new section.
    #
    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Start section @title at @line_no. If @dump is set, the section
        accumulated so far is stored first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        A section whose name matches ``type_param`` ("@name") is stored as
        a parameter description; anything else goes to self.sections.
        If @start_new is set, the accumulator is reset to an empty default
        section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    # emit_msg() here takes just the message text (there
                    # is no line-number argument), so the line number has
                    # to be part of the message itself.
                    self.emit_msg(f"{self.new_start_line} "
                                  f"duplicate section name '{name}'\n")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []


class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    # Section names

    section_context = "Context"
    section_return = "Return"

    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        if sys.version_info.major == 3 and sys.version_info.minor < 7:
            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')

    def emit_msg(self, ln, msg, warning=True):
        """
        Emit a message, prefixed by "<file name>:<line number>".

        While an entry is being processed, the message is handed to it;
        otherwise it goes straight to the logger.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if self.entry:
            self.entry.emit_msg(log_msg, warning)
            return

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Stores the entry into an entry array.

        The actual output and output filters will be handled elsewhere
        """

        item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        self.entry = KernelEntry(self.config, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        Emits a warning when a parameter/member has no description.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        # Drop everything from the first "[" or ")" on
        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        if dtype == "" and param.endswith("..."):
            if KernRe(r'\w\.\.\.$').search(param):
                # For named variable parameters of the form `x...`,
                # remove the dots
                param = param[:-3]
            else:
                # Handles unnamed variable parameters
                param = "..."

            if param not in self.entry.parameterdescs or \
               not self.entry.parameterdescs[param]:

                self.entry.parameterdescs[param] = "variable arguments"

        elif dtype == "" and (not param or param == "void"):
            param = "void"
            self.entry.parameterdescs[param] = "no arguments"

        elif dtype == "" and param in ["struct", "union"]:
            # Handle unnamed (anonymous) union or struct
            dtype = param
            param = "{unnamed_" + param + "}"
            self.entry.parameterdescs[param] = "anonymous\n"
            self.entry.anon_struct_union = True

        # Handle cache group enforcing variables: they do not need
        # to be described in header files
        elif "__cacheline_group" in param:
            # Ignore __cacheline_group_begin and __cacheline_group_end
            return

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg


    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        @args is split at @splitter ("," for functions, ";" for
        struct/union members) and each piece is parsed into a
        name and a type.
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.

                # Treat preprocessor directive as a typeless variable
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                args = KernRe(r'\s*,\s*').split(arg)
                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
                #
                # args[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(args[0]):
                    args[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in args[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in args:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check for errors inside sections, emitting warnings if not found
        parameters are described.
        """
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    #
    # Split apart a structure prototype; returns (struct|union, name, members) or None
    #
    def split_struct_proto(self, proto):
        """
        Split a struct/union prototype.

        Returns a (struct|union, name, members) tuple, or None if @proto
        matches neither the plain nor the typedef form.
        """
        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))

        # Typedef form: the name comes after the body, so groups 2/3 swap
        r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
        if r.search(proto):
            return (r.group(1), r.group(3), r.group(2))
        return None

    #
    # Rewrite the members of a structure or union for easier formatting later on.
    # Among other things, this function will turn a member like:
    #
    #  struct { inner_members; } foo;
    #
    # into:
    #
    #  struct foo; inner_members;
    #
    def rewrite_struct_members(self, members):
        """
        Flatten nested struct/union members of @members, returning the
        rewritten member string.
        """
        #
        # Process struct/union members from the most deeply nested outward.  The
        # trick is in the ^{ below - it prevents a match of an outer struct/union
        # until the inner one has been munged (removing the "{" in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t)  # Reconstruct the original formatting
                dtype, _name, _lbr, content, _rbr, rest, _semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Use names of their own here: the outer "dtype"
                            # is still needed for the next s_id of this
                            # declaration ("struct {...} a, b;").
                            #
                            fn_type, fn_name, fn_extra = r.group(1), r.group(2), r.group(3)
                            # Pointer-to-function
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{fn_type}{fn_name}{fn_extra}; "
                            else:
                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                for name in r.group(2).split(','):
                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{r.group(1)} {name}; "
                                    else:
                                        newmember += f"{r.group(1)} {s_id}.{name}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    #
    # Format the struct declaration into a standard form for inclusion in the
    # resulting docs.
    #
    def format_struct_decl(self, declaration):
        """
        Return @declaration split into one clause per line, each line
        indented with tabs according to its "{" nesting level.
        """
        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if clause:
                if '}' in clause and level > 1:
                    level -= 1
                if not clause.startswith('#'):
                    declaration += "\t" * level
                declaration += "\t" + clause + "\n"
                if "{" in clause and "}" not in clause:
                    level += 1
        return declaration


    def dump_struct(self, ln, proto):
        """
        Store an entry for an struct or union
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        for search, sub in struct_prefixes:
            members = search.sub(sub, members)

        nested = NestedMatch()
        for search, sub in struct_nested_prefixes:
            members = nested.sub(search, sub, members)
        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '%{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Stores a data declaration inside self.entries array.

        Dispatches on self.entry.decl_type to the enum, typedef or
        struct/union handler.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype):
        """
        Stores a function of function macro inside self.entries array.
        """

        func_macro = False
        return_type = ''
        decl_type = 'function'

        # Prefixes that would be removed
        sub_prefixes = [
            (r"^static +", "", 0),
            (r"^extern +", "", 0),
            (r"^asmlinkage +", "", 0),
            (r"^inline +", "", 0),
            (r"^__inline__ +", "", 0),
            (r"^__inline +", "", 0),
            (r"^__always_inline +", "", 0),
            (r"^noinline +", "", 0),
            (r"^__FORTIFY_INLINE +", "", 0),
            (r"__init +", "", 0),
            (r"__init_or_module +", "", 0),
            (r"__deprecated +", "", 0),
            (r"__flatten +", "", 0),
            (r"__meminit +", "", 0),
            (r"__must_check +", "", 0),
            (r"__weak +", "", 0),
            (r"__sched +", "", 0),
            (r"_noprof", "", 0),
            (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0),
            (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0),
            (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0),
            (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0),
            (r"__attribute_const__ +", "", 0),

            # It seems that Python support for re.X is broken:
            # At least for me (Python 3.13), this didn't work
#            (r"""
#              __attribute__\s*\(\(
#                (?:
#                    [\w\s]+          # attribute name
#                    (?:\([^)]*\))?   # attribute arguments
#                    \s*,?            # optional comma at the end
#                )+
#              \)\)\s+
#             """, "", re.X),

            # So, remove whitespaces and comments from it
            (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0),
        ]

        for search, sub, flags in sub_prefixes:
            prototype = KernRe(search, flags).sub(sub, prototype)

        # Macros are a special case, as they change the prototype format
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            is_define_proto = True
            prototype = new_proto
        else:
            is_define_proto = False

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'[a-zA-Z0-9_~:]+'
        prototype_end1 = r'[^\(]*'
        prototype_end2 = r'[^\{]*'
        prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)'

        # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group.
        # So, this needs to be mapped in Python with (?:...)? or (?:...)+

        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'

        found = False

        if is_define_proto:
            # "#define NAME value": a macro with no argument list
            r = KernRe(r'^()(' + name + r')\s+')

            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(2)
                func_macro = True

                found = True

        if not found:
            patterns = [
                rf'^()({name})\s*{prototype_end}',
                rf'^({type1})\s+({name})\s*{prototype_end}',
                rf'^({type2})\s*({name})\s*{prototype_end}',
            ]

            for p in patterns:
                r = KernRe(p)

                if r.match(prototype):

                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)

                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)

                    found = True
                    break
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")
            return

        self.check_sections(ln, declaration_name, "function")

        self.check_return_section(ln, declaration_name, return_type)

        # The two former branches differed only on the typedef flag
        self.output_declaration(decl_type, declaration_name,
                                typedef='typedef' in return_type,
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)

    def dump_typedef(self, ln, proto):
        """
        Stores a typedef inside self.entries array.

        Function typedefs are stored as functions, with their parameter
        list; other typedefs are stored with just name and purpose.
        """

        typedef_type = r'((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args)

        # Strip comments
        proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            decl_type = 'function'
            self.create_parameter_list(ln, decl_type, args, ',', declaration_name)

            self.output_declaration(decl_type, declaration_name,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return

        # Handle nested parentheses or brackets
        r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$')
        while r.search(proto):
            proto = r.sub('', proto)

        # Parse simple typedefs
        r = KernRe(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_msg(ln, "error: Cannot parse typedef!")

    @staticmethod
    def process_export(function_set, line):
        """
        process EXPORT_SYMBOL* tags

        This method doesn't use any variable from the class, so declare it
        with a staticmethod decorator.
        """

        # We support documenting some exported symbols with different
        # names. A horrible hack.
        suffixes = [ '_noprof' ]

        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
        # multiple export lines would violate Kernel coding style.
1122 1123 if export_symbol.search(line): 1124 symbol = export_symbol.group(2) 1125 elif export_symbol_ns.search(line): 1126 symbol = export_symbol_ns.group(2) 1127 else: 1128 return False 1129 # 1130 # Found an export, trim out any special suffixes 1131 # 1132 for suffix in suffixes: 1133 # Be backward compatible with Python < 3.9 1134 if symbol.endswith(suffix): 1135 symbol = symbol[:-len(suffix)] 1136 function_set.add(symbol) 1137 return True 1138 1139 def process_normal(self, ln, line): 1140 """ 1141 STATE_NORMAL: looking for the /** to begin everything. 1142 """ 1143 1144 if not doc_start.match(line): 1145 return 1146 1147 # start a new entry 1148 self.reset_state(ln) 1149 1150 # next line is always the function name 1151 self.state = state.NAME 1152 1153 def process_name(self, ln, line): 1154 """ 1155 STATE_NAME: Looking for the "name - description" line 1156 """ 1157 # 1158 # Check for a DOC: block and handle them specially. 1159 # 1160 if doc_block.search(line): 1161 1162 if not doc_block.group(1): 1163 self.entry.begin_section(ln, "Introduction") 1164 else: 1165 self.entry.begin_section(ln, doc_block.group(1)) 1166 1167 self.entry.identifier = self.entry.section 1168 self.state = state.DOCBLOCK 1169 # 1170 # Otherwise we're looking for a normal kerneldoc declaration line. 1171 # 1172 elif doc_decl.search(line): 1173 self.entry.identifier = doc_decl.group(1) 1174 1175 # Test for data declaration 1176 if doc_begin_data.search(line): 1177 self.entry.decl_type = doc_begin_data.group(1) 1178 self.entry.identifier = doc_begin_data.group(2) 1179 # 1180 # Look for a function description 1181 # 1182 elif doc_begin_func.search(line): 1183 self.entry.identifier = doc_begin_func.group(1) 1184 self.entry.decl_type = "function" 1185 # 1186 # We struck out. 1187 # 1188 else: 1189 self.emit_msg(ln, 1190 f"This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1191 self.state = state.NORMAL 1192 return 1193 # 1194 # OK, set up for a new kerneldoc entry. 1195 # 1196 self.state = state.BODY 1197 self.entry.identifier = self.entry.identifier.strip(" ") 1198 # if there's no @param blocks need to set up default section here 1199 self.entry.begin_section(ln + 1) 1200 # 1201 # Find the description portion, which *should* be there but 1202 # isn't always. 1203 # (We should be able to capture this from the previous parsing - someday) 1204 # 1205 r = KernRe("[-:](.*)") 1206 if r.search(line): 1207 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1208 self.state = state.DECLARATION 1209 else: 1210 self.entry.declaration_purpose = "" 1211 1212 if not self.entry.declaration_purpose and self.config.wshort_desc: 1213 self.emit_msg(ln, 1214 f"missing initial short description on line:\n{line}") 1215 1216 if not self.entry.identifier and self.entry.decl_type != "enum": 1217 self.emit_msg(ln, 1218 f"wrong kernel-doc identifier on line:\n{line}") 1219 self.state = state.NORMAL 1220 1221 if self.config.verbose: 1222 self.emit_msg(ln, 1223 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1224 warning=False) 1225 # 1226 # Failed to find an identifier. Emit a warning 1227 # 1228 else: 1229 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1230 1231 # 1232 # Helper function to determine if a new section is being started. 1233 # 1234 def is_new_section(self, ln, line): 1235 if doc_sect.search(line): 1236 self.state = state.BODY 1237 # 1238 # Pick out the name of our new section, tweaking it if need be. 
1239 # 1240 newsection = doc_sect.group(1) 1241 if newsection.lower() == 'description': 1242 newsection = 'Description' 1243 elif newsection.lower() == 'context': 1244 newsection = 'Context' 1245 self.state = state.SPECIAL_SECTION 1246 elif newsection.lower() in ["@return", "@returns", 1247 "return", "returns"]: 1248 newsection = "Return" 1249 self.state = state.SPECIAL_SECTION 1250 elif newsection[0] == '@': 1251 self.state = state.SPECIAL_SECTION 1252 # 1253 # Initialize the contents, and get the new section going. 1254 # 1255 newcontents = doc_sect.group(2) 1256 if not newcontents: 1257 newcontents = "" 1258 self.dump_section() 1259 self.entry.begin_section(ln, newsection) 1260 self.entry.leading_space = None 1261 1262 self.entry.add_text(newcontents.lstrip()) 1263 return True 1264 return False 1265 1266 # 1267 # Helper function to detect (and effect) the end of a kerneldoc comment. 1268 # 1269 def is_comment_end(self, ln, line): 1270 if doc_end.search(line): 1271 self.dump_section() 1272 1273 # Look for doc_com + <text> + doc_end: 1274 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1275 if r.match(line): 1276 self.emit_msg(ln, f"suspicious ending line: {line}") 1277 1278 self.entry.prototype = "" 1279 self.entry.new_start_line = ln + 1 1280 1281 self.state = state.PROTO 1282 return True 1283 return False 1284 1285 1286 def process_decl(self, ln, line): 1287 """ 1288 STATE_DECLARATION: We've seen the beginning of a declaration 1289 """ 1290 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1291 return 1292 # 1293 # Look for anything with the " * " line beginning. 1294 # 1295 if doc_content.search(line): 1296 cont = doc_content.group(1) 1297 # 1298 # A blank line means that we have moved out of the declaration 1299 # part of the comment (without any "special section" parameter 1300 # descriptions). 1301 # 1302 if cont == "": 1303 self.state = state.BODY 1304 # 1305 # Otherwise we have more of the declaration section to soak up. 
1306 # 1307 else: 1308 self.entry.declaration_purpose = \ 1309 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1310 else: 1311 # Unknown line, ignore 1312 self.emit_msg(ln, f"bad line: {line}") 1313 1314 1315 def process_special(self, ln, line): 1316 """ 1317 STATE_SPECIAL_SECTION: a section ending with a blank line 1318 """ 1319 # 1320 # If we have hit a blank line (only the " * " marker), then this 1321 # section is done. 1322 # 1323 if KernRe(r"\s*\*\s*$").match(line): 1324 self.entry.begin_section(ln, dump = True) 1325 self.state = state.BODY 1326 return 1327 # 1328 # Not a blank line, look for the other ways to end the section. 1329 # 1330 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1331 return 1332 # 1333 # OK, we should have a continuation of the text for this section. 1334 # 1335 if doc_content.search(line): 1336 cont = doc_content.group(1) 1337 # 1338 # If the lines of text after the first in a special section have 1339 # leading white space, we need to trim it out or Sphinx will get 1340 # confused. For the second line (the None case), see what we 1341 # find there and remember it. 1342 # 1343 if self.entry.leading_space is None: 1344 r = KernRe(r'^(\s+)') 1345 if r.match(cont): 1346 self.entry.leading_space = len(r.group(1)) 1347 else: 1348 self.entry.leading_space = 0 1349 # 1350 # Otherwise, before trimming any leading chars, be *sure* 1351 # that they are white space. We should maybe warn if this 1352 # isn't the case. 1353 # 1354 for i in range(0, self.entry.leading_space): 1355 if cont[i] != " ": 1356 self.entry.leading_space = i 1357 break 1358 # 1359 # Add the trimmed result to the section and we're done. 1360 # 1361 self.entry.add_text(cont[self.entry.leading_space:]) 1362 else: 1363 # Unknown line, ignore 1364 self.emit_msg(ln, f"bad line: {line}") 1365 1366 def process_body(self, ln, line): 1367 """ 1368 STATE_BODY: the bulk of a kerneldoc comment. 
1369 """ 1370 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1371 return 1372 1373 if doc_content.search(line): 1374 cont = doc_content.group(1) 1375 self.entry.add_text(cont) 1376 else: 1377 # Unknown line, ignore 1378 self.emit_msg(ln, f"bad line: {line}") 1379 1380 def process_inline_name(self, ln, line): 1381 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1382 1383 if doc_inline_sect.search(line): 1384 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1385 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1386 self.state = state.INLINE_TEXT 1387 elif doc_inline_end.search(line): 1388 self.dump_section() 1389 self.state = state.PROTO 1390 elif doc_content.search(line): 1391 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1392 self.state = state.PROTO 1393 # else ... ?? 1394 1395 def process_inline_text(self, ln, line): 1396 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1397 1398 if doc_inline_end.search(line): 1399 self.dump_section() 1400 self.state = state.PROTO 1401 elif doc_content.search(line): 1402 self.entry.add_text(doc_content.group(1)) 1403 # else ... ?? 
1404 1405 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1406 """ 1407 Handle syscall definitions 1408 """ 1409 1410 is_void = False 1411 1412 # Strip newlines/CR's 1413 proto = re.sub(r'[\r\n]+', ' ', proto) 1414 1415 # Check if it's a SYSCALL_DEFINE0 1416 if 'SYSCALL_DEFINE0' in proto: 1417 is_void = True 1418 1419 # Replace SYSCALL_DEFINE with correct return type & function name 1420 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1421 1422 r = KernRe(r'long\s+(sys_.*?),') 1423 if r.search(proto): 1424 proto = KernRe(',').sub('(', proto, count=1) 1425 elif is_void: 1426 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1427 1428 # Now delete all of the odd-numbered commas in the proto 1429 # so that argument types & names don't have a comma between them 1430 count = 0 1431 length = len(proto) 1432 1433 if is_void: 1434 length = 0 # skip the loop if is_void 1435 1436 for ix in range(length): 1437 if proto[ix] == ',': 1438 count += 1 1439 if count % 2 == 1: 1440 proto = proto[:ix] + ' ' + proto[ix + 1:] 1441 1442 return proto 1443 1444 def tracepoint_munge(self, ln, proto): 1445 """ 1446 Handle tracepoint definitions 1447 """ 1448 1449 tracepointname = None 1450 tracepointargs = None 1451 1452 # Match tracepoint name based on different patterns 1453 r = KernRe(r'TRACE_EVENT\((.*?),') 1454 if r.search(proto): 1455 tracepointname = r.group(1) 1456 1457 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1458 if r.search(proto): 1459 tracepointname = r.group(1) 1460 1461 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1462 if r.search(proto): 1463 tracepointname = r.group(2) 1464 1465 if tracepointname: 1466 tracepointname = tracepointname.lstrip() 1467 1468 r = KernRe(r'TP_PROTO\((.*?)\)') 1469 if r.search(proto): 1470 tracepointargs = r.group(1) 1471 1472 if not tracepointname or not tracepointargs: 1473 self.emit_msg(ln, 1474 f"Unrecognized tracepoint format:\n{proto}\n") 1475 else: 1476 proto = f"static inline void 
trace_{tracepointname}({tracepointargs})" 1477 self.entry.identifier = f"trace_{self.entry.identifier}" 1478 1479 return proto 1480 1481 def process_proto_function(self, ln, line): 1482 """Ancillary routine to process a function prototype""" 1483 1484 # strip C99-style comments to end of line 1485 line = KernRe(r"//.*$", re.S).sub('', line) 1486 # 1487 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1488 # 1489 if KernRe(r'\s*#\s*define').match(line): 1490 self.entry.prototype = line 1491 elif not line.startswith('#'): # skip other preprocessor stuff 1492 r = KernRe(r'([^\{]*)') 1493 if r.match(line): 1494 self.entry.prototype += r.group(1) + " " 1495 # 1496 # If we now have the whole prototype, clean it up and declare victory. 1497 # 1498 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1499 # strip comments and surrounding spaces 1500 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1501 # 1502 # Handle self.entry.prototypes for function pointers like: 1503 # int (*pcs_config)(struct foo) 1504 # by turning it into 1505 # int pcs_config(struct foo) 1506 # 1507 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1508 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1509 # 1510 # Handle special declaration syntaxes 1511 # 1512 if 'SYSCALL_DEFINE' in self.entry.prototype: 1513 self.entry.prototype = self.syscall_munge(ln, 1514 self.entry.prototype) 1515 else: 1516 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1517 if r.search(self.entry.prototype): 1518 self.entry.prototype = self.tracepoint_munge(ln, 1519 self.entry.prototype) 1520 # 1521 # ... 
and we're done 1522 # 1523 self.dump_function(ln, self.entry.prototype) 1524 self.reset_state(ln) 1525 1526 def process_proto_type(self, ln, line): 1527 """Ancillary routine to process a type""" 1528 1529 # Strip C99-style comments and surrounding whitespace 1530 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1531 if not line: 1532 return # nothing to see here 1533 1534 # To distinguish preprocessor directive from regular declaration later. 1535 if line.startswith('#'): 1536 line += ";" 1537 # 1538 # Split the declaration on any of { } or ;, and accumulate pieces 1539 # until we hit a semicolon while not inside {brackets} 1540 # 1541 r = KernRe(r'(.*?)([{};])') 1542 for chunk in r.split(line): 1543 if chunk: # Ignore empty matches 1544 self.entry.prototype += chunk 1545 # 1546 # This cries out for a match statement ... someday after we can 1547 # drop Python 3.9 ... 1548 # 1549 if chunk == '{': 1550 self.entry.brcount += 1 1551 elif chunk == '}': 1552 self.entry.brcount -= 1 1553 elif chunk == ';' and self.entry.brcount <= 0: 1554 self.dump_declaration(ln, self.entry.prototype) 1555 self.reset_state(ln) 1556 return 1557 # 1558 # We hit the end of the line while still in the declaration; put 1559 # in a space to represent the newline. 
1560 # 1561 self.entry.prototype += ' ' 1562 1563 def process_proto(self, ln, line): 1564 """STATE_PROTO: reading a function/whatever prototype.""" 1565 1566 if doc_inline_oneline.search(line): 1567 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1568 self.entry.add_text(doc_inline_oneline.group(2)) 1569 self.dump_section() 1570 1571 elif doc_inline_start.search(line): 1572 self.state = state.INLINE_NAME 1573 1574 elif self.entry.decl_type == 'function': 1575 self.process_proto_function(ln, line) 1576 1577 else: 1578 self.process_proto_type(ln, line) 1579 1580 def process_docblock(self, ln, line): 1581 """STATE_DOCBLOCK: within a DOC: block.""" 1582 1583 if doc_end.search(line): 1584 self.dump_section() 1585 self.output_declaration("doc", self.entry.identifier) 1586 self.reset_state(ln) 1587 1588 elif doc_content.search(line): 1589 self.entry.add_text(doc_content.group(1)) 1590 1591 def parse_export(self): 1592 """ 1593 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1594 """ 1595 1596 export_table = set() 1597 1598 try: 1599 with open(self.fname, "r", encoding="utf8", 1600 errors="backslashreplace") as fp: 1601 1602 for line in fp: 1603 self.process_export(export_table, line) 1604 1605 except IOError: 1606 return None 1607 1608 return export_table 1609 1610 # 1611 # The state/action table telling us which function to invoke in 1612 # each state. 1613 # 1614 state_actions = { 1615 state.NORMAL: process_normal, 1616 state.NAME: process_name, 1617 state.BODY: process_body, 1618 state.DECLARATION: process_decl, 1619 state.SPECIAL_SECTION: process_special, 1620 state.INLINE_NAME: process_inline_name, 1621 state.INLINE_TEXT: process_inline_text, 1622 state.PROTO: process_proto, 1623 state.DOCBLOCK: process_docblock, 1624 } 1625 1626 def parse_kdoc(self): 1627 """ 1628 Open and process each line of a C source file. 
1629 The parsing is controlled via a state machine, and the line is passed 1630 to a different process function depending on the state. The process 1631 function may update the state as needed. 1632 1633 Besides parsing kernel-doc tags, it also parses export symbols. 1634 """ 1635 1636 prev = "" 1637 prev_ln = None 1638 export_table = set() 1639 1640 try: 1641 with open(self.fname, "r", encoding="utf8", 1642 errors="backslashreplace") as fp: 1643 for ln, line in enumerate(fp): 1644 1645 line = line.expandtabs().strip("\n") 1646 1647 # Group continuation lines on prototypes 1648 if self.state == state.PROTO: 1649 if line.endswith("\\"): 1650 prev += line.rstrip("\\") 1651 if not prev_ln: 1652 prev_ln = ln 1653 continue 1654 1655 if prev: 1656 ln = prev_ln 1657 line = prev + line 1658 prev = "" 1659 prev_ln = None 1660 1661 self.config.log.debug("%d %s: %s", 1662 ln, state.name[self.state], 1663 line) 1664 1665 # This is an optimization over the original script. 1666 # There, when export_file was used for the same file, 1667 # it was read twice. Here, we use the already-existing 1668 # loop to parse exported symbols as well. 1669 # 1670 if (self.state != state.NORMAL) or \ 1671 not self.process_export(export_table, line): 1672 # Hand this line to the appropriate state handler 1673 self.state_actions[self.state](self, ln, line) 1674 1675 except OSError: 1676 self.config.log.error(f"Error: Cannot open file {self.fname}") 1677 1678 return export_table, self.entries 1679