1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import sys 16import re 17from pprint import pformat 18 19from kdoc_re import NestedMatch, KernRe 20from kdoc_item import KdocItem 21 22# 23# Regular expressions used to parse kernel-doc markups at KernelDoc class. 24# 25# Let's declare them in lowercase outside any class to make easier to 26# convert from the python script. 27# 28# As those are evaluated at the beginning, no need to cache them 29# 30 31# Allow whitespace at end of comment start. 32doc_start = KernRe(r'^/\*\*\s*$', cache=False) 33 34doc_end = KernRe(r'\*/', cache=False) 35doc_com = KernRe(r'\s*\*\s*', cache=False) 36doc_com_body = KernRe(r'\s*\* ?', cache=False) 37doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 38 39# @params and a strictly limited set of supported section names 40# Specifically: 41# Match @word: 42# @...: 43# @{section-name}: 44# while trying to not match literal block starts like "example::" 45# 46known_section_names = 'description|context|returns?|notes?|examples?' 47known_sections = KernRe(known_section_names, flags = re.I) 48doc_sect = doc_com + \ 49 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', 50 flags=re.I, cache=False) 51 52doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 53doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 54doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 55doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 56doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 57attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 58 flags=re.I | re.S, cache=False) 59 60export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 61export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 62 63type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 64 65# 66# Tests for the beginning of a kerneldoc block in its various forms. 67# 68doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 69doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 70doc_begin_func = KernRe(str(doc_com) + # initial " * ' 71 r"(?:\w+\s*\*\s*)?" + # type (not captured) 72 r'(?:define\s+)?' + # possible "define" (not captured) 73 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 74 r'(?:[-:].*)?$', # description (not captured) 75 cache = False) 76 77# 78# A little helper to get rid of excess white space 79# 80multi_space = KernRe(r'\s\s+') 81def trim_whitespace(s): 82 return multi_space.sub(' ', s.strip()) 83 84class state: 85 """ 86 State machine enums 87 """ 88 89 # Parser states 90 NORMAL = 0 # normal code 91 NAME = 1 # looking for function name 92 DECLARATION = 2 # We have seen a declaration which might not be done 93 BODY = 3 # the body of the comment 94 SPECIAL_SECTION = 4 # doc section ending with a blank line 95 PROTO = 5 # scanning prototype 96 DOCBLOCK = 6 # documentation block 97 INLINE_NAME = 7 # gathering doc outside main block 98 INLINE_TEXT = 8 # reading the body of inline docs 99 100 name = [ 101 "NORMAL", 102 "NAME", 103 "DECLARATION", 104 "BODY", 105 "SPECIAL_SECTION", 106 "PROTO", 107 "DOCBLOCK", 108 "INLINE_NAME", 109 "INLINE_TEXT", 110 ] 111 112 113SECTION_DEFAULT = "Description" # default section 114 115class KernelEntry: 116 117 def __init__(self, config, ln): 118 self.config = config 119 120 self._contents = [] 121 self.prototype = "" 122 123 self.warnings = [] 124 125 self.parameterlist = [] 126 self.parameterdescs = {} 127 self.parametertypes = {} 128 self.parameterdesc_start_lines = {} 129 130 self.section_start_lines = {} 131 self.sections = {} 132 133 self.anon_struct_union = False 134 135 self.leading_space = None 136 137 # State flags 138 self.brcount = 0 139 self.declaration_start_line = ln + 1 140 141 # 142 # Management of section contents 143 # 144 def add_text(self, text): 145 self._contents.append(text) 146 147 def contents(self): 148 return '\n'.join(self._contents) + '\n' 149 150 # TODO: rename to emit_message after removal of kernel-doc.pl 151 def emit_msg(self, log_msg, warning=True): 152 """Emit a message""" 153 154 if not warning: 155 self.config.log.info(log_msg) 156 return 157 158 # Delegate warning output to output logic, as this way it 159 # will report warnings/info only for symbols that are output 160 161 self.warnings.append(log_msg) 162 return 163 164 # 165 # Begin a new section. 166 # 167 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 168 if dump: 169 self.dump_section(start_new = True) 170 self.section = title 171 self.new_start_line = line_no 172 173 def dump_section(self, start_new=True): 174 """ 175 Dumps section contents to arrays/hashes intended for that purpose. 176 """ 177 # 178 # If we have accumulated no contents in the default ("description") 179 # section, don't bother. 180 # 181 if self.section == SECTION_DEFAULT and not self._contents: 182 return 183 name = self.section 184 contents = self.contents() 185 186 if type_param.match(name): 187 name = type_param.group(1) 188 189 self.parameterdescs[name] = contents 190 self.parameterdesc_start_lines[name] = self.new_start_line 191 192 self.new_start_line = 0 193 194 else: 195 if name in self.sections and self.sections[name] != "": 196 # Only warn on user-specified duplicate section names 197 if name != SECTION_DEFAULT: 198 self.emit_msg(self.new_start_line, 199 f"duplicate section name '{name}'\n") 200 # Treat as a new paragraph - add a blank line 201 self.sections[name] += '\n' + contents 202 else: 203 self.sections[name] = contents 204 self.section_start_lines[name] = self.new_start_line 205 self.new_start_line = 0 206 207# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 208 209 if start_new: 210 self.section = SECTION_DEFAULT 211 self._contents = [] 212 213 214class KernelDoc: 215 """ 216 Read a C language source or header FILE and extract embedded 217 documentation comments. 218 """ 219 220 # Section names 221 222 section_context = "Context" 223 section_return = "Return" 224 225 undescribed = "-- undescribed --" 226 227 def __init__(self, config, fname): 228 """Initialize internal variables""" 229 230 self.fname = fname 231 self.config = config 232 233 # Initial state for the state machines 234 self.state = state.NORMAL 235 236 # Store entry currently being processed 237 self.entry = None 238 239 # Place all potential outputs into an array 240 self.entries = [] 241 242 # 243 # We need Python 3.7 for its "dicts remember the insertion 244 # order" guarantee 245 # 246 if sys.version_info.major == 3 and sys.version_info.minor < 7: 247 self.emit_msg(0, 248 'Python 3.7 or later is required for correct results') 249 250 def emit_msg(self, ln, msg, warning=True): 251 """Emit a message""" 252 253 log_msg = f"{self.fname}:{ln} {msg}" 254 255 if self.entry: 256 self.entry.emit_msg(log_msg, warning) 257 return 258 259 if warning: 260 self.config.log.warning(log_msg) 261 else: 262 self.config.log.info(log_msg) 263 264 def dump_section(self, start_new=True): 265 """ 266 Dumps section contents to arrays/hashes intended for that purpose. 267 """ 268 269 if self.entry: 270 self.entry.dump_section(start_new) 271 272 # TODO: rename it to store_declaration after removal of kernel-doc.pl 273 def output_declaration(self, dtype, name, **args): 274 """ 275 Stores the entry into an entry array. 276 277 The actual output and output filters will be handled elsewhere 278 """ 279 280 item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) 281 item.warnings = self.entry.warnings 282 283 # Drop empty sections 284 # TODO: improve empty sections logic to emit warnings 285 sections = self.entry.sections 286 for section in ["Description", "Return"]: 287 if section in sections and not sections[section].rstrip(): 288 del sections[section] 289 item.set_sections(sections, self.entry.section_start_lines) 290 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 291 self.entry.parametertypes, 292 self.entry.parameterdesc_start_lines) 293 self.entries.append(item) 294 295 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 296 297 def reset_state(self, ln): 298 """ 299 Ancillary routine to create a new entry. It initializes all 300 variables used by the state machine. 301 """ 302 303 self.entry = KernelEntry(self.config, ln) 304 305 # State flags 306 self.state = state.NORMAL 307 308 def push_parameter(self, ln, decl_type, param, dtype, 309 org_arg, declaration_name): 310 """ 311 Store parameters and their descriptions at self.entry. 312 """ 313 314 if self.entry.anon_struct_union and dtype == "" and param == "}": 315 return # Ignore the ending }; from anonymous struct/union 316 317 self.entry.anon_struct_union = False 318 319 param = KernRe(r'[\[\)].*').sub('', param, count=1) 320 321 if dtype == "" and param.endswith("..."): 322 if KernRe(r'\w\.\.\.$').search(param): 323 # For named variable parameters of the form `x...`, 324 # remove the dots 325 param = param[:-3] 326 else: 327 # Handles unnamed variable parameters 328 param = "..." 329 330 if param not in self.entry.parameterdescs or \ 331 not self.entry.parameterdescs[param]: 332 333 self.entry.parameterdescs[param] = "variable arguments" 334 335 elif dtype == "" and (not param or param == "void"): 336 param = "void" 337 self.entry.parameterdescs[param] = "no arguments" 338 339 elif dtype == "" and param in ["struct", "union"]: 340 # Handle unnamed (anonymous) union or struct 341 dtype = param 342 param = "{unnamed_" + param + "}" 343 self.entry.parameterdescs[param] = "anonymous\n" 344 self.entry.anon_struct_union = True 345 346 # Handle cache group enforcing variables: they do not need 347 # to be described in header files 348 elif "__cacheline_group" in param: 349 # Ignore __cacheline_group_begin and __cacheline_group_end 350 return 351 352 # Warn if parameter has no description 353 # (but ignore ones starting with # as these are not parameters 354 # but inline preprocessor statements) 355 if param not in self.entry.parameterdescs and not param.startswith("#"): 356 self.entry.parameterdescs[param] = self.undescribed 357 358 if "." not in param: 359 if decl_type == 'function': 360 dname = f"{decl_type} parameter" 361 else: 362 dname = f"{decl_type} member" 363 364 self.emit_msg(ln, 365 f"{dname} '{param}' not described in '{declaration_name}'") 366 367 # Strip spaces from param so that it is one continuous string on 368 # parameterlist. This fixes a problem where check_sections() 369 # cannot find a parameter like "addr[6 + 2]" because it actually 370 # appears as "addr[6", "+", "2]" on the parameter list. 371 # However, it's better to maintain the param string unchanged for 372 # output, so just weaken the string compare in check_sections() 373 # to ignore "[blah" in a parameter string. 374 375 self.entry.parameterlist.append(param) 376 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 377 self.entry.parametertypes[param] = org_arg 378 379 380 def create_parameter_list(self, ln, decl_type, args, 381 splitter, declaration_name): 382 """ 383 Creates a list of parameters, storing them at self.entry. 384 """ 385 386 # temporarily replace all commas inside function pointer definition 387 arg_expr = KernRe(r'(\([^\),]+),') 388 while arg_expr.search(args): 389 args = arg_expr.sub(r"\1#", args) 390 391 for arg in args.split(splitter): 392 # Strip comments 393 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 394 395 # Ignore argument attributes 396 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 397 398 # Strip leading/trailing spaces 399 arg = arg.strip() 400 arg = KernRe(r'\s+').sub(' ', arg, count=1) 401 402 if arg.startswith('#'): 403 # Treat preprocessor directive as a typeless variable just to fill 404 # corresponding data structures "correctly". Catch it later in 405 # output_* subs. 406 407 # Treat preprocessor directive as a typeless variable 408 self.push_parameter(ln, decl_type, arg, "", 409 "", declaration_name) 410 411 elif KernRe(r'\(.+\)\s*\(').search(arg): 412 # Pointer-to-function 413 414 arg = arg.replace('#', ',') 415 416 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 417 if r.match(arg): 418 param = r.group(1) 419 else: 420 self.emit_msg(ln, f"Invalid param: {arg}") 421 param = arg 422 423 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 424 self.push_parameter(ln, decl_type, param, dtype, 425 arg, declaration_name) 426 427 elif KernRe(r'\(.+\)\s*\[').search(arg): 428 # Array-of-pointers 429 430 arg = arg.replace('#', ',') 431 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 432 if r.match(arg): 433 param = r.group(1) 434 else: 435 self.emit_msg(ln, f"Invalid param: {arg}") 436 param = arg 437 438 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 439 440 self.push_parameter(ln, decl_type, param, dtype, 441 arg, declaration_name) 442 443 elif arg: 444 arg = KernRe(r'\s*:\s*').sub(":", arg) 445 arg = KernRe(r'\s*\[').sub('[', arg) 446 447 args = KernRe(r'\s*,\s*').split(arg) 448 if args[0] and '*' in args[0]: 449 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 450 451 first_arg = [] 452 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 453 if args[0] and r.match(args[0]): 454 args.pop(0) 455 first_arg.extend(r.group(1)) 456 first_arg.append(r.group(2)) 457 else: 458 first_arg = KernRe(r'\s+').split(args.pop(0)) 459 460 args.insert(0, first_arg.pop()) 461 dtype = ' '.join(first_arg) 462 463 for param in args: 464 if KernRe(r'^(\*+)\s*(.*)').match(param): 465 r = KernRe(r'^(\*+)\s*(.*)') 466 if not r.match(param): 467 self.emit_msg(ln, f"Invalid param: {param}") 468 continue 469 470 param = r.group(1) 471 472 self.push_parameter(ln, decl_type, r.group(2), 473 f"{dtype} {r.group(1)}", 474 arg, declaration_name) 475 476 elif KernRe(r'(.*?):(\w+)').search(param): 477 r = KernRe(r'(.*?):(\w+)') 478 if not r.match(param): 479 self.emit_msg(ln, f"Invalid param: {param}") 480 continue 481 482 if dtype != "": # Skip unnamed bit-fields 483 self.push_parameter(ln, decl_type, r.group(1), 484 f"{dtype}:{r.group(2)}", 485 arg, declaration_name) 486 else: 487 self.push_parameter(ln, decl_type, param, dtype, 488 arg, declaration_name) 489 490 def check_sections(self, ln, decl_name, decl_type): 491 """ 492 Check for errors inside sections, emitting warnings if not found 493 parameters are described. 494 """ 495 for section in self.entry.sections: 496 if section not in self.entry.parameterlist and \ 497 not known_sections.search(section): 498 if decl_type == 'function': 499 dname = f"{decl_type} parameter" 500 else: 501 dname = f"{decl_type} member" 502 self.emit_msg(ln, 503 f"Excess {dname} '{section}' description in '{decl_name}'") 504 505 def check_return_section(self, ln, declaration_name, return_type): 506 """ 507 If the function doesn't return void, warns about the lack of a 508 return description. 509 """ 510 511 if not self.config.wreturn: 512 return 513 514 # Ignore an empty return type (It's a macro) 515 # Ignore functions with a "void" return type (but not "void *") 516 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 517 return 518 519 if not self.entry.sections.get("Return", None): 520 self.emit_msg(ln, 521 f"No description found for return value of '{declaration_name}'") 522 523 def dump_struct(self, ln, proto): 524 """ 525 Store an entry for an struct or union 526 """ 527 528 type_pattern = r'(struct|union)' 529 530 qualifiers = [ 531 "__attribute__", 532 "__packed", 533 "__aligned", 534 "____cacheline_aligned_in_smp", 535 "____cacheline_aligned", 536 ] 537 538 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 539 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') 540 541 # Extract struct/union definition 542 members = None 543 declaration_name = None 544 decl_type = None 545 546 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 547 if r.search(proto): 548 decl_type = r.group(1) 549 declaration_name = r.group(2) 550 members = r.group(3) 551 else: 552 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 553 554 if r.search(proto): 555 decl_type = r.group(1) 556 declaration_name = r.group(3) 557 members = r.group(2) 558 559 if not members: 560 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 561 return 562 563 if self.entry.identifier != declaration_name: 564 self.emit_msg(ln, 565 f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") 566 return 567 568 args_pattern = r'([^,)]+)' 569 570 sub_prefixes = [ 571 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), 572 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), 573 574 # Strip comments 575 (KernRe(r'\/\*.*?\*\/', re.S), ''), 576 577 # Strip attributes 578 (attribute, ' '), 579 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 580 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 581 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 582 (KernRe(r'\s*__packed\s*', re.S), ' '), 583 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 584 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 585 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 586 587 # Unwrap struct_group macros based on this definition: 588 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 589 # which has variants like: struct_group(NAME, MEMBERS...) 590 # Only MEMBERS arguments require documentation. 591 # 592 # Parsing them happens on two steps: 593 # 594 # 1. drop struct group arguments that aren't at MEMBERS, 595 # storing them as STRUCT_GROUP(MEMBERS) 596 # 597 # 2. remove STRUCT_GROUP() ancillary macro. 598 # 599 # The original logic used to remove STRUCT_GROUP() using an 600 # advanced regex: 601 # 602 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 603 # 604 # with two patterns that are incompatible with 605 # Python re module, as it has: 606 # 607 # - a recursive pattern: (?1) 608 # - an atomic grouping: (?>...) 609 # 610 # I tried a simpler version: but it didn't work either: 611 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 612 # 613 # As it doesn't properly match the end parenthesis on some cases. 614 # 615 # So, a better solution was crafted: there's now a NestedMatch 616 # class that ensures that delimiters after a search are properly 617 # matched. So, the implementation to drop STRUCT_GROUP() will be 618 # handled in separate. 619 620 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 621 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 622 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 623 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 624 625 # Replace macros 626 # 627 # TODO: use NestedMatch for FOO($1, $2, ...) matches 628 # 629 # it is better to also move those to the NestedMatch logic, 630 # to ensure that parenthesis will be properly matched. 631 632 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 633 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 634 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 635 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 636 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 637 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 638 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), 639 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), 640 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), 641 (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'), 642 ] 643 644 # Regexes here are guaranteed to have the end limiter matching 645 # the start delimiter. Yet, right now, only one replace group 646 # is allowed. 647 648 sub_nested_prefixes = [ 649 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 650 ] 651 652 for search, sub in sub_prefixes: 653 members = search.sub(sub, members) 654 655 nested = NestedMatch() 656 657 for search, sub in sub_nested_prefixes: 658 members = nested.sub(search, sub, members) 659 660 # Keeps the original declaration as-is 661 declaration = members 662 663 # Split nested struct/union elements 664 # 665 # This loop was simpler at the original kernel-doc perl version, as 666 # while ($members =~ m/$struct_members/) { ... } 667 # reads 'members' string on each interaction. 668 # 669 # Python behavior is different: it parses 'members' only once, 670 # creating a list of tuples from the first interaction. 671 # 672 # On other words, this won't get nested structs. 673 # 674 # So, we need to have an extra loop on Python to override such 675 # re limitation. 676 677 while True: 678 tuples = struct_members.findall(members) 679 if not tuples: 680 break 681 682 for t in tuples: 683 newmember = "" 684 maintype = t[0] 685 s_ids = t[5] 686 content = t[3] 687 688 oldmember = "".join(t) 689 690 for s_id in s_ids.split(','): 691 s_id = s_id.strip() 692 693 newmember += f"{maintype} {s_id}; " 694 s_id = KernRe(r'[:\[].*').sub('', s_id) 695 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 696 697 for arg in content.split(';'): 698 arg = arg.strip() 699 700 if not arg: 701 continue 702 703 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') 704 if r.match(arg): 705 # Pointer-to-function 706 dtype = r.group(1) 707 name = r.group(2) 708 extra = r.group(3) 709 710 if not name: 711 continue 712 713 if not s_id: 714 # Anonymous struct/union 715 newmember += f"{dtype}{name}{extra}; " 716 else: 717 newmember += f"{dtype}{s_id}.{name}{extra}; " 718 719 else: 720 arg = arg.strip() 721 # Handle bitmaps 722 arg = KernRe(r':\s*\d+\s*').sub('', arg) 723 724 # Handle arrays 725 arg = KernRe(r'\[.*\]').sub('', arg) 726 727 # Handle multiple IDs 728 arg = KernRe(r'\s*,\s*').sub(',', arg) 729 730 r = KernRe(r'(.*)\s+([\S+,]+)') 731 732 if r.search(arg): 733 dtype = r.group(1) 734 names = r.group(2) 735 else: 736 newmember += f"{arg}; " 737 continue 738 739 for name in names.split(','): 740 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() 741 742 if not name: 743 continue 744 745 if not s_id: 746 # Anonymous struct/union 747 newmember += f"{dtype} {name}; " 748 else: 749 newmember += f"{dtype} {s_id}.{name}; " 750 751 members = members.replace(oldmember, newmember) 752 753 # Ignore other nested elements, like enums 754 members = re.sub(r'(\{[^\{\}]*\})', '', members) 755 756 self.create_parameter_list(ln, decl_type, members, ';', 757 declaration_name) 758 self.check_sections(ln, declaration_name, decl_type) 759 760 # Adjust declaration for better display 761 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 762 declaration = KernRe(r'\}\s+;').sub('};', declaration) 763 764 # Better handle inlined enums 765 while True: 766 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 767 if not r.search(declaration): 768 break 769 770 declaration = r.sub(r'\1,\n\2', declaration) 771 772 def_args = declaration.split('\n') 773 level = 1 774 declaration = "" 775 for clause in def_args: 776 777 clause = clause.strip() 778 clause = KernRe(r'\s+').sub(' ', clause, count=1) 779 780 if not clause: 781 continue 782 783 if '}' in clause and level > 1: 784 level -= 1 785 786 if not KernRe(r'^\s*#').match(clause): 787 declaration += "\t" * level 788 789 declaration += "\t" + clause + "\n" 790 if "{" in clause and "}" not in clause: 791 level += 1 792 793 self.output_declaration(decl_type, declaration_name, 794 definition=declaration, 795 purpose=self.entry.declaration_purpose) 796 797 def dump_enum(self, ln, proto): 798 """ 799 Stores an enum inside self.entries array. 800 """ 801 802 # Ignore members marked private 803 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) 804 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) 805 806 # Strip comments 807 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) 808 809 # Strip #define macros inside enums 810 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 811 812 # 813 # Parse out the name and members of the enum. Typedef form first. 814 # 815 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 816 if r.search(proto): 817 declaration_name = r.group(2) 818 members = r.group(1).rstrip() 819 # 820 # Failing that, look for a straight enum 821 # 822 else: 823 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 824 if r.match(proto): 825 declaration_name = r.group(1) 826 members = r.group(2).rstrip() 827 # 828 # OK, this isn't going to work. 829 # 830 else: 831 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 832 return 833 # 834 # Make sure we found what we were expecting. 835 # 836 if self.entry.identifier != declaration_name: 837 if self.entry.identifier == "": 838 self.emit_msg(ln, 839 f"{proto}: wrong kernel-doc identifier on prototype") 840 else: 841 self.emit_msg(ln, 842 f"expecting prototype for enum {self.entry.identifier}. " 843 f"Prototype was for enum {declaration_name} instead") 844 return 845 846 if not declaration_name: 847 declaration_name = "(anonymous)" 848 # 849 # Parse out the name of each enum member, and verify that we 850 # have a description for it. 851 # 852 member_set = set() 853 members = KernRe(r'\([^;)]*\)').sub('', members) 854 for arg in members.split(','): 855 if not arg: 856 continue 857 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 858 self.entry.parameterlist.append(arg) 859 if arg not in self.entry.parameterdescs: 860 self.entry.parameterdescs[arg] = self.undescribed 861 self.emit_msg(ln, 862 f"Enum value '{arg}' not described in enum '{declaration_name}'") 863 member_set.add(arg) 864 # 865 # Ensure that every described member actually exists in the enum. 866 # 867 for k in self.entry.parameterdescs: 868 if k not in member_set: 869 self.emit_msg(ln, 870 f"Excess enum value '%{k}' description in '{declaration_name}'") 871 872 self.output_declaration('enum', declaration_name, 873 purpose=self.entry.declaration_purpose) 874 875 def dump_declaration(self, ln, prototype): 876 """ 877 Stores a data declaration inside self.entries array. 878 """ 879 880 if self.entry.decl_type == "enum": 881 self.dump_enum(ln, prototype) 882 elif self.entry.decl_type == "typedef": 883 self.dump_typedef(ln, prototype) 884 elif self.entry.decl_type in ["union", "struct"]: 885 self.dump_struct(ln, prototype) 886 else: 887 # This would be a bug 888 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 889 890 def dump_function(self, ln, prototype): 891 """ 892 Stores a function of function macro inside self.entries array. 893 """ 894 895 func_macro = False 896 return_type = '' 897 decl_type = 'function' 898 899 # Prefixes that would be removed 900 sub_prefixes = [ 901 (r"^static +", "", 0), 902 (r"^extern +", "", 0), 903 (r"^asmlinkage +", "", 0), 904 (r"^inline +", "", 0), 905 (r"^__inline__ +", "", 0), 906 (r"^__inline +", "", 0), 907 (r"^__always_inline +", "", 0), 908 (r"^noinline +", "", 0), 909 (r"^__FORTIFY_INLINE +", "", 0), 910 (r"__init +", "", 0), 911 (r"__init_or_module +", "", 0), 912 (r"__deprecated +", "", 0), 913 (r"__flatten +", "", 0), 914 (r"__meminit +", "", 0), 915 (r"__must_check +", "", 0), 916 (r"__weak +", "", 0), 917 (r"__sched +", "", 0), 918 (r"_noprof", "", 0), 919 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 920 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 921 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 922 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 923 (r"__attribute_const__ +", "", 0), 924 925 # It seems that Python support for re.X is broken: 926 # At least for me (Python 3.13), this didn't work 927# (r""" 928# __attribute__\s*\(\( 929# (?: 930# [\w\s]+ # attribute name 931# (?:\([^)]*\))? # attribute arguments 932# \s*,? # optional comma at the end 933# )+ 934# \)\)\s+ 935# """, "", re.X), 936 937 # So, remove whitespaces and comments from it 938 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 939 ] 940 941 for search, sub, flags in sub_prefixes: 942 prototype = KernRe(search, flags).sub(sub, prototype) 943 944 # Macros are a special case, as they change the prototype format 945 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 946 if new_proto != prototype: 947 is_define_proto = True 948 prototype = new_proto 949 else: 950 is_define_proto = False 951 952 # Yes, this truly is vile. We are looking for: 953 # 1. Return type (may be nothing if we're looking at a macro) 954 # 2. Function name 955 # 3. Function parameters. 956 # 957 # All the while we have to watch out for function pointer parameters 958 # (which IIRC is what the two sections are for), C types (these 959 # regexps don't even start to express all the possibilities), and 960 # so on. 961 # 962 # If you mess with these regexps, it's a good idea to check that 963 # the following functions' documentation still comes out right: 964 # - parport_register_device (function pointer parameters) 965 # - atomic_set (macro) 966 # - pci_match_device, __copy_to_user (long return type) 967 968 name = r'[a-zA-Z0-9_~:]+' 969 prototype_end1 = r'[^\(]*' 970 prototype_end2 = r'[^\{]*' 971 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 972 973 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 974 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 975 976 type1 = r'(?:[\w\s]+)?' 977 type2 = r'(?:[\w\s]+\*+)+' 978 979 found = False 980 981 if is_define_proto: 982 r = KernRe(r'^()(' + name + r')\s+') 983 984 if r.search(prototype): 985 return_type = '' 986 declaration_name = r.group(2) 987 func_macro = True 988 989 found = True 990 991 if not found: 992 patterns = [ 993 rf'^()({name})\s*{prototype_end}', 994 rf'^({type1})\s+({name})\s*{prototype_end}', 995 rf'^({type2})\s*({name})\s*{prototype_end}', 996 ] 997 998 for p in patterns: 999 r = KernRe(p) 1000 1001 if r.match(prototype): 1002 1003 return_type = r.group(1) 1004 declaration_name = r.group(2) 1005 args = r.group(3) 1006 1007 self.create_parameter_list(ln, decl_type, args, ',', 1008 declaration_name) 1009 1010 found = True 1011 break 1012 if not found: 1013 self.emit_msg(ln, 1014 f"cannot understand function prototype: '{prototype}'") 1015 return 1016 1017 if self.entry.identifier != declaration_name: 1018 self.emit_msg(ln, 1019 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") 1020 return 1021 1022 self.check_sections(ln, declaration_name, "function") 1023 1024 self.check_return_section(ln, declaration_name, return_type) 1025 1026 if 'typedef' in return_type: 1027 self.output_declaration(decl_type, declaration_name, 1028 typedef=True, 1029 functiontype=return_type, 1030 purpose=self.entry.declaration_purpose, 1031 func_macro=func_macro) 1032 else: 1033 self.output_declaration(decl_type, declaration_name, 1034 typedef=False, 1035 functiontype=return_type, 1036 purpose=self.entry.declaration_purpose, 1037 func_macro=func_macro) 1038 1039 def dump_typedef(self, ln, proto): 1040 """ 1041 Stores a typedef inside self.entries array. 1042 """ 1043 1044 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1045 typedef_ident = r'\*?\s*(\w\S+)\s*' 1046 typedef_args = r'\s*\((.*)\);' 1047 1048 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1049 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1050 1051 # Strip comments 1052 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1053 1054 # Parse function typedef prototypes 1055 for r in [typedef1, typedef2]: 1056 if not r.match(proto): 1057 continue 1058 1059 return_type = r.group(1).strip() 1060 declaration_name = r.group(2) 1061 args = r.group(3) 1062 1063 if self.entry.identifier != declaration_name: 1064 self.emit_msg(ln, 1065 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1066 return 1067 1068 decl_type = 'function' 1069 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1070 1071 self.output_declaration(decl_type, declaration_name, 1072 typedef=True, 1073 functiontype=return_type, 1074 purpose=self.entry.declaration_purpose) 1075 return 1076 1077 # Handle nested parentheses or brackets 1078 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1079 while r.search(proto): 1080 proto = r.sub('', proto) 1081 1082 # Parse simple typedefs 1083 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1084 if r.match(proto): 1085 declaration_name = r.group(1) 1086 1087 if self.entry.identifier != declaration_name: 1088 self.emit_msg(ln, 1089 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1090 return 1091 1092 self.output_declaration('typedef', declaration_name, 1093 purpose=self.entry.declaration_purpose) 1094 return 1095 1096 self.emit_msg(ln, "error: Cannot parse typedef!") 1097 1098 @staticmethod 1099 def process_export(function_set, line): 1100 """ 1101 process EXPORT_SYMBOL* tags 1102 1103 This method doesn't use any variable from the class, so declare it 1104 with a staticmethod decorator. 1105 """ 1106 1107 # We support documenting some exported symbols with different 1108 # names. A horrible hack. 1109 suffixes = [ '_noprof' ] 1110 1111 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1112 # multiple export lines would violate Kernel coding style. 1113 1114 if export_symbol.search(line): 1115 symbol = export_symbol.group(2) 1116 elif export_symbol_ns.search(line): 1117 symbol = export_symbol_ns.group(2) 1118 else: 1119 return False 1120 # 1121 # Found an export, trim out any special suffixes 1122 # 1123 for suffix in suffixes: 1124 # Be backward compatible with Python < 3.9 1125 if symbol.endswith(suffix): 1126 symbol = symbol[:-len(suffix)] 1127 function_set.add(symbol) 1128 return True 1129 1130 def process_normal(self, ln, line): 1131 """ 1132 STATE_NORMAL: looking for the /** to begin everything. 1133 """ 1134 1135 if not doc_start.match(line): 1136 return 1137 1138 # start a new entry 1139 self.reset_state(ln) 1140 1141 # next line is always the function name 1142 self.state = state.NAME 1143 1144 def process_name(self, ln, line): 1145 """ 1146 STATE_NAME: Looking for the "name - description" line 1147 """ 1148 # 1149 # Check for a DOC: block and handle them specially. 1150 # 1151 if doc_block.search(line): 1152 1153 if not doc_block.group(1): 1154 self.entry.begin_section(ln, "Introduction") 1155 else: 1156 self.entry.begin_section(ln, doc_block.group(1)) 1157 1158 self.entry.identifier = self.entry.section 1159 self.state = state.DOCBLOCK 1160 # 1161 # Otherwise we're looking for a normal kerneldoc declaration line. 1162 # 1163 elif doc_decl.search(line): 1164 self.entry.identifier = doc_decl.group(1) 1165 1166 # Test for data declaration 1167 if doc_begin_data.search(line): 1168 self.entry.decl_type = doc_begin_data.group(1) 1169 self.entry.identifier = doc_begin_data.group(2) 1170 # 1171 # Look for a function description 1172 # 1173 elif doc_begin_func.search(line): 1174 self.entry.identifier = doc_begin_func.group(1) 1175 self.entry.decl_type = "function" 1176 # 1177 # We struck out. 1178 # 1179 else: 1180 self.emit_msg(ln, 1181 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1182 self.state = state.NORMAL 1183 return 1184 # 1185 # OK, set up for a new kerneldoc entry. 1186 # 1187 self.state = state.BODY 1188 self.entry.identifier = self.entry.identifier.strip(" ") 1189 # if there's no @param blocks need to set up default section here 1190 self.entry.begin_section(ln + 1) 1191 # 1192 # Find the description portion, which *should* be there but 1193 # isn't always. 1194 # (We should be able to capture this from the previous parsing - someday) 1195 # 1196 r = KernRe("[-:](.*)") 1197 if r.search(line): 1198 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1199 self.state = state.DECLARATION 1200 else: 1201 self.entry.declaration_purpose = "" 1202 1203 if not self.entry.declaration_purpose and self.config.wshort_desc: 1204 self.emit_msg(ln, 1205 f"missing initial short description on line:\n{line}") 1206 1207 if not self.entry.identifier and self.entry.decl_type != "enum": 1208 self.emit_msg(ln, 1209 f"wrong kernel-doc identifier on line:\n{line}") 1210 self.state = state.NORMAL 1211 1212 if self.config.verbose: 1213 self.emit_msg(ln, 1214 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1215 warning=False) 1216 # 1217 # Failed to find an identifier. Emit a warning 1218 # 1219 else: 1220 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1221 1222 # 1223 # Helper function to determine if a new section is being started. 1224 # 1225 def is_new_section(self, ln, line): 1226 if doc_sect.search(line): 1227 self.state = state.BODY 1228 # 1229 # Pick out the name of our new section, tweaking it if need be. 1230 # 1231 newsection = doc_sect.group(1) 1232 if newsection.lower() == 'description': 1233 newsection = 'Description' 1234 elif newsection.lower() == 'context': 1235 newsection = 'Context' 1236 self.state = state.SPECIAL_SECTION 1237 elif newsection.lower() in ["@return", "@returns", 1238 "return", "returns"]: 1239 newsection = "Return" 1240 self.state = state.SPECIAL_SECTION 1241 elif newsection[0] == '@': 1242 self.state = state.SPECIAL_SECTION 1243 # 1244 # Initialize the contents, and get the new section going. 1245 # 1246 newcontents = doc_sect.group(2) 1247 if not newcontents: 1248 newcontents = "" 1249 self.dump_section() 1250 self.entry.begin_section(ln, newsection) 1251 self.entry.leading_space = None 1252 1253 self.entry.add_text(newcontents.lstrip()) 1254 return True 1255 return False 1256 1257 # 1258 # Helper function to detect (and effect) the end of a kerneldoc comment. 1259 # 1260 def is_comment_end(self, ln, line): 1261 if doc_end.search(line): 1262 self.dump_section() 1263 1264 # Look for doc_com + <text> + doc_end: 1265 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1266 if r.match(line): 1267 self.emit_msg(ln, f"suspicious ending line: {line}") 1268 1269 self.entry.prototype = "" 1270 self.entry.new_start_line = ln + 1 1271 1272 self.state = state.PROTO 1273 return True 1274 return False 1275 1276 1277 def process_decl(self, ln, line): 1278 """ 1279 STATE_DECLARATION: We've seen the beginning of a declaration 1280 """ 1281 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1282 return 1283 # 1284 # Look for anything with the " * " line beginning. 1285 # 1286 if doc_content.search(line): 1287 cont = doc_content.group(1) 1288 # 1289 # A blank line means that we have moved out of the declaration 1290 # part of the comment (without any "special section" parameter 1291 # descriptions). 1292 # 1293 if cont == "": 1294 self.state = state.BODY 1295 # 1296 # Otherwise we have more of the declaration section to soak up. 1297 # 1298 else: 1299 self.entry.declaration_purpose = \ 1300 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1301 else: 1302 # Unknown line, ignore 1303 self.emit_msg(ln, f"bad line: {line}") 1304 1305 1306 def process_special(self, ln, line): 1307 """ 1308 STATE_SPECIAL_SECTION: a section ending with a blank line 1309 """ 1310 # 1311 # If we have hit a blank line (only the " * " marker), then this 1312 # section is done. 1313 # 1314 if KernRe(r"\s*\*\s*$").match(line): 1315 self.entry.begin_section(ln, dump = True) 1316 self.state = state.BODY 1317 return 1318 # 1319 # Not a blank line, look for the other ways to end the section. 1320 # 1321 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1322 return 1323 # 1324 # OK, we should have a continuation of the text for this section. 1325 # 1326 if doc_content.search(line): 1327 cont = doc_content.group(1) 1328 # 1329 # If the lines of text after the first in a special section have 1330 # leading white space, we need to trim it out or Sphinx will get 1331 # confused. For the second line (the None case), see what we 1332 # find there and remember it. 1333 # 1334 if self.entry.leading_space is None: 1335 r = KernRe(r'^(\s+)') 1336 if r.match(cont): 1337 self.entry.leading_space = len(r.group(1)) 1338 else: 1339 self.entry.leading_space = 0 1340 # 1341 # Otherwise, before trimming any leading chars, be *sure* 1342 # that they are white space. We should maybe warn if this 1343 # isn't the case. 1344 # 1345 for i in range(0, self.entry.leading_space): 1346 if cont[i] != " ": 1347 self.entry.leading_space = i 1348 break 1349 # 1350 # Add the trimmed result to the section and we're done. 1351 # 1352 self.entry.add_text(cont[self.entry.leading_space:]) 1353 else: 1354 # Unknown line, ignore 1355 self.emit_msg(ln, f"bad line: {line}") 1356 1357 def process_body(self, ln, line): 1358 """ 1359 STATE_BODY: the bulk of a kerneldoc comment. 1360 """ 1361 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1362 return 1363 1364 if doc_content.search(line): 1365 cont = doc_content.group(1) 1366 self.entry.add_text(cont) 1367 else: 1368 # Unknown line, ignore 1369 self.emit_msg(ln, f"bad line: {line}") 1370 1371 def process_inline_name(self, ln, line): 1372 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1373 1374 if doc_inline_sect.search(line): 1375 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1376 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1377 self.state = state.INLINE_TEXT 1378 elif doc_inline_end.search(line): 1379 self.dump_section() 1380 self.state = state.PROTO 1381 elif doc_content.search(line): 1382 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1383 self.state = state.PROTO 1384 # else ... ?? 1385 1386 def process_inline_text(self, ln, line): 1387 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1388 1389 if doc_inline_end.search(line): 1390 self.dump_section() 1391 self.state = state.PROTO 1392 elif doc_content.search(line): 1393 self.entry.add_text(doc_content.group(1)) 1394 # else ... ?? 1395 1396 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1397 """ 1398 Handle syscall definitions 1399 """ 1400 1401 is_void = False 1402 1403 # Strip newlines/CR's 1404 proto = re.sub(r'[\r\n]+', ' ', proto) 1405 1406 # Check if it's a SYSCALL_DEFINE0 1407 if 'SYSCALL_DEFINE0' in proto: 1408 is_void = True 1409 1410 # Replace SYSCALL_DEFINE with correct return type & function name 1411 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1412 1413 r = KernRe(r'long\s+(sys_.*?),') 1414 if r.search(proto): 1415 proto = KernRe(',').sub('(', proto, count=1) 1416 elif is_void: 1417 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1418 1419 # Now delete all of the odd-numbered commas in the proto 1420 # so that argument types & names don't have a comma between them 1421 count = 0 1422 length = len(proto) 1423 1424 if is_void: 1425 length = 0 # skip the loop if is_void 1426 1427 for ix in range(length): 1428 if proto[ix] == ',': 1429 count += 1 1430 if count % 2 == 1: 1431 proto = proto[:ix] + ' ' + proto[ix + 1:] 1432 1433 return proto 1434 1435 def tracepoint_munge(self, ln, proto): 1436 """ 1437 Handle tracepoint definitions 1438 """ 1439 1440 tracepointname = None 1441 tracepointargs = None 1442 1443 # Match tracepoint name based on different patterns 1444 r = KernRe(r'TRACE_EVENT\((.*?),') 1445 if r.search(proto): 1446 tracepointname = r.group(1) 1447 1448 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1449 if r.search(proto): 1450 tracepointname = r.group(1) 1451 1452 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1453 if r.search(proto): 1454 tracepointname = r.group(2) 1455 1456 if tracepointname: 1457 tracepointname = tracepointname.lstrip() 1458 1459 r = KernRe(r'TP_PROTO\((.*?)\)') 1460 if r.search(proto): 1461 tracepointargs = r.group(1) 1462 1463 if not tracepointname or not tracepointargs: 1464 self.emit_msg(ln, 1465 f"Unrecognized tracepoint format:\n{proto}\n") 1466 else: 1467 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1468 self.entry.identifier = f"trace_{self.entry.identifier}" 1469 1470 return proto 1471 1472 def process_proto_function(self, ln, line): 1473 """Ancillary routine to process a function prototype""" 1474 1475 # strip C99-style comments to end of line 1476 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1477 # 1478 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1479 # 1480 if KernRe(r'\s*#\s*define').match(line): 1481 self.entry.prototype = line 1482 elif not line.startswith('#'): # skip other preprocessor stuff 1483 r = KernRe(r'([^\{]*)') 1484 if r.match(line): 1485 self.entry.prototype += r.group(1) + " " 1486 # 1487 # If we now have the whole prototype, clean it up and declare victory. 1488 # 1489 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1490 # strip comments and surrounding spaces 1491 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1492 # 1493 # Handle self.entry.prototypes for function pointers like: 1494 # int (*pcs_config)(struct foo) 1495 # by turning it into 1496 # int pcs_config(struct foo) 1497 # 1498 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1499 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1500 # 1501 # Handle special declaration syntaxes 1502 # 1503 if 'SYSCALL_DEFINE' in self.entry.prototype: 1504 self.entry.prototype = self.syscall_munge(ln, 1505 self.entry.prototype) 1506 else: 1507 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1508 if r.search(self.entry.prototype): 1509 self.entry.prototype = self.tracepoint_munge(ln, 1510 self.entry.prototype) 1511 # 1512 # ... and we're done 1513 # 1514 self.dump_function(ln, self.entry.prototype) 1515 self.reset_state(ln) 1516 1517 def process_proto_type(self, ln, line): 1518 """Ancillary routine to process a type""" 1519 1520 # Strip C99-style comments and surrounding whitespace 1521 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1522 if not line: 1523 return # nothing to see here 1524 1525 # To distinguish preprocessor directive from regular declaration later. 1526 if line.startswith('#'): 1527 line += ";" 1528 # 1529 # Split the declaration on any of { } or ;, and accumulate pieces 1530 # until we hit a semicolon while not inside {brackets} 1531 # 1532 r = KernRe(r'(.*?)([{};])') 1533 for chunk in r.split(line): 1534 if chunk: # Ignore empty matches 1535 self.entry.prototype += chunk 1536 # 1537 # This cries out for a match statement ... someday after we can 1538 # drop Python 3.9 ... 1539 # 1540 if chunk == '{': 1541 self.entry.brcount += 1 1542 elif chunk == '}': 1543 self.entry.brcount -= 1 1544 elif chunk == ';' and self.entry.brcount <= 0: 1545 self.dump_declaration(ln, self.entry.prototype) 1546 self.reset_state(ln) 1547 return 1548 # 1549 # We hit the end of the line while still in the declaration; put 1550 # in a space to represent the newline. 1551 # 1552 self.entry.prototype += ' ' 1553 1554 def process_proto(self, ln, line): 1555 """STATE_PROTO: reading a function/whatever prototype.""" 1556 1557 if doc_inline_oneline.search(line): 1558 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1559 self.entry.add_text(doc_inline_oneline.group(2)) 1560 self.dump_section() 1561 1562 elif doc_inline_start.search(line): 1563 self.state = state.INLINE_NAME 1564 1565 elif self.entry.decl_type == 'function': 1566 self.process_proto_function(ln, line) 1567 1568 else: 1569 self.process_proto_type(ln, line) 1570 1571 def process_docblock(self, ln, line): 1572 """STATE_DOCBLOCK: within a DOC: block.""" 1573 1574 if doc_end.search(line): 1575 self.dump_section() 1576 self.output_declaration("doc", self.entry.identifier) 1577 self.reset_state(ln) 1578 1579 elif doc_content.search(line): 1580 self.entry.add_text(doc_content.group(1)) 1581 1582 def parse_export(self): 1583 """ 1584 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1585 """ 1586 1587 export_table = set() 1588 1589 try: 1590 with open(self.fname, "r", encoding="utf8", 1591 errors="backslashreplace") as fp: 1592 1593 for line in fp: 1594 self.process_export(export_table, line) 1595 1596 except IOError: 1597 return None 1598 1599 return export_table 1600 1601 # 1602 # The state/action table telling us which function to invoke in 1603 # each state. 1604 # 1605 state_actions = { 1606 state.NORMAL: process_normal, 1607 state.NAME: process_name, 1608 state.BODY: process_body, 1609 state.DECLARATION: process_decl, 1610 state.SPECIAL_SECTION: process_special, 1611 state.INLINE_NAME: process_inline_name, 1612 state.INLINE_TEXT: process_inline_text, 1613 state.PROTO: process_proto, 1614 state.DOCBLOCK: process_docblock, 1615 } 1616 1617 def parse_kdoc(self): 1618 """ 1619 Open and process each line of a C source file. 1620 The parsing is controlled via a state machine, and the line is passed 1621 to a different process function depending on the state. The process 1622 function may update the state as needed. 1623 1624 Besides parsing kernel-doc tags, it also parses export symbols. 1625 """ 1626 1627 prev = "" 1628 prev_ln = None 1629 export_table = set() 1630 1631 try: 1632 with open(self.fname, "r", encoding="utf8", 1633 errors="backslashreplace") as fp: 1634 for ln, line in enumerate(fp): 1635 1636 line = line.expandtabs().strip("\n") 1637 1638 # Group continuation lines on prototypes 1639 if self.state == state.PROTO: 1640 if line.endswith("\\"): 1641 prev += line.rstrip("\\") 1642 if not prev_ln: 1643 prev_ln = ln 1644 continue 1645 1646 if prev: 1647 ln = prev_ln 1648 line = prev + line 1649 prev = "" 1650 prev_ln = None 1651 1652 self.config.log.debug("%d %s: %s", 1653 ln, state.name[self.state], 1654 line) 1655 1656 # This is an optimization over the original script. 1657 # There, when export_file was used for the same file, 1658 # it was read twice. Here, we use the already-existing 1659 # loop to parse exported symbols as well. 1660 # 1661 if (self.state != state.NORMAL) or \ 1662 not self.process_export(export_table, line): 1663 # Hand this line to the appropriate state handler 1664 self.state_actions[self.state](self, ln, line) 1665 1666 except OSError: 1667 self.config.log.error(f"Error: Cannot open file {self.fname}") 1668 1669 return export_table, self.entries 1670