1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19from kdoc_item import KdocItem 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45doc_sect = doc_com + \ 46 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 51doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 52doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 53doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 54attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 55 flags=re.I | re.S, cache=False) 56 57export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 58export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', 
cache=False) 59 60type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 61 62# 63# Tests for the beginning of a kerneldoc block in its various forms. 64# 65doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 66doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 67doc_begin_func = KernRe(str(doc_com) + # initial " * ' 68 r"(?:\w+\s*\*\s*)?" + # type (not captured) 69 r'(?:define\s+)?' + # possible "define" (not captured) 70 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 71 r'(?:[-:].*)?$', # description (not captured) 72 cache = False) 73 74# 75# A little helper to get rid of excess white space 76# 77multi_space = KernRe(r'\s\s+') 78def trim_whitespace(s): 79 return multi_space.sub(' ', s.strip()) 80 81class state: 82 """ 83 State machine enums 84 """ 85 86 # Parser states 87 NORMAL = 0 # normal code 88 NAME = 1 # looking for function name 89 DECLARATION = 2 # We have seen a declaration which might not be done 90 BODY = 3 # the body of the comment 91 SPECIAL_SECTION = 4 # doc section ending with a blank line 92 PROTO = 5 # scanning prototype 93 DOCBLOCK = 6 # documentation block 94 INLINE_NAME = 7 # gathering doc outside main block 95 INLINE_TEXT = 8 # reading the body of inline docs 96 97 name = [ 98 "NORMAL", 99 "NAME", 100 "DECLARATION", 101 "BODY", 102 "SPECIAL_SECTION", 103 "PROTO", 104 "DOCBLOCK", 105 "INLINE_NAME", 106 "INLINE_TEXT", 107 ] 108 109 110SECTION_DEFAULT = "Description" # default section 111 112class KernelEntry: 113 114 def __init__(self, config, ln): 115 self.config = config 116 117 self._contents = [] 118 self.sectcheck = "" 119 self.struct_actual = "" 120 self.prototype = "" 121 122 self.warnings = [] 123 124 self.parameterlist = [] 125 self.parameterdescs = {} 126 self.parametertypes = {} 127 self.parameterdesc_start_lines = {} 128 129 self.section_start_lines = {} 130 self.sections = {} 131 132 self.anon_struct_union = False 133 134 self.leading_space = None 135 136 # 
State flags 137 self.brcount = 0 138 self.declaration_start_line = ln + 1 139 140 # 141 # Management of section contents 142 # 143 def add_text(self, text): 144 self._contents.append(text) 145 146 def contents(self): 147 return '\n'.join(self._contents) + '\n' 148 149 # TODO: rename to emit_message after removal of kernel-doc.pl 150 def emit_msg(self, log_msg, warning=True): 151 """Emit a message""" 152 153 if not warning: 154 self.config.log.info(log_msg) 155 return 156 157 # Delegate warning output to output logic, as this way it 158 # will report warnings/info only for symbols that are output 159 160 self.warnings.append(log_msg) 161 return 162 163 # 164 # Begin a new section. 165 # 166 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 167 if dump: 168 self.dump_section(start_new = True) 169 self.section = title 170 self.new_start_line = line_no 171 172 def dump_section(self, start_new=True): 173 """ 174 Dumps section contents to arrays/hashes intended for that purpose. 175 """ 176 # 177 # If we have accumulated no contents in the default ("description") 178 # section, don't bother. 
179 # 180 if self.section == SECTION_DEFAULT and not self._contents: 181 return 182 name = self.section 183 contents = self.contents() 184 185 if type_param.match(name): 186 name = type_param.group(1) 187 188 self.parameterdescs[name] = contents 189 self.parameterdesc_start_lines[name] = self.new_start_line 190 191 self.sectcheck += name + " " 192 self.new_start_line = 0 193 194 else: 195 if name in self.sections and self.sections[name] != "": 196 # Only warn on user-specified duplicate section names 197 if name != SECTION_DEFAULT: 198 self.emit_msg(self.new_start_line, 199 f"duplicate section name '{name}'\n") 200 # Treat as a new paragraph - add a blank line 201 self.sections[name] += '\n' + contents 202 else: 203 self.sections[name] = contents 204 self.section_start_lines[name] = self.new_start_line 205 self.new_start_line = 0 206 207# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 208 209 if start_new: 210 self.section = SECTION_DEFAULT 211 self._contents = [] 212 213 214class KernelDoc: 215 """ 216 Read a C language source or header FILE and extract embedded 217 documentation comments. 
    """

    # Section names

    section_context = "Context"
    section_return = "Return"

    # Placeholder description stored for parameters that have no description
    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

    def emit_msg(self, ln, msg, warning=True):
        """
        Emit a message, prefixed with "<fname>:<ln> ".

        While an entry is being processed the message is handed to the
        entry; otherwise it is logged directly via ``config.log``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if self.entry:
            self.entry.emit_msg(log_msg, warning)
            return

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Does nothing when there is no current entry.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Stores the entry into an entry array.

        The actual output and output filters will be handled elsewhere
        """

        item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
        # The warnings queued at the entry travel together with the item
        item.warnings = self.entry.warnings

        sections = item.get('sections', {})

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]

        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
291 """ 292 293 self.entry = KernelEntry(self.config, ln) 294 295 # State flags 296 self.state = state.NORMAL 297 298 def push_parameter(self, ln, decl_type, param, dtype, 299 org_arg, declaration_name): 300 """ 301 Store parameters and their descriptions at self.entry. 302 """ 303 304 if self.entry.anon_struct_union and dtype == "" and param == "}": 305 return # Ignore the ending }; from anonymous struct/union 306 307 self.entry.anon_struct_union = False 308 309 param = KernRe(r'[\[\)].*').sub('', param, count=1) 310 311 if dtype == "" and param.endswith("..."): 312 if KernRe(r'\w\.\.\.$').search(param): 313 # For named variable parameters of the form `x...`, 314 # remove the dots 315 param = param[:-3] 316 else: 317 # Handles unnamed variable parameters 318 param = "..." 319 320 if param not in self.entry.parameterdescs or \ 321 not self.entry.parameterdescs[param]: 322 323 self.entry.parameterdescs[param] = "variable arguments" 324 325 elif dtype == "" and (not param or param == "void"): 326 param = "void" 327 self.entry.parameterdescs[param] = "no arguments" 328 329 elif dtype == "" and param in ["struct", "union"]: 330 # Handle unnamed (anonymous) union or struct 331 dtype = param 332 param = "{unnamed_" + param + "}" 333 self.entry.parameterdescs[param] = "anonymous\n" 334 self.entry.anon_struct_union = True 335 336 # Handle cache group enforcing variables: they do not need 337 # to be described in header files 338 elif "__cacheline_group" in param: 339 # Ignore __cacheline_group_begin and __cacheline_group_end 340 return 341 342 # Warn if parameter has no description 343 # (but ignore ones starting with # as these are not parameters 344 # but inline preprocessor statements) 345 if param not in self.entry.parameterdescs and not param.startswith("#"): 346 self.entry.parameterdescs[param] = self.undescribed 347 348 if "." 
not in param: 349 if decl_type == 'function': 350 dname = f"{decl_type} parameter" 351 else: 352 dname = f"{decl_type} member" 353 354 self.emit_msg(ln, 355 f"{dname} '{param}' not described in '{declaration_name}'") 356 357 # Strip spaces from param so that it is one continuous string on 358 # parameterlist. This fixes a problem where check_sections() 359 # cannot find a parameter like "addr[6 + 2]" because it actually 360 # appears as "addr[6", "+", "2]" on the parameter list. 361 # However, it's better to maintain the param string unchanged for 362 # output, so just weaken the string compare in check_sections() 363 # to ignore "[blah" in a parameter string. 364 365 self.entry.parameterlist.append(param) 366 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 367 self.entry.parametertypes[param] = org_arg 368 369 def save_struct_actual(self, actual): 370 """ 371 Strip all spaces from the actual param so that it looks like 372 one string item. 373 """ 374 375 actual = KernRe(r'\s*').sub("", actual, count=1) 376 377 self.entry.struct_actual += actual + " " 378 379 def create_parameter_list(self, ln, decl_type, args, 380 splitter, declaration_name): 381 """ 382 Creates a list of parameters, storing them at self.entry. 383 """ 384 385 # temporarily replace all commas inside function pointer definition 386 arg_expr = KernRe(r'(\([^\),]+),') 387 while arg_expr.search(args): 388 args = arg_expr.sub(r"\1#", args) 389 390 for arg in args.split(splitter): 391 # Strip comments 392 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 393 394 # Ignore argument attributes 395 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 396 397 # Strip leading/trailing spaces 398 arg = arg.strip() 399 arg = KernRe(r'\s+').sub(' ', arg, count=1) 400 401 if arg.startswith('#'): 402 # Treat preprocessor directive as a typeless variable just to fill 403 # corresponding data structures "correctly". Catch it later in 404 # output_* subs. 
405 406 # Treat preprocessor directive as a typeless variable 407 self.push_parameter(ln, decl_type, arg, "", 408 "", declaration_name) 409 410 elif KernRe(r'\(.+\)\s*\(').search(arg): 411 # Pointer-to-function 412 413 arg = arg.replace('#', ',') 414 415 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 416 if r.match(arg): 417 param = r.group(1) 418 else: 419 self.emit_msg(ln, f"Invalid param: {arg}") 420 param = arg 421 422 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 423 self.save_struct_actual(param) 424 self.push_parameter(ln, decl_type, param, dtype, 425 arg, declaration_name) 426 427 elif KernRe(r'\(.+\)\s*\[').search(arg): 428 # Array-of-pointers 429 430 arg = arg.replace('#', ',') 431 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 432 if r.match(arg): 433 param = r.group(1) 434 else: 435 self.emit_msg(ln, f"Invalid param: {arg}") 436 param = arg 437 438 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 439 440 self.save_struct_actual(param) 441 self.push_parameter(ln, decl_type, param, dtype, 442 arg, declaration_name) 443 444 elif arg: 445 arg = KernRe(r'\s*:\s*').sub(":", arg) 446 arg = KernRe(r'\s*\[').sub('[', arg) 447 448 args = KernRe(r'\s*,\s*').split(arg) 449 if args[0] and '*' in args[0]: 450 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 451 452 first_arg = [] 453 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 454 if args[0] and r.match(args[0]): 455 args.pop(0) 456 first_arg.extend(r.group(1)) 457 first_arg.append(r.group(2)) 458 else: 459 first_arg = KernRe(r'\s+').split(args.pop(0)) 460 461 args.insert(0, first_arg.pop()) 462 dtype = ' '.join(first_arg) 463 464 for param in args: 465 if KernRe(r'^(\*+)\s*(.*)').match(param): 466 r = KernRe(r'^(\*+)\s*(.*)') 467 if not r.match(param): 468 self.emit_msg(ln, f"Invalid param: {param}") 469 continue 470 471 param = r.group(1) 472 473 self.save_struct_actual(r.group(2)) 474 self.push_parameter(ln, decl_type, r.group(2), 475 f"{dtype} 
{r.group(1)}", 476 arg, declaration_name) 477 478 elif KernRe(r'(.*?):(\w+)').search(param): 479 r = KernRe(r'(.*?):(\w+)') 480 if not r.match(param): 481 self.emit_msg(ln, f"Invalid param: {param}") 482 continue 483 484 if dtype != "": # Skip unnamed bit-fields 485 self.save_struct_actual(r.group(1)) 486 self.push_parameter(ln, decl_type, r.group(1), 487 f"{dtype}:{r.group(2)}", 488 arg, declaration_name) 489 else: 490 self.save_struct_actual(param) 491 self.push_parameter(ln, decl_type, param, dtype, 492 arg, declaration_name) 493 494 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): 495 """ 496 Check for errors inside sections, emitting warnings if not found 497 parameters are described. 498 """ 499 500 sects = sectcheck.split() 501 prms = prmscheck.split() 502 err = False 503 504 for sx in range(len(sects)): # pylint: disable=C0200 505 err = True 506 for px in range(len(prms)): # pylint: disable=C0200 507 prm_clean = prms[px] 508 prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) 509 prm_clean = attribute.sub('', prm_clean) 510 511 # ignore array size in a parameter string; 512 # however, the original param string may contain 513 # spaces, e.g.: addr[6 + 2] 514 # and this appears in @prms as "addr[6" since the 515 # parameter list is split at spaces; 516 # hence just ignore "[..." for the sections check; 517 prm_clean = KernRe(r'\[.*').sub('', prm_clean) 518 519 if prm_clean == sects[sx]: 520 err = False 521 break 522 523 if err: 524 if decl_type == 'function': 525 dname = f"{decl_type} parameter" 526 else: 527 dname = f"{decl_type} member" 528 529 self.emit_msg(ln, 530 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") 531 532 def check_return_section(self, ln, declaration_name, return_type): 533 """ 534 If the function doesn't return void, warns about the lack of a 535 return description. 
        """

        # The check is optional: it only runs when config.wreturn is set
        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    def dump_struct(self, ln, proto):
        """
        Store an entry for an struct or union

        ``proto`` is the text of the struct/union definition, either in
        plain or in typedef form. Comments, attributes and a set of known
        helper macros are removed or expanded, nested struct/union members
        are flattened into "parent.member" names, and the result is checked
        against the described members before being stored.
        """

        type_pattern = r'(struct|union)'

        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]

        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
        # Groups: type, text before "{", "{", body, "}", declared IDs, ";"
        struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)')

        # Extract struct/union definition
        members = None
        declaration_name = None
        decl_type = None

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            decl_type = r.group(1)
            declaration_name = r.group(2)
            members = r.group(3)
        else:
            # typedef form: the name comes after the body
            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')

            if r.search(proto):
                decl_type = r.group(1)
                declaration_name = r.group(3)
                members = r.group(2)

        if not members:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
            return

        args_pattern = r'([^,)]+)'

        # (regex, replacement) pairs, applied to the members in this order
        sub_prefixes = [
            (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''),
            (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''),

            # Strip comments
            (KernRe(r'\/\*.*?\*\/', re.S), ''),

            # Strip attributes
            (attribute, ' '),
            (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
            (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
            (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
            (KernRe(r'\s*__packed\s*', re.S), ' '),
            (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
            (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
            (KernRe(r'\s*____cacheline_aligned', re.S), ' '),

            # Unwrap struct_group macros based on this definition:
            # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
            # which has variants like: struct_group(NAME, MEMBERS...)
            # Only MEMBERS arguments require documentation.
            #
            # Parsing them happens on two steps:
            #
            # 1. drop struct group arguments that aren't at MEMBERS,
            #    storing them as STRUCT_GROUP(MEMBERS)
            #
            # 2. remove STRUCT_GROUP() ancillary macro.
            #
            # The original logic used to remove STRUCT_GROUP() using an
            # advanced regex:
            #
            #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
            #
            # with two patterns that are incompatible with
            # Python re module, as it has:
            #
            #   - a recursive pattern: (?1)
            #   - an atomic grouping: (?>...)
            #
            # I tried a simpler version: but it didn't work either:
            #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
            #
            # As it doesn't properly match the end parenthesis on some cases.
            #
            # So, a better solution was crafted: there's now a NestedMatch
            # class that ensures that delimiters after a search are properly
            # matched. So, the implementation to drop STRUCT_GROUP() will be
            # handled in separate.

            (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
            (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
            (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
            (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),

            # Replace macros
            #
            # TODO: use NestedMatch for FOO($1, $2, ...) matches
            #
            # it is better to also move those to the NestedMatch logic,
            # to ensure that parenthesis will be properly matched.

            (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
            (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
            (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
            (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
            (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
            (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
            (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),
            (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),
            (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'),
        ]

        # Regexes here are guaranteed to have the end limiter matching
        # the start delimiter. Yet, right now, only one replace group
        # is allowed.

        sub_nested_prefixes = [
            (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
        ]

        for search, sub in sub_prefixes:
            members = search.sub(sub, members)

        nested = NestedMatch()

        for search, sub in sub_nested_prefixes:
            members = nested.sub(search, sub, members)

        # Keeps the original declaration as-is
        declaration = members

        # Split nested struct/union elements
        #
        # This loop was simpler at the original kernel-doc perl version, as
        #   while ($members =~ m/$struct_members/) { ... }
        # reads 'members' string on each interaction.
        #
        # Python behavior is different: it parses 'members' only once,
        # creating a list of tuples from the first interaction.
        #
        # On other words, this won't get nested structs.
        #
        # So, we need to have an extra loop on Python to override such
        # re limitation.

        while True:
            tuples = struct_members.findall(members)
            if not tuples:
                break

            for t in tuples:
                newmember = ""
                maintype = t[0]
                s_ids = t[5]
                content = t[3]

                # Joining all captured groups rebuilds the matched text
                oldmember = "".join(t)

                for s_id in s_ids.split(','):
                    s_id = s_id.strip()

                    newmember += f"{maintype} {s_id}; "
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)

                    for arg in content.split(';'):
                        arg = arg.strip()

                        if not arg:
                            continue

                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
                        if r.match(arg):
                            # Pointer-to-function
                            dtype = r.group(1)
                            name = r.group(2)
                            extra = r.group(3)

                            if not name:
                                continue

                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{dtype}{name}{extra}; "
                            else:
                                newmember += f"{dtype}{s_id}.{name}{extra}; "

                        else:
                            arg = arg.strip()
                            # Handle bitmaps
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)

                            # Handle arrays
                            arg = KernRe(r'\[.*\]').sub('', arg)

                            # Handle multiple IDs
                            arg = KernRe(r'\s*,\s*').sub(',', arg)

                            r = KernRe(r'(.*)\s+([\S+,]+)')

                            if r.search(arg):
                                dtype = r.group(1)
                                names = r.group(2)
                            else:
                                newmember += f"{arg}; "
                                continue

                            for name in names.split(','):
                                name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()

                                if not name:
                                    continue

                                if not s_id:
                                    # Anonymous struct/union
                                    newmember += f"{dtype} {name}; "
                                else:
                                    newmember += f"{dtype} {s_id}.{name}; "

                members = members.replace(oldmember, newmember)

        # Ignore other nested elements, like enums
        members = re.sub(r'(\{[^\{\}]*\})', '', members)

        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type,
                            self.entry.sectcheck, self.entry.struct_actual)

        # Adjust declaration for better display
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)

        # Better handle inlined enums
        while True:
            r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
            if not r.search(declaration):
                break

            declaration = r.sub(r'\1,\n\2', declaration)

        # Rebuild the declaration one clause per line, tab-indented
        # according to the brace nesting level
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:

            clause = clause.strip()
            clause = KernRe(r'\s+').sub(' ', clause, count=1)

            if not clause:
                continue

            if '}' in clause and level > 1:
                level -= 1

            # Lines starting with '#' don't get the per-level indentation
            if not KernRe(r'^\s*#').match(clause):
                declaration += "\t" * level

            declaration += "\t" + clause + "\n"
            if "{" in clause and "}" not in clause:
                level += 1

        self.output_declaration(decl_type, declaration_name,
                                struct=declaration_name,
                                definition=declaration,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parametertypes=self.entry.parametertypes,
                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.

        Both "typedef enum { ... } name;" and "enum name { ... }" forms
        are accepted; anything else is reported as a parse error.
        """

        # Ignore members marked private
        proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto)
        proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto)

        # Strip comments
        proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto)

        # Strip #define macros inside enums
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)

        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = r.group(1).rstrip()
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = r.group(2).rstrip()
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        # Drop parenthesized text from the members before splitting at commas
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            # Keep only the leading identifier of the member
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '%{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                enum=declaration_name,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Stores a data declaration inside self.entries array.

        Dispatches on self.entry.decl_type to dump_enum(), dump_typedef()
        or dump_struct(); any other type is stored as-is.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
            return

        if self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
            return

        if self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
            return

        self.output_declaration(self.entry.decl_type, prototype,
                                entry=self.entry)

    def dump_function(self, ln, prototype):
        """
        Stores a function of function macro inside self.entries array.

        Known qualifiers and attributes are stripped from ``prototype``
        first; then the return type, name and arguments are extracted, the
        documented parameters are checked, and the entry is stored.
        """

        func_macro = False
        return_type = ''
        decl_type = 'function'

        # Prefixes that would be removed
        # Each item is (regex, replacement, regex flags)
        sub_prefixes = [
            (r"^static +", "", 0),
            (r"^extern +", "", 0),
            (r"^asmlinkage +", "", 0),
            (r"^inline +", "", 0),
            (r"^__inline__ +", "", 0),
            (r"^__inline +", "", 0),
            (r"^__always_inline +", "", 0),
            (r"^noinline +", "", 0),
            (r"^__FORTIFY_INLINE +", "", 0),
            (r"__init +", "", 0),
            (r"__init_or_module +", "", 0),
            (r"__deprecated +", "", 0),
            (r"__flatten +", "", 0),
            (r"__meminit +", "", 0),
            (r"__must_check +", "", 0),
            (r"__weak +", "", 0),
            (r"__sched +", "", 0),
            (r"_noprof", "", 0),
            (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0),
            (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0),
            (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0),
            (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0),
            (r"__attribute_const__ +", "", 0),

            # It seems that Python support for re.X is broken:
            # At least for me (Python 3.13), this didn't work
#            (r"""
#              __attribute__\s*\(\(
#                (?:
#                    [\w\s]+          # attribute name
#                    (?:\([^)]*\))?   # attribute arguments
#                    \s*,?            # optional comma at the end
#                )+
#              \)\)\s+
#             """, "", re.X),

            # So, remove whitespaces and comments from it
            (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0),
        ]

        for search, sub, flags in sub_prefixes:
            prototype = KernRe(search, flags).sub(sub, prototype)

        # Macros are a special case, as they change the prototype format
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            is_define_proto = True
            prototype = new_proto
        else:
            is_define_proto = False

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'[a-zA-Z0-9_~:]+'
        prototype_end1 = r'[^\(]*'
        prototype_end2 = r'[^\{]*'
        prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)'

        # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group.
        # So, this needs to be mapped in Python with (?:...)? or (?:...)+

        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'

        found = False

        if is_define_proto:
            # A "#define NAME ..." whose name is followed by whitespace:
            # no return type, and no parameter list gets created here
            r = KernRe(r'^()(' + name + r')\s+')

            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(2)
                func_macro = True

                found = True

        if not found:
            # Tried in order; the first pattern that matches wins
            patterns = [
                rf'^()({name})\s*{prototype_end}',
                rf'^({type1})\s+({name})\s*{prototype_end}',
                rf'^({type2})\s*({name})\s*{prototype_end}',
            ]

            for p in patterns:
                r = KernRe(p)

                if r.match(prototype):

                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)

                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)

                    found = True
                    break
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")
            return

        prms = " ".join(self.entry.parameterlist)
        self.check_sections(ln, declaration_name, "function",
                            self.entry.sectcheck, prms)

        self.check_return_section(ln, declaration_name, return_type)

        # The two calls below differ only in the value of "typedef"
        if 'typedef' in return_type:
            self.output_declaration(decl_type, declaration_name,
                                    function=declaration_name,
                                    typedef=True,
                                    functiontype=return_type,
                                    parameterlist=self.entry.parameterlist,
                                    parameterdescs=self.entry.parameterdescs,
                                    parametertypes=self.entry.parametertypes,
                                    parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                    sections=self.entry.sections,
                                    section_start_lines=self.entry.section_start_lines,
                                    purpose=self.entry.declaration_purpose,
                                    func_macro=func_macro)
        else:
            self.output_declaration(decl_type, declaration_name,
                                    function=declaration_name,
                                    typedef=False,
                                    functiontype=return_type,
                                    parameterlist=self.entry.parameterlist,
                                    parameterdescs=self.entry.parameterdescs,
                                    parametertypes=self.entry.parametertypes,
                                    parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                    sections=self.entry.sections,
                                    section_start_lines=self.entry.section_start_lines,
                                    purpose=self.entry.declaration_purpose,
                                    func_macro=func_macro)

    def dump_typedef(self, ln, proto):
        """
        Stores a typedef inside self.entries array.
1103 """ 1104 1105 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1106 typedef_ident = r'\*?\s*(\w\S+)\s*' 1107 typedef_args = r'\s*\((.*)\);' 1108 1109 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1110 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1111 1112 # Strip comments 1113 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1114 1115 # Parse function typedef prototypes 1116 for r in [typedef1, typedef2]: 1117 if not r.match(proto): 1118 continue 1119 1120 return_type = r.group(1).strip() 1121 declaration_name = r.group(2) 1122 args = r.group(3) 1123 1124 if self.entry.identifier != declaration_name: 1125 self.emit_msg(ln, 1126 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1127 return 1128 1129 decl_type = 'function' 1130 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1131 1132 self.output_declaration(decl_type, declaration_name, 1133 function=declaration_name, 1134 typedef=True, 1135 functiontype=return_type, 1136 parameterlist=self.entry.parameterlist, 1137 parameterdescs=self.entry.parameterdescs, 1138 parametertypes=self.entry.parametertypes, 1139 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1140 sections=self.entry.sections, 1141 section_start_lines=self.entry.section_start_lines, 1142 purpose=self.entry.declaration_purpose) 1143 return 1144 1145 # Handle nested parentheses or brackets 1146 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1147 while r.search(proto): 1148 proto = r.sub('', proto) 1149 1150 # Parse simple typedefs 1151 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1152 if r.match(proto): 1153 declaration_name = r.group(1) 1154 1155 if self.entry.identifier != declaration_name: 1156 self.emit_msg(ln, 1157 f"expecting prototype for typedef {self.entry.identifier}. 
Prototype was for typedef {declaration_name} instead\n") 1158 return 1159 1160 self.output_declaration('typedef', declaration_name, 1161 typedef=declaration_name, 1162 sections=self.entry.sections, 1163 section_start_lines=self.entry.section_start_lines, 1164 purpose=self.entry.declaration_purpose) 1165 return 1166 1167 self.emit_msg(ln, "error: Cannot parse typedef!") 1168 1169 @staticmethod 1170 def process_export(function_set, line): 1171 """ 1172 process EXPORT_SYMBOL* tags 1173 1174 This method doesn't use any variable from the class, so declare it 1175 with a staticmethod decorator. 1176 """ 1177 1178 # We support documenting some exported symbols with different 1179 # names. A horrible hack. 1180 suffixes = [ '_noprof' ] 1181 1182 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1183 # multiple export lines would violate Kernel coding style. 1184 1185 if export_symbol.search(line): 1186 symbol = export_symbol.group(2) 1187 elif export_symbol_ns.search(line): 1188 symbol = export_symbol_ns.group(2) 1189 else: 1190 return False 1191 # 1192 # Found an export, trim out any special suffixes 1193 # 1194 for suffix in suffixes: 1195 symbol = symbol.removesuffix(suffix) 1196 function_set.add(symbol) 1197 return True 1198 1199 def process_normal(self, ln, line): 1200 """ 1201 STATE_NORMAL: looking for the /** to begin everything. 1202 """ 1203 1204 if not doc_start.match(line): 1205 return 1206 1207 # start a new entry 1208 self.reset_state(ln) 1209 1210 # next line is always the function name 1211 self.state = state.NAME 1212 1213 def process_name(self, ln, line): 1214 """ 1215 STATE_NAME: Looking for the "name - description" line 1216 """ 1217 # 1218 # Check for a DOC: block and handle them specially. 
1219 # 1220 if doc_block.search(line): 1221 1222 if not doc_block.group(1): 1223 self.entry.begin_section(ln, "Introduction") 1224 else: 1225 self.entry.begin_section(ln, doc_block.group(1)) 1226 1227 self.entry.identifier = self.entry.section 1228 self.state = state.DOCBLOCK 1229 # 1230 # Otherwise we're looking for a normal kerneldoc declaration line. 1231 # 1232 elif doc_decl.search(line): 1233 self.entry.identifier = doc_decl.group(1) 1234 1235 # Test for data declaration 1236 if doc_begin_data.search(line): 1237 self.entry.decl_type = doc_begin_data.group(1) 1238 self.entry.identifier = doc_begin_data.group(2) 1239 # 1240 # Look for a function description 1241 # 1242 elif doc_begin_func.search(line): 1243 self.entry.identifier = doc_begin_func.group(1) 1244 self.entry.decl_type = "function" 1245 # 1246 # We struck out. 1247 # 1248 else: 1249 self.emit_msg(ln, 1250 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1251 self.state = state.NORMAL 1252 return 1253 # 1254 # OK, set up for a new kerneldoc entry. 1255 # 1256 self.state = state.BODY 1257 self.entry.identifier = self.entry.identifier.strip(" ") 1258 # if there's no @param blocks need to set up default section here 1259 self.entry.begin_section(ln + 1) 1260 # 1261 # Find the description portion, which *should* be there but 1262 # isn't always. 
1263 # (We should be able to capture this from the previous parsing - someday) 1264 # 1265 r = KernRe("[-:](.*)") 1266 if r.search(line): 1267 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1268 self.state = state.DECLARATION 1269 else: 1270 self.entry.declaration_purpose = "" 1271 1272 if not self.entry.declaration_purpose and self.config.wshort_desc: 1273 self.emit_msg(ln, 1274 f"missing initial short description on line:\n{line}") 1275 1276 if not self.entry.identifier and self.entry.decl_type != "enum": 1277 self.emit_msg(ln, 1278 f"wrong kernel-doc identifier on line:\n{line}") 1279 self.state = state.NORMAL 1280 1281 if self.config.verbose: 1282 self.emit_msg(ln, 1283 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1284 warning=False) 1285 # 1286 # Failed to find an identifier. Emit a warning 1287 # 1288 else: 1289 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1290 1291 # 1292 # Helper function to determine if a new section is being started. 1293 # 1294 def is_new_section(self, ln, line): 1295 if doc_sect.search(line): 1296 self.state = state.BODY 1297 # 1298 # Pick out the name of our new section, tweaking it if need be. 1299 # 1300 newsection = doc_sect.group(1) 1301 if newsection.lower() == 'description': 1302 newsection = 'Description' 1303 elif newsection.lower() == 'context': 1304 newsection = 'Context' 1305 self.state = state.SPECIAL_SECTION 1306 elif newsection.lower() in ["@return", "@returns", 1307 "return", "returns"]: 1308 newsection = "Return" 1309 self.state = state.SPECIAL_SECTION 1310 elif newsection[0] == '@': 1311 self.state = state.SPECIAL_SECTION 1312 # 1313 # Initialize the contents, and get the new section going. 
1314 # 1315 newcontents = doc_sect.group(2) 1316 if not newcontents: 1317 newcontents = "" 1318 self.dump_section() 1319 self.entry.begin_section(ln, newsection) 1320 self.entry.leading_space = None 1321 1322 self.entry.add_text(newcontents.lstrip()) 1323 return True 1324 return False 1325 1326 # 1327 # Helper function to detect (and effect) the end of a kerneldoc comment. 1328 # 1329 def is_comment_end(self, ln, line): 1330 if doc_end.search(line): 1331 self.dump_section() 1332 1333 # Look for doc_com + <text> + doc_end: 1334 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1335 if r.match(line): 1336 self.emit_msg(ln, f"suspicious ending line: {line}") 1337 1338 self.entry.prototype = "" 1339 self.entry.new_start_line = ln + 1 1340 1341 self.state = state.PROTO 1342 return True 1343 return False 1344 1345 1346 def process_decl(self, ln, line): 1347 """ 1348 STATE_DECLARATION: We've seen the beginning of a declaration 1349 """ 1350 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1351 return 1352 # 1353 # Look for anything with the " * " line beginning. 1354 # 1355 if doc_content.search(line): 1356 cont = doc_content.group(1) 1357 # 1358 # A blank line means that we have moved out of the declaration 1359 # part of the comment (without any "special section" parameter 1360 # descriptions). 1361 # 1362 if cont == "": 1363 self.state = state.BODY 1364 # 1365 # Otherwise we have more of the declaration section to soak up. 1366 # 1367 else: 1368 self.entry.declaration_purpose = \ 1369 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1370 else: 1371 # Unknown line, ignore 1372 self.emit_msg(ln, f"bad line: {line}") 1373 1374 1375 def process_special(self, ln, line): 1376 """ 1377 STATE_SPECIAL_SECTION: a section ending with a blank line 1378 """ 1379 # 1380 # If we have hit a blank line (only the " * " marker), then this 1381 # section is done. 
1382 # 1383 if KernRe(r"\s*\*\s*$").match(line): 1384 self.entry.begin_section(ln, dump = True) 1385 self.state = state.BODY 1386 return 1387 # 1388 # Not a blank line, look for the other ways to end the section. 1389 # 1390 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1391 return 1392 # 1393 # OK, we should have a continuation of the text for this section. 1394 # 1395 if doc_content.search(line): 1396 cont = doc_content.group(1) 1397 # 1398 # If the lines of text after the first in a special section have 1399 # leading white space, we need to trim it out or Sphinx will get 1400 # confused. For the second line (the None case), see what we 1401 # find there and remember it. 1402 # 1403 if self.entry.leading_space is None: 1404 r = KernRe(r'^(\s+)') 1405 if r.match(cont): 1406 self.entry.leading_space = len(r.group(1)) 1407 else: 1408 self.entry.leading_space = 0 1409 # 1410 # Otherwise, before trimming any leading chars, be *sure* 1411 # that they are white space. We should maybe warn if this 1412 # isn't the case. 1413 # 1414 for i in range(0, self.entry.leading_space): 1415 if cont[i] != " ": 1416 self.entry.leading_space = i 1417 break 1418 # 1419 # Add the trimmed result to the section and we're done. 1420 # 1421 self.entry.add_text(cont[self.entry.leading_space:]) 1422 else: 1423 # Unknown line, ignore 1424 self.emit_msg(ln, f"bad line: {line}") 1425 1426 def process_body(self, ln, line): 1427 """ 1428 STATE_BODY: the bulk of a kerneldoc comment. 
1429 """ 1430 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1431 return 1432 1433 if doc_content.search(line): 1434 cont = doc_content.group(1) 1435 self.entry.add_text(cont) 1436 else: 1437 # Unknown line, ignore 1438 self.emit_msg(ln, f"bad line: {line}") 1439 1440 def process_inline_name(self, ln, line): 1441 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1442 1443 if doc_inline_sect.search(line): 1444 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1445 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1446 self.state = state.INLINE_TEXT 1447 elif doc_inline_end.search(line): 1448 self.dump_section() 1449 self.state = state.PROTO 1450 elif doc_content.search(line): 1451 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1452 self.state = state.PROTO 1453 # else ... ?? 1454 1455 def process_inline_text(self, ln, line): 1456 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1457 1458 if doc_inline_end.search(line): 1459 self.dump_section() 1460 self.state = state.PROTO 1461 elif doc_content.search(line): 1462 self.entry.add_text(doc_content.group(1)) 1463 # else ... ?? 
1464 1465 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1466 """ 1467 Handle syscall definitions 1468 """ 1469 1470 is_void = False 1471 1472 # Strip newlines/CR's 1473 proto = re.sub(r'[\r\n]+', ' ', proto) 1474 1475 # Check if it's a SYSCALL_DEFINE0 1476 if 'SYSCALL_DEFINE0' in proto: 1477 is_void = True 1478 1479 # Replace SYSCALL_DEFINE with correct return type & function name 1480 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1481 1482 r = KernRe(r'long\s+(sys_.*?),') 1483 if r.search(proto): 1484 proto = KernRe(',').sub('(', proto, count=1) 1485 elif is_void: 1486 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1487 1488 # Now delete all of the odd-numbered commas in the proto 1489 # so that argument types & names don't have a comma between them 1490 count = 0 1491 length = len(proto) 1492 1493 if is_void: 1494 length = 0 # skip the loop if is_void 1495 1496 for ix in range(length): 1497 if proto[ix] == ',': 1498 count += 1 1499 if count % 2 == 1: 1500 proto = proto[:ix] + ' ' + proto[ix + 1:] 1501 1502 return proto 1503 1504 def tracepoint_munge(self, ln, proto): 1505 """ 1506 Handle tracepoint definitions 1507 """ 1508 1509 tracepointname = None 1510 tracepointargs = None 1511 1512 # Match tracepoint name based on different patterns 1513 r = KernRe(r'TRACE_EVENT\((.*?),') 1514 if r.search(proto): 1515 tracepointname = r.group(1) 1516 1517 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1518 if r.search(proto): 1519 tracepointname = r.group(1) 1520 1521 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1522 if r.search(proto): 1523 tracepointname = r.group(2) 1524 1525 if tracepointname: 1526 tracepointname = tracepointname.lstrip() 1527 1528 r = KernRe(r'TP_PROTO\((.*?)\)') 1529 if r.search(proto): 1530 tracepointargs = r.group(1) 1531 1532 if not tracepointname or not tracepointargs: 1533 self.emit_msg(ln, 1534 f"Unrecognized tracepoint format:\n{proto}\n") 1535 else: 1536 proto = f"static inline void 
trace_{tracepointname}({tracepointargs})" 1537 self.entry.identifier = f"trace_{self.entry.identifier}" 1538 1539 return proto 1540 1541 def process_proto_function(self, ln, line): 1542 """Ancillary routine to process a function prototype""" 1543 1544 # strip C99-style comments to end of line 1545 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1546 # 1547 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1548 # 1549 if KernRe(r'\s*#\s*define').match(line): 1550 self.entry.prototype = line 1551 elif not line.startswith('#'): # skip other preprocessor stuff 1552 r = KernRe(r'([^\{]*)') 1553 if r.match(line): 1554 self.entry.prototype += r.group(1) + " " 1555 # 1556 # If we now have the whole prototype, clean it up and declare victory. 1557 # 1558 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1559 # strip comments and surrounding spaces 1560 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1561 # 1562 # Handle self.entry.prototypes for function pointers like: 1563 # int (*pcs_config)(struct foo) 1564 # by turning it into 1565 # int pcs_config(struct foo) 1566 # 1567 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1568 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1569 # 1570 # Handle special declaration syntaxes 1571 # 1572 if 'SYSCALL_DEFINE' in self.entry.prototype: 1573 self.entry.prototype = self.syscall_munge(ln, 1574 self.entry.prototype) 1575 else: 1576 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1577 if r.search(self.entry.prototype): 1578 self.entry.prototype = self.tracepoint_munge(ln, 1579 self.entry.prototype) 1580 # 1581 # ... 
and we're done 1582 # 1583 self.dump_function(ln, self.entry.prototype) 1584 self.reset_state(ln) 1585 1586 def process_proto_type(self, ln, line): 1587 """Ancillary routine to process a type""" 1588 1589 # Strip C99-style comments and surrounding whitespace 1590 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1591 if not line: 1592 return # nothing to see here 1593 1594 # To distinguish preprocessor directive from regular declaration later. 1595 if line.startswith('#'): 1596 line += ";" 1597 # 1598 # Split the declaration on any of { } or ;, and accumulate pieces 1599 # until we hit a semicolon while not inside {brackets} 1600 # 1601 r = KernRe(r'(.*?)([{};])') 1602 for chunk in r.split(line): 1603 if chunk: # Ignore empty matches 1604 self.entry.prototype += chunk 1605 # 1606 # This cries out for a match statement ... someday after we can 1607 # drop Python 3.9 ... 1608 # 1609 if chunk == '{': 1610 self.entry.brcount += 1 1611 elif chunk == '}': 1612 self.entry.brcount -= 1 1613 elif chunk == ';' and self.entry.brcount <= 0: 1614 self.dump_declaration(ln, self.entry.prototype) 1615 self.reset_state(ln) 1616 return 1617 # 1618 # We hit the end of the line while still in the declaration; put 1619 # in a space to represent the newline. 
1620 # 1621 self.entry.prototype += ' ' 1622 1623 def process_proto(self, ln, line): 1624 """STATE_PROTO: reading a function/whatever prototype.""" 1625 1626 if doc_inline_oneline.search(line): 1627 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1628 self.entry.add_text(doc_inline_oneline.group(2)) 1629 self.dump_section() 1630 1631 elif doc_inline_start.search(line): 1632 self.state = state.INLINE_NAME 1633 1634 elif self.entry.decl_type == 'function': 1635 self.process_proto_function(ln, line) 1636 1637 else: 1638 self.process_proto_type(ln, line) 1639 1640 def process_docblock(self, ln, line): 1641 """STATE_DOCBLOCK: within a DOC: block.""" 1642 1643 if doc_end.search(line): 1644 self.dump_section() 1645 self.output_declaration("doc", self.entry.identifier, 1646 sections=self.entry.sections, 1647 section_start_lines=self.entry.section_start_lines) 1648 self.reset_state(ln) 1649 1650 elif doc_content.search(line): 1651 self.entry.add_text(doc_content.group(1)) 1652 1653 def parse_export(self): 1654 """ 1655 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1656 """ 1657 1658 export_table = set() 1659 1660 try: 1661 with open(self.fname, "r", encoding="utf8", 1662 errors="backslashreplace") as fp: 1663 1664 for line in fp: 1665 self.process_export(export_table, line) 1666 1667 except IOError: 1668 return None 1669 1670 return export_table 1671 1672 # 1673 # The state/action table telling us which function to invoke in 1674 # each state. 1675 # 1676 state_actions = { 1677 state.NORMAL: process_normal, 1678 state.NAME: process_name, 1679 state.BODY: process_body, 1680 state.DECLARATION: process_decl, 1681 state.SPECIAL_SECTION: process_special, 1682 state.INLINE_NAME: process_inline_name, 1683 state.INLINE_TEXT: process_inline_text, 1684 state.PROTO: process_proto, 1685 state.DOCBLOCK: process_docblock, 1686 } 1687 1688 def parse_kdoc(self): 1689 """ 1690 Open and process each line of a C source file. 
1691 The parsing is controlled via a state machine, and the line is passed 1692 to a different process function depending on the state. The process 1693 function may update the state as needed. 1694 1695 Besides parsing kernel-doc tags, it also parses export symbols. 1696 """ 1697 1698 prev = "" 1699 prev_ln = None 1700 export_table = set() 1701 1702 try: 1703 with open(self.fname, "r", encoding="utf8", 1704 errors="backslashreplace") as fp: 1705 for ln, line in enumerate(fp): 1706 1707 line = line.expandtabs().strip("\n") 1708 1709 # Group continuation lines on prototypes 1710 if self.state == state.PROTO: 1711 if line.endswith("\\"): 1712 prev += line.rstrip("\\") 1713 if not prev_ln: 1714 prev_ln = ln 1715 continue 1716 1717 if prev: 1718 ln = prev_ln 1719 line = prev + line 1720 prev = "" 1721 prev_ln = None 1722 1723 self.config.log.debug("%d %s: %s", 1724 ln, state.name[self.state], 1725 line) 1726 1727 # This is an optimization over the original script. 1728 # There, when export_file was used for the same file, 1729 # it was read twice. Here, we use the already-existing 1730 # loop to parse exported symbols as well. 1731 # 1732 if (self.state != state.NORMAL) or \ 1733 not self.process_export(export_table, line): 1734 # Hand this line to the appropriate state handler 1735 self.state_actions[self.state](self, ln, line) 1736 1737 except OSError: 1738 self.config.log.error(f"Error: Cannot open file {self.fname}") 1739 1740 return export_table, self.entries 1741