1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45doc_sect = doc_com + \ 46 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 51doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 52doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 53doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 54attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 55 flags=re.I | re.S, cache=False) 56 57export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 58export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 59 60type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 61 62# 63# Tests for the beginning of a kerneldoc block in its various forms. 64# 65doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 66doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 67doc_begin_func = KernRe(str(doc_com) + # initial " * ' 68 r"(?:\w+\s*\*\s*)?" + # type (not captured) 69 r'(?:define\s+)?' + # possible "define" (not captured) 70 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 71 r'(?:[-:].*)?$', # description (not captured) 72 cache = False) 73 74# 75# A little helper to get rid of excess white space 76# 77multi_space = KernRe(r'\s\s+') 78def trim_whitespace(s): 79 return multi_space.sub(' ', s.strip()) 80 81class state: 82 """ 83 State machine enums 84 """ 85 86 # Parser states 87 NORMAL = 0 # normal code 88 NAME = 1 # looking for function name 89 DECLARATION = 2 # We have seen a declaration which might not be done 90 BODY = 3 # the body of the comment 91 SPECIAL_SECTION = 4 # doc section ending with a blank line 92 PROTO = 5 # scanning prototype 93 DOCBLOCK = 6 # documentation block 94 INLINE = 7 # gathering doc outside main block 95 96 name = [ 97 "NORMAL", 98 "NAME", 99 "DECLARATION", 100 "BODY", 101 "SPECIAL_SECTION", 102 "PROTO", 103 "DOCBLOCK", 104 "INLINE", 105 ] 106 107 # Inline documentation state 108 INLINE_NA = 0 # not applicable ($state != INLINE) 109 INLINE_NAME = 1 # looking for member name (@foo:) 110 INLINE_TEXT = 2 # looking for member documentation 111 INLINE_END = 3 # done 112 INLINE_ERROR = 4 # error - Comment without header was found. 113 # Spit a warning as it's not 114 # proper kernel-doc and ignore the rest. 115 116 inline_name = [ 117 "", 118 "_NAME", 119 "_TEXT", 120 "_END", 121 "_ERROR", 122 ] 123 124SECTION_DEFAULT = "Description" # default section 125 126class KernelEntry: 127 128 def __init__(self, config, ln): 129 self.config = config 130 131 self._contents = [] 132 self.sectcheck = "" 133 self.struct_actual = "" 134 self.prototype = "" 135 136 self.warnings = [] 137 138 self.parameterlist = [] 139 self.parameterdescs = {} 140 self.parametertypes = {} 141 self.parameterdesc_start_lines = {} 142 143 self.section_start_lines = {} 144 self.sectionlist = [] 145 self.sections = {} 146 147 self.anon_struct_union = False 148 149 self.leading_space = None 150 151 # State flags 152 self.brcount = 0 153 self.declaration_start_line = ln + 1 154 155 # 156 # Management of section contents 157 # 158 def add_text(self, text): 159 self._contents.append(text) 160 161 def contents(self): 162 return '\n'.join(self._contents) + '\n' 163 164 # TODO: rename to emit_message after removal of kernel-doc.pl 165 def emit_msg(self, log_msg, warning=True): 166 """Emit a message""" 167 168 if not warning: 169 self.config.log.info(log_msg) 170 return 171 172 # Delegate warning output to output logic, as this way it 173 # will report warnings/info only for symbols that are output 174 175 self.warnings.append(log_msg) 176 return 177 178 # 179 # Begin a new section. 180 # 181 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 182 if dump: 183 self.dump_section(start_new = True) 184 self.section = title 185 self.new_start_line = line_no 186 187 def dump_section(self, start_new=True): 188 """ 189 Dumps section contents to arrays/hashes intended for that purpose. 190 """ 191 # 192 # If we have accumulated no contents in the default ("description") 193 # section, don't bother. 194 # 195 if self.section == SECTION_DEFAULT and not self._contents: 196 return 197 name = self.section 198 contents = self.contents() 199 200 if type_param.match(name): 201 name = type_param.group(1) 202 203 self.parameterdescs[name] = contents 204 self.parameterdesc_start_lines[name] = self.new_start_line 205 206 self.sectcheck += name + " " 207 self.new_start_line = 0 208 209 else: 210 if name in self.sections and self.sections[name] != "": 211 # Only warn on user-specified duplicate section names 212 if name != SECTION_DEFAULT: 213 self.emit_msg(self.new_start_line, 214 f"duplicate section name '{name}'\n") 215 # Treat as a new paragraph - add a blank line 216 self.sections[name] += '\n' + contents 217 else: 218 self.sections[name] = contents 219 self.sectionlist.append(name) 220 self.section_start_lines[name] = self.new_start_line 221 self.new_start_line = 0 222 223# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 224 225 if start_new: 226 self.section = SECTION_DEFAULT 227 self._contents = [] 228 229 230class KernelDoc: 231 """ 232 Read a C language source or header FILE and extract embedded 233 documentation comments. 234 """ 235 236 # Section names 237 238 section_context = "Context" 239 section_return = "Return" 240 241 undescribed = "-- undescribed --" 242 243 def __init__(self, config, fname): 244 """Initialize internal variables""" 245 246 self.fname = fname 247 self.config = config 248 249 # Initial state for the state machines 250 self.state = state.NORMAL 251 self.inline_doc_state = state.INLINE_NA 252 253 # Store entry currently being processed 254 self.entry = None 255 256 # Place all potential outputs into an array 257 self.entries = [] 258 259 def emit_msg(self, ln, msg, warning=True): 260 """Emit a message""" 261 262 log_msg = f"{self.fname}:{ln} {msg}" 263 264 if self.entry: 265 self.entry.emit_msg(log_msg, warning) 266 return 267 268 if warning: 269 self.config.log.warning(log_msg) 270 else: 271 self.config.log.info(log_msg) 272 273 def dump_section(self, start_new=True): 274 """ 275 Dumps section contents to arrays/hashes intended for that purpose. 276 """ 277 278 if self.entry: 279 self.entry.dump_section(start_new) 280 281 # TODO: rename it to store_declaration after removal of kernel-doc.pl 282 def output_declaration(self, dtype, name, **args): 283 """ 284 Stores the entry into an entry array. 285 286 The actual output and output filters will be handled elsewhere 287 """ 288 289 # The implementation here is different than the original kernel-doc: 290 # instead of checking for output filters or actually output anything, 291 # it just stores the declaration content at self.entries, as the 292 # output will happen on a separate class. 293 # 294 # For now, we're keeping the same name of the function just to make 295 # easier to compare the source code of both scripts 296 297 args["declaration_start_line"] = self.entry.declaration_start_line 298 args["type"] = dtype 299 args["warnings"] = self.entry.warnings 300 301 # TODO: use colletions.OrderedDict to remove sectionlist 302 303 sections = args.get('sections', {}) 304 sectionlist = args.get('sectionlist', []) 305 306 # Drop empty sections 307 # TODO: improve empty sections logic to emit warnings 308 for section in ["Description", "Return"]: 309 if section in sectionlist: 310 if not sections[section].rstrip(): 311 del sections[section] 312 sectionlist.remove(section) 313 314 self.entries.append((name, args)) 315 316 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 317 318 def reset_state(self, ln): 319 """ 320 Ancillary routine to create a new entry. It initializes all 321 variables used by the state machine. 322 """ 323 324 self.entry = KernelEntry(self.config, ln) 325 326 # State flags 327 self.state = state.NORMAL 328 self.inline_doc_state = state.INLINE_NA 329 330 def push_parameter(self, ln, decl_type, param, dtype, 331 org_arg, declaration_name): 332 """ 333 Store parameters and their descriptions at self.entry. 334 """ 335 336 if self.entry.anon_struct_union and dtype == "" and param == "}": 337 return # Ignore the ending }; from anonymous struct/union 338 339 self.entry.anon_struct_union = False 340 341 param = KernRe(r'[\[\)].*').sub('', param, count=1) 342 343 if dtype == "" and param.endswith("..."): 344 if KernRe(r'\w\.\.\.$').search(param): 345 # For named variable parameters of the form `x...`, 346 # remove the dots 347 param = param[:-3] 348 else: 349 # Handles unnamed variable parameters 350 param = "..." 351 352 if param not in self.entry.parameterdescs or \ 353 not self.entry.parameterdescs[param]: 354 355 self.entry.parameterdescs[param] = "variable arguments" 356 357 elif dtype == "" and (not param or param == "void"): 358 param = "void" 359 self.entry.parameterdescs[param] = "no arguments" 360 361 elif dtype == "" and param in ["struct", "union"]: 362 # Handle unnamed (anonymous) union or struct 363 dtype = param 364 param = "{unnamed_" + param + "}" 365 self.entry.parameterdescs[param] = "anonymous\n" 366 self.entry.anon_struct_union = True 367 368 # Handle cache group enforcing variables: they do not need 369 # to be described in header files 370 elif "__cacheline_group" in param: 371 # Ignore __cacheline_group_begin and __cacheline_group_end 372 return 373 374 # Warn if parameter has no description 375 # (but ignore ones starting with # as these are not parameters 376 # but inline preprocessor statements) 377 if param not in self.entry.parameterdescs and not param.startswith("#"): 378 self.entry.parameterdescs[param] = self.undescribed 379 380 if "." not in param: 381 if decl_type == 'function': 382 dname = f"{decl_type} parameter" 383 else: 384 dname = f"{decl_type} member" 385 386 self.emit_msg(ln, 387 f"{dname} '{param}' not described in '{declaration_name}'") 388 389 # Strip spaces from param so that it is one continuous string on 390 # parameterlist. This fixes a problem where check_sections() 391 # cannot find a parameter like "addr[6 + 2]" because it actually 392 # appears as "addr[6", "+", "2]" on the parameter list. 393 # However, it's better to maintain the param string unchanged for 394 # output, so just weaken the string compare in check_sections() 395 # to ignore "[blah" in a parameter string. 396 397 self.entry.parameterlist.append(param) 398 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 399 self.entry.parametertypes[param] = org_arg 400 401 def save_struct_actual(self, actual): 402 """ 403 Strip all spaces from the actual param so that it looks like 404 one string item. 405 """ 406 407 actual = KernRe(r'\s*').sub("", actual, count=1) 408 409 self.entry.struct_actual += actual + " " 410 411 def create_parameter_list(self, ln, decl_type, args, 412 splitter, declaration_name): 413 """ 414 Creates a list of parameters, storing them at self.entry. 415 """ 416 417 # temporarily replace all commas inside function pointer definition 418 arg_expr = KernRe(r'(\([^\),]+),') 419 while arg_expr.search(args): 420 args = arg_expr.sub(r"\1#", args) 421 422 for arg in args.split(splitter): 423 # Strip comments 424 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 425 426 # Ignore argument attributes 427 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 428 429 # Strip leading/trailing spaces 430 arg = arg.strip() 431 arg = KernRe(r'\s+').sub(' ', arg, count=1) 432 433 if arg.startswith('#'): 434 # Treat preprocessor directive as a typeless variable just to fill 435 # corresponding data structures "correctly". Catch it later in 436 # output_* subs. 437 438 # Treat preprocessor directive as a typeless variable 439 self.push_parameter(ln, decl_type, arg, "", 440 "", declaration_name) 441 442 elif KernRe(r'\(.+\)\s*\(').search(arg): 443 # Pointer-to-function 444 445 arg = arg.replace('#', ',') 446 447 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 448 if r.match(arg): 449 param = r.group(1) 450 else: 451 self.emit_msg(ln, f"Invalid param: {arg}") 452 param = arg 453 454 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 455 self.save_struct_actual(param) 456 self.push_parameter(ln, decl_type, param, dtype, 457 arg, declaration_name) 458 459 elif KernRe(r'\(.+\)\s*\[').search(arg): 460 # Array-of-pointers 461 462 arg = arg.replace('#', ',') 463 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 464 if r.match(arg): 465 param = r.group(1) 466 else: 467 self.emit_msg(ln, f"Invalid param: {arg}") 468 param = arg 469 470 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 471 472 self.save_struct_actual(param) 473 self.push_parameter(ln, decl_type, param, dtype, 474 arg, declaration_name) 475 476 elif arg: 477 arg = KernRe(r'\s*:\s*').sub(":", arg) 478 arg = KernRe(r'\s*\[').sub('[', arg) 479 480 args = KernRe(r'\s*,\s*').split(arg) 481 if args[0] and '*' in args[0]: 482 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 483 484 first_arg = [] 485 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 486 if args[0] and r.match(args[0]): 487 args.pop(0) 488 first_arg.extend(r.group(1)) 489 first_arg.append(r.group(2)) 490 else: 491 first_arg = KernRe(r'\s+').split(args.pop(0)) 492 493 args.insert(0, first_arg.pop()) 494 dtype = ' '.join(first_arg) 495 496 for param in args: 497 if KernRe(r'^(\*+)\s*(.*)').match(param): 498 r = KernRe(r'^(\*+)\s*(.*)') 499 if not r.match(param): 500 self.emit_msg(ln, f"Invalid param: {param}") 501 continue 502 503 param = r.group(1) 504 505 self.save_struct_actual(r.group(2)) 506 self.push_parameter(ln, decl_type, r.group(2), 507 f"{dtype} {r.group(1)}", 508 arg, declaration_name) 509 510 elif KernRe(r'(.*?):(\w+)').search(param): 511 r = KernRe(r'(.*?):(\w+)') 512 if not r.match(param): 513 self.emit_msg(ln, f"Invalid param: {param}") 514 continue 515 516 if dtype != "": # Skip unnamed bit-fields 517 self.save_struct_actual(r.group(1)) 518 self.push_parameter(ln, decl_type, r.group(1), 519 f"{dtype}:{r.group(2)}", 520 arg, declaration_name) 521 else: 522 self.save_struct_actual(param) 523 self.push_parameter(ln, decl_type, param, dtype, 524 arg, declaration_name) 525 526 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): 527 """ 528 Check for errors inside sections, emitting warnings if not found 529 parameters are described. 530 """ 531 532 sects = sectcheck.split() 533 prms = prmscheck.split() 534 err = False 535 536 for sx in range(len(sects)): # pylint: disable=C0200 537 err = True 538 for px in range(len(prms)): # pylint: disable=C0200 539 prm_clean = prms[px] 540 prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) 541 prm_clean = attribute.sub('', prm_clean) 542 543 # ignore array size in a parameter string; 544 # however, the original param string may contain 545 # spaces, e.g.: addr[6 + 2] 546 # and this appears in @prms as "addr[6" since the 547 # parameter list is split at spaces; 548 # hence just ignore "[..." for the sections check; 549 prm_clean = KernRe(r'\[.*').sub('', prm_clean) 550 551 if prm_clean == sects[sx]: 552 err = False 553 break 554 555 if err: 556 if decl_type == 'function': 557 dname = f"{decl_type} parameter" 558 else: 559 dname = f"{decl_type} member" 560 561 self.emit_msg(ln, 562 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") 563 564 def check_return_section(self, ln, declaration_name, return_type): 565 """ 566 If the function doesn't return void, warns about the lack of a 567 return description. 568 """ 569 570 if not self.config.wreturn: 571 return 572 573 # Ignore an empty return type (It's a macro) 574 # Ignore functions with a "void" return type (but not "void *") 575 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 576 return 577 578 if not self.entry.sections.get("Return", None): 579 self.emit_msg(ln, 580 f"No description found for return value of '{declaration_name}'") 581 582 def dump_struct(self, ln, proto): 583 """ 584 Store an entry for an struct or union 585 """ 586 587 type_pattern = r'(struct|union)' 588 589 qualifiers = [ 590 "__attribute__", 591 "__packed", 592 "__aligned", 593 "____cacheline_aligned_in_smp", 594 "____cacheline_aligned", 595 ] 596 597 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 598 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') 599 600 # Extract struct/union definition 601 members = None 602 declaration_name = None 603 decl_type = None 604 605 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 606 if r.search(proto): 607 decl_type = r.group(1) 608 declaration_name = r.group(2) 609 members = r.group(3) 610 else: 611 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 612 613 if r.search(proto): 614 decl_type = r.group(1) 615 declaration_name = r.group(3) 616 members = r.group(2) 617 618 if not members: 619 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 620 return 621 622 if self.entry.identifier != declaration_name: 623 self.emit_msg(ln, 624 f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") 625 return 626 627 args_pattern = r'([^,)]+)' 628 629 sub_prefixes = [ 630 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), 631 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), 632 633 # Strip comments 634 (KernRe(r'\/\*.*?\*\/', re.S), ''), 635 636 # Strip attributes 637 (attribute, ' '), 638 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 639 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 640 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 641 (KernRe(r'\s*__packed\s*', re.S), ' '), 642 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 643 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 644 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 645 646 # Unwrap struct_group macros based on this definition: 647 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 648 # which has variants like: struct_group(NAME, MEMBERS...) 649 # Only MEMBERS arguments require documentation. 650 # 651 # Parsing them happens on two steps: 652 # 653 # 1. drop struct group arguments that aren't at MEMBERS, 654 # storing them as STRUCT_GROUP(MEMBERS) 655 # 656 # 2. remove STRUCT_GROUP() ancillary macro. 657 # 658 # The original logic used to remove STRUCT_GROUP() using an 659 # advanced regex: 660 # 661 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 662 # 663 # with two patterns that are incompatible with 664 # Python re module, as it has: 665 # 666 # - a recursive pattern: (?1) 667 # - an atomic grouping: (?>...) 668 # 669 # I tried a simpler version: but it didn't work either: 670 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 671 # 672 # As it doesn't properly match the end parenthesis on some cases. 673 # 674 # So, a better solution was crafted: there's now a NestedMatch 675 # class that ensures that delimiters after a search are properly 676 # matched. So, the implementation to drop STRUCT_GROUP() will be 677 # handled in separate. 678 679 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 680 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 681 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 682 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 683 684 # Replace macros 685 # 686 # TODO: use NestedMatch for FOO($1, $2, ...) matches 687 # 688 # it is better to also move those to the NestedMatch logic, 689 # to ensure that parenthesis will be properly matched. 690 691 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 692 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 693 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 694 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 695 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 696 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 697 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), 698 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), 699 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), 700 ] 701 702 # Regexes here are guaranteed to have the end limiter matching 703 # the start delimiter. Yet, right now, only one replace group 704 # is allowed. 705 706 sub_nested_prefixes = [ 707 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 708 ] 709 710 for search, sub in sub_prefixes: 711 members = search.sub(sub, members) 712 713 nested = NestedMatch() 714 715 for search, sub in sub_nested_prefixes: 716 members = nested.sub(search, sub, members) 717 718 # Keeps the original declaration as-is 719 declaration = members 720 721 # Split nested struct/union elements 722 # 723 # This loop was simpler at the original kernel-doc perl version, as 724 # while ($members =~ m/$struct_members/) { ... } 725 # reads 'members' string on each interaction. 726 # 727 # Python behavior is different: it parses 'members' only once, 728 # creating a list of tuples from the first interaction. 729 # 730 # On other words, this won't get nested structs. 731 # 732 # So, we need to have an extra loop on Python to override such 733 # re limitation. 734 735 while True: 736 tuples = struct_members.findall(members) 737 if not tuples: 738 break 739 740 for t in tuples: 741 newmember = "" 742 maintype = t[0] 743 s_ids = t[5] 744 content = t[3] 745 746 oldmember = "".join(t) 747 748 for s_id in s_ids.split(','): 749 s_id = s_id.strip() 750 751 newmember += f"{maintype} {s_id}; " 752 s_id = KernRe(r'[:\[].*').sub('', s_id) 753 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 754 755 for arg in content.split(';'): 756 arg = arg.strip() 757 758 if not arg: 759 continue 760 761 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') 762 if r.match(arg): 763 # Pointer-to-function 764 dtype = r.group(1) 765 name = r.group(2) 766 extra = r.group(3) 767 768 if not name: 769 continue 770 771 if not s_id: 772 # Anonymous struct/union 773 newmember += f"{dtype}{name}{extra}; " 774 else: 775 newmember += f"{dtype}{s_id}.{name}{extra}; " 776 777 else: 778 arg = arg.strip() 779 # Handle bitmaps 780 arg = KernRe(r':\s*\d+\s*').sub('', arg) 781 782 # Handle arrays 783 arg = KernRe(r'\[.*\]').sub('', arg) 784 785 # Handle multiple IDs 786 arg = KernRe(r'\s*,\s*').sub(',', arg) 787 788 r = KernRe(r'(.*)\s+([\S+,]+)') 789 790 if r.search(arg): 791 dtype = r.group(1) 792 names = r.group(2) 793 else: 794 newmember += f"{arg}; " 795 continue 796 797 for name in names.split(','): 798 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() 799 800 if not name: 801 continue 802 803 if not s_id: 804 # Anonymous struct/union 805 newmember += f"{dtype} {name}; " 806 else: 807 newmember += f"{dtype} {s_id}.{name}; " 808 809 members = members.replace(oldmember, newmember) 810 811 # Ignore other nested elements, like enums 812 members = re.sub(r'(\{[^\{\}]*\})', '', members) 813 814 self.create_parameter_list(ln, decl_type, members, ';', 815 declaration_name) 816 self.check_sections(ln, declaration_name, decl_type, 817 self.entry.sectcheck, self.entry.struct_actual) 818 819 # Adjust declaration for better display 820 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 821 declaration = KernRe(r'\}\s+;').sub('};', declaration) 822 823 # Better handle inlined enums 824 while True: 825 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 826 if not r.search(declaration): 827 break 828 829 declaration = r.sub(r'\1,\n\2', declaration) 830 831 def_args = declaration.split('\n') 832 level = 1 833 declaration = "" 834 for clause in def_args: 835 836 clause = clause.strip() 837 clause = KernRe(r'\s+').sub(' ', clause, count=1) 838 839 if not clause: 840 continue 841 842 if '}' in clause and level > 1: 843 level -= 1 844 845 if not KernRe(r'^\s*#').match(clause): 846 declaration += "\t" * level 847 848 declaration += "\t" + clause + "\n" 849 if "{" in clause and "}" not in clause: 850 level += 1 851 852 self.output_declaration(decl_type, declaration_name, 853 struct=declaration_name, 854 definition=declaration, 855 parameterlist=self.entry.parameterlist, 856 parameterdescs=self.entry.parameterdescs, 857 parametertypes=self.entry.parametertypes, 858 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 859 sectionlist=self.entry.sectionlist, 860 sections=self.entry.sections, 861 section_start_lines=self.entry.section_start_lines, 862 purpose=self.entry.declaration_purpose) 863 864 def dump_enum(self, ln, proto): 865 """ 866 Stores an enum inside self.entries array. 867 """ 868 869 # Ignore members marked private 870 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) 871 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) 872 873 # Strip comments 874 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) 875 876 # Strip #define macros inside enums 877 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 878 879 members = None 880 declaration_name = None 881 882 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 883 if r.search(proto): 884 declaration_name = r.group(2) 885 members = r.group(1).rstrip() 886 else: 887 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 888 if r.match(proto): 889 declaration_name = r.group(1) 890 members = r.group(2).rstrip() 891 892 if not members: 893 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 894 return 895 896 if self.entry.identifier != declaration_name: 897 if self.entry.identifier == "": 898 self.emit_msg(ln, 899 f"{proto}: wrong kernel-doc identifier on prototype") 900 else: 901 self.emit_msg(ln, 902 f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") 903 return 904 905 if not declaration_name: 906 declaration_name = "(anonymous)" 907 908 member_set = set() 909 910 members = KernRe(r'\([^;]*?[\)]').sub('', members) 911 912 for arg in members.split(','): 913 if not arg: 914 continue 915 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 916 self.entry.parameterlist.append(arg) 917 if arg not in self.entry.parameterdescs: 918 self.entry.parameterdescs[arg] = self.undescribed 919 self.emit_msg(ln, 920 f"Enum value '{arg}' not described in enum '{declaration_name}'") 921 member_set.add(arg) 922 923 for k in self.entry.parameterdescs: 924 if k not in member_set: 925 self.emit_msg(ln, 926 f"Excess enum value '%{k}' description in '{declaration_name}'") 927 928 self.output_declaration('enum', declaration_name, 929 enum=declaration_name, 930 parameterlist=self.entry.parameterlist, 931 parameterdescs=self.entry.parameterdescs, 932 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 933 sectionlist=self.entry.sectionlist, 934 sections=self.entry.sections, 935 section_start_lines=self.entry.section_start_lines, 936 purpose=self.entry.declaration_purpose) 937 938 def dump_declaration(self, ln, prototype): 939 """ 940 Stores a data declaration inside self.entries array. 941 """ 942 943 if self.entry.decl_type == "enum": 944 self.dump_enum(ln, prototype) 945 return 946 947 if self.entry.decl_type == "typedef": 948 self.dump_typedef(ln, prototype) 949 return 950 951 if self.entry.decl_type in ["union", "struct"]: 952 self.dump_struct(ln, prototype) 953 return 954 955 self.output_declaration(self.entry.decl_type, prototype, 956 entry=self.entry) 957 958 def dump_function(self, ln, prototype): 959 """ 960 Stores a function of function macro inside self.entries array. 961 """ 962 963 func_macro = False 964 return_type = '' 965 decl_type = 'function' 966 967 # Prefixes that would be removed 968 sub_prefixes = [ 969 (r"^static +", "", 0), 970 (r"^extern +", "", 0), 971 (r"^asmlinkage +", "", 0), 972 (r"^inline +", "", 0), 973 (r"^__inline__ +", "", 0), 974 (r"^__inline +", "", 0), 975 (r"^__always_inline +", "", 0), 976 (r"^noinline +", "", 0), 977 (r"^__FORTIFY_INLINE +", "", 0), 978 (r"__init +", "", 0), 979 (r"__init_or_module +", "", 0), 980 (r"__deprecated +", "", 0), 981 (r"__flatten +", "", 0), 982 (r"__meminit +", "", 0), 983 (r"__must_check +", "", 0), 984 (r"__weak +", "", 0), 985 (r"__sched +", "", 0), 986 (r"_noprof", "", 0), 987 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 988 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 989 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 990 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 991 (r"__attribute_const__ +", "", 0), 992 993 # It seems that Python support for re.X is broken: 994 # At least for me (Python 3.13), this didn't work 995# (r""" 996# __attribute__\s*\(\( 997# (?: 998# [\w\s]+ # attribute name 999# (?:\([^)]*\))? # attribute arguments 1000# \s*,? # optional comma at the end 1001# )+ 1002# \)\)\s+ 1003# """, "", re.X), 1004 1005 # So, remove whitespaces and comments from it 1006 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 1007 ] 1008 1009 for search, sub, flags in sub_prefixes: 1010 prototype = KernRe(search, flags).sub(sub, prototype) 1011 1012 # Macros are a special case, as they change the prototype format 1013 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1014 if new_proto != prototype: 1015 is_define_proto = True 1016 prototype = new_proto 1017 else: 1018 is_define_proto = False 1019 1020 # Yes, this truly is vile. We are looking for: 1021 # 1. Return type (may be nothing if we're looking at a macro) 1022 # 2. Function name 1023 # 3. Function parameters. 1024 # 1025 # All the while we have to watch out for function pointer parameters 1026 # (which IIRC is what the two sections are for), C types (these 1027 # regexps don't even start to express all the possibilities), and 1028 # so on. 1029 # 1030 # If you mess with these regexps, it's a good idea to check that 1031 # the following functions' documentation still comes out right: 1032 # - parport_register_device (function pointer parameters) 1033 # - atomic_set (macro) 1034 # - pci_match_device, __copy_to_user (long return type) 1035 1036 name = r'[a-zA-Z0-9_~:]+' 1037 prototype_end1 = r'[^\(]*' 1038 prototype_end2 = r'[^\{]*' 1039 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 1040 1041 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 1042 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 1043 1044 type1 = r'(?:[\w\s]+)?' 1045 type2 = r'(?:[\w\s]+\*+)+' 1046 1047 found = False 1048 1049 if is_define_proto: 1050 r = KernRe(r'^()(' + name + r')\s+') 1051 1052 if r.search(prototype): 1053 return_type = '' 1054 declaration_name = r.group(2) 1055 func_macro = True 1056 1057 found = True 1058 1059 if not found: 1060 patterns = [ 1061 rf'^()({name})\s*{prototype_end}', 1062 rf'^({type1})\s+({name})\s*{prototype_end}', 1063 rf'^({type2})\s*({name})\s*{prototype_end}', 1064 ] 1065 1066 for p in patterns: 1067 r = KernRe(p) 1068 1069 if r.match(prototype): 1070 1071 return_type = r.group(1) 1072 declaration_name = r.group(2) 1073 args = r.group(3) 1074 1075 self.create_parameter_list(ln, decl_type, args, ',', 1076 declaration_name) 1077 1078 found = True 1079 break 1080 if not found: 1081 self.emit_msg(ln, 1082 f"cannot understand function prototype: '{prototype}'") 1083 return 1084 1085 if self.entry.identifier != declaration_name: 1086 self.emit_msg(ln, 1087 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") 1088 return 1089 1090 prms = " ".join(self.entry.parameterlist) 1091 self.check_sections(ln, declaration_name, "function", 1092 self.entry.sectcheck, prms) 1093 1094 self.check_return_section(ln, declaration_name, return_type) 1095 1096 if 'typedef' in return_type: 1097 self.output_declaration(decl_type, declaration_name, 1098 function=declaration_name, 1099 typedef=True, 1100 functiontype=return_type, 1101 parameterlist=self.entry.parameterlist, 1102 parameterdescs=self.entry.parameterdescs, 1103 parametertypes=self.entry.parametertypes, 1104 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1105 sectionlist=self.entry.sectionlist, 1106 sections=self.entry.sections, 1107 section_start_lines=self.entry.section_start_lines, 1108 purpose=self.entry.declaration_purpose, 1109 func_macro=func_macro) 1110 else: 1111 self.output_declaration(decl_type, declaration_name, 1112 function=declaration_name, 1113 typedef=False, 1114 functiontype=return_type, 1115 parameterlist=self.entry.parameterlist, 1116 parameterdescs=self.entry.parameterdescs, 1117 parametertypes=self.entry.parametertypes, 1118 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1119 sectionlist=self.entry.sectionlist, 1120 sections=self.entry.sections, 1121 section_start_lines=self.entry.section_start_lines, 1122 purpose=self.entry.declaration_purpose, 1123 func_macro=func_macro) 1124 1125 def dump_typedef(self, ln, proto): 1126 """ 1127 Stores a typedef inside self.entries array. 1128 """ 1129 1130 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1131 typedef_ident = r'\*?\s*(\w\S+)\s*' 1132 typedef_args = r'\s*\((.*)\);' 1133 1134 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1135 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1136 1137 # Strip comments 1138 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1139 1140 # Parse function typedef prototypes 1141 for r in [typedef1, typedef2]: 1142 if not r.match(proto): 1143 continue 1144 1145 return_type = r.group(1).strip() 1146 declaration_name = r.group(2) 1147 args = r.group(3) 1148 1149 if self.entry.identifier != declaration_name: 1150 self.emit_msg(ln, 1151 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1152 return 1153 1154 decl_type = 'function' 1155 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1156 1157 self.output_declaration(decl_type, declaration_name, 1158 function=declaration_name, 1159 typedef=True, 1160 functiontype=return_type, 1161 parameterlist=self.entry.parameterlist, 1162 parameterdescs=self.entry.parameterdescs, 1163 parametertypes=self.entry.parametertypes, 1164 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1165 sectionlist=self.entry.sectionlist, 1166 sections=self.entry.sections, 1167 section_start_lines=self.entry.section_start_lines, 1168 purpose=self.entry.declaration_purpose) 1169 return 1170 1171 # Handle nested parentheses or brackets 1172 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1173 while r.search(proto): 1174 proto = r.sub('', proto) 1175 1176 # Parse simple typedefs 1177 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1178 if r.match(proto): 1179 declaration_name = r.group(1) 1180 1181 if self.entry.identifier != declaration_name: 1182 self.emit_msg(ln, 1183 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1184 return 1185 1186 self.output_declaration('typedef', declaration_name, 1187 typedef=declaration_name, 1188 sectionlist=self.entry.sectionlist, 1189 sections=self.entry.sections, 1190 section_start_lines=self.entry.section_start_lines, 1191 purpose=self.entry.declaration_purpose) 1192 return 1193 1194 self.emit_msg(ln, "error: Cannot parse typedef!") 1195 1196 @staticmethod 1197 def process_export(function_set, line): 1198 """ 1199 process EXPORT_SYMBOL* tags 1200 1201 This method doesn't use any variable from the class, so declare it 1202 with a staticmethod decorator. 1203 """ 1204 1205 # We support documenting some exported symbols with different 1206 # names. A horrible hack. 1207 suffixes = [ '_noprof' ] 1208 1209 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1210 # multiple export lines would violate Kernel coding style. 1211 1212 if export_symbol.search(line): 1213 symbol = export_symbol.group(2) 1214 elif export_symbol_ns.search(line): 1215 symbol = export_symbol_ns.group(2) 1216 else: 1217 return False 1218 # 1219 # Found an export, trim out any special suffixes 1220 # 1221 for suffix in suffixes: 1222 symbol = symbol.removesuffix(suffix) 1223 function_set.add(symbol) 1224 return True 1225 1226 def process_normal(self, ln, line): 1227 """ 1228 STATE_NORMAL: looking for the /** to begin everything. 1229 """ 1230 1231 if not doc_start.match(line): 1232 return 1233 1234 # start a new entry 1235 self.reset_state(ln) 1236 1237 # next line is always the function name 1238 self.state = state.NAME 1239 1240 def process_name(self, ln, line): 1241 """ 1242 STATE_NAME: Looking for the "name - description" line 1243 """ 1244 # 1245 # Check for a DOC: block and handle them specially. 1246 # 1247 if doc_block.search(line): 1248 1249 if not doc_block.group(1): 1250 self.entry.begin_section(ln, "Introduction") 1251 else: 1252 self.entry.begin_section(ln, doc_block.group(1)) 1253 1254 self.entry.identifier = self.entry.section 1255 self.state = state.DOCBLOCK 1256 # 1257 # Otherwise we're looking for a normal kerneldoc declaration line. 1258 # 1259 elif doc_decl.search(line): 1260 self.entry.identifier = doc_decl.group(1) 1261 1262 # Test for data declaration 1263 if doc_begin_data.search(line): 1264 self.entry.decl_type = doc_begin_data.group(1) 1265 self.entry.identifier = doc_begin_data.group(2) 1266 # 1267 # Look for a function description 1268 # 1269 elif doc_begin_func.search(line): 1270 self.entry.identifier = doc_begin_func.group(1) 1271 self.entry.decl_type = "function" 1272 # 1273 # We struck out. 1274 # 1275 else: 1276 self.emit_msg(ln, 1277 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1278 self.state = state.NORMAL 1279 return 1280 # 1281 # OK, set up for a new kerneldoc entry. 1282 # 1283 self.state = state.BODY 1284 self.entry.identifier = self.entry.identifier.strip(" ") 1285 # if there's no @param blocks need to set up default section here 1286 self.entry.begin_section(ln + 1) 1287 # 1288 # Find the description portion, which *should* be there but 1289 # isn't always. 1290 # (We should be able to capture this from the previous parsing - someday) 1291 # 1292 r = KernRe("[-:](.*)") 1293 if r.search(line): 1294 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1295 self.state = state.DECLARATION 1296 else: 1297 self.entry.declaration_purpose = "" 1298 1299 if not self.entry.declaration_purpose and self.config.wshort_desc: 1300 self.emit_msg(ln, 1301 f"missing initial short description on line:\n{line}") 1302 1303 if not self.entry.identifier and self.entry.decl_type != "enum": 1304 self.emit_msg(ln, 1305 f"wrong kernel-doc identifier on line:\n{line}") 1306 self.state = state.NORMAL 1307 1308 if self.config.verbose: 1309 self.emit_msg(ln, 1310 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1311 warning=False) 1312 # 1313 # Failed to find an identifier. Emit a warning 1314 # 1315 else: 1316 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1317 1318 # 1319 # Helper function to determine if a new section is being started. 1320 # 1321 def is_new_section(self, ln, line): 1322 if doc_sect.search(line): 1323 self.state = state.BODY 1324 # 1325 # Pick out the name of our new section, tweaking it if need be. 1326 # 1327 newsection = doc_sect.group(1) 1328 if newsection.lower() == 'description': 1329 newsection = 'Description' 1330 elif newsection.lower() == 'context': 1331 newsection = 'Context' 1332 self.state = state.SPECIAL_SECTION 1333 elif newsection.lower() in ["@return", "@returns", 1334 "return", "returns"]: 1335 newsection = "Return" 1336 self.state = state.SPECIAL_SECTION 1337 elif newsection[0] == '@': 1338 self.state = state.SPECIAL_SECTION 1339 # 1340 # Initialize the contents, and get the new section going. 1341 # 1342 newcontents = doc_sect.group(2) 1343 if not newcontents: 1344 newcontents = "" 1345 self.dump_section() 1346 self.entry.begin_section(ln, newsection) 1347 self.entry.leading_space = None 1348 1349 self.entry.add_text(newcontents.lstrip()) 1350 return True 1351 return False 1352 1353 # 1354 # Helper function to detect (and effect) the end of a kerneldoc comment. 1355 # 1356 def is_comment_end(self, ln, line): 1357 if doc_end.search(line): 1358 self.dump_section() 1359 1360 # Look for doc_com + <text> + doc_end: 1361 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1362 if r.match(line): 1363 self.emit_msg(ln, f"suspicious ending line: {line}") 1364 1365 self.entry.prototype = "" 1366 self.entry.new_start_line = ln + 1 1367 1368 self.state = state.PROTO 1369 return True 1370 return False 1371 1372 1373 def process_decl(self, ln, line): 1374 """ 1375 STATE_DECLARATION: We've seen the beginning of a declaration 1376 """ 1377 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1378 return 1379 # 1380 # Look for anything with the " * " line beginning. 1381 # 1382 if doc_content.search(line): 1383 cont = doc_content.group(1) 1384 # 1385 # A blank line means that we have moved out of the declaration 1386 # part of the comment (without any "special section" parameter 1387 # descriptions). 1388 # 1389 if cont == "": 1390 self.state = state.BODY 1391 # 1392 # Otherwise we have more of the declaration section to soak up. 1393 # 1394 else: 1395 self.entry.declaration_purpose = \ 1396 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1397 else: 1398 # Unknown line, ignore 1399 self.emit_msg(ln, f"bad line: {line}") 1400 1401 1402 def process_special(self, ln, line): 1403 """ 1404 STATE_SPECIAL_SECTION: a section ending with a blank line 1405 """ 1406 # 1407 # If we have hit a blank line (only the " * " marker), then this 1408 # section is done. 1409 # 1410 if KernRe(r"\s*\*\s*$").match(line): 1411 self.entry.begin_section(ln, dump = True) 1412 self.state = state.BODY 1413 return 1414 # 1415 # Not a blank line, look for the other ways to end the section. 1416 # 1417 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1418 return 1419 # 1420 # OK, we should have a continuation of the text for this section. 1421 # 1422 if doc_content.search(line): 1423 cont = doc_content.group(1) 1424 # 1425 # If the lines of text after the first in a special section have 1426 # leading white space, we need to trim it out or Sphinx will get 1427 # confused. For the second line (the None case), see what we 1428 # find there and remember it. 1429 # 1430 if self.entry.leading_space is None: 1431 r = KernRe(r'^(\s+)') 1432 if r.match(cont): 1433 self.entry.leading_space = len(r.group(1)) 1434 else: 1435 self.entry.leading_space = 0 1436 # 1437 # Otherwise, before trimming any leading chars, be *sure* 1438 # that they are white space. We should maybe warn if this 1439 # isn't the case. 1440 # 1441 for i in range(0, self.entry.leading_space): 1442 if cont[i] != " ": 1443 self.entry.leading_space = i 1444 break 1445 # 1446 # Add the trimmed result to the section and we're done. 1447 # 1448 self.entry.add_text(cont[self.entry.leading_space:]) 1449 else: 1450 # Unknown line, ignore 1451 self.emit_msg(ln, f"bad line: {line}") 1452 1453 def process_body(self, ln, line): 1454 """ 1455 STATE_BODY: the bulk of a kerneldoc comment. 1456 """ 1457 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1458 return 1459 1460 if doc_content.search(line): 1461 cont = doc_content.group(1) 1462 self.entry.add_text(cont) 1463 else: 1464 # Unknown line, ignore 1465 self.emit_msg(ln, f"bad line: {line}") 1466 1467 def process_inline(self, ln, line): 1468 """STATE_INLINE: docbook comments within a prototype.""" 1469 1470 if self.inline_doc_state == state.INLINE_NAME and \ 1471 doc_inline_sect.search(line): 1472 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1473 1474 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1475 self.inline_doc_state = state.INLINE_TEXT 1476 # Documentation block end */ 1477 return 1478 1479 if doc_inline_end.search(line): 1480 self.dump_section() 1481 self.state = state.PROTO 1482 self.inline_doc_state = state.INLINE_NA 1483 return 1484 1485 if doc_content.search(line): 1486 if self.inline_doc_state == state.INLINE_TEXT: 1487 self.entry.add_text(doc_content.group(1)) 1488 1489 elif self.inline_doc_state == state.INLINE_NAME: 1490 self.emit_msg(ln, 1491 f"Incorrect use of kernel-doc format: {line}") 1492 1493 self.inline_doc_state = state.INLINE_ERROR 1494 1495 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1496 """ 1497 Handle syscall definitions 1498 """ 1499 1500 is_void = False 1501 1502 # Strip newlines/CR's 1503 proto = re.sub(r'[\r\n]+', ' ', proto) 1504 1505 # Check if it's a SYSCALL_DEFINE0 1506 if 'SYSCALL_DEFINE0' in proto: 1507 is_void = True 1508 1509 # Replace SYSCALL_DEFINE with correct return type & function name 1510 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1511 1512 r = KernRe(r'long\s+(sys_.*?),') 1513 if r.search(proto): 1514 proto = KernRe(',').sub('(', proto, count=1) 1515 elif is_void: 1516 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1517 1518 # Now delete all of the odd-numbered commas in the proto 1519 # so that argument types & names don't have a comma between them 1520 count = 0 1521 length = len(proto) 1522 1523 if is_void: 1524 length = 0 # skip the loop if is_void 1525 1526 for ix in range(length): 1527 if proto[ix] == ',': 1528 count += 1 1529 if count % 2 == 1: 1530 proto = proto[:ix] + ' ' + proto[ix + 1:] 1531 1532 return proto 1533 1534 def tracepoint_munge(self, ln, proto): 1535 """ 1536 Handle tracepoint definitions 1537 """ 1538 1539 tracepointname = None 1540 tracepointargs = None 1541 1542 # Match tracepoint name based on different patterns 1543 r = KernRe(r'TRACE_EVENT\((.*?),') 1544 if r.search(proto): 1545 tracepointname = r.group(1) 1546 1547 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1548 if r.search(proto): 1549 tracepointname = r.group(1) 1550 1551 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1552 if r.search(proto): 1553 tracepointname = r.group(2) 1554 1555 if tracepointname: 1556 tracepointname = tracepointname.lstrip() 1557 1558 r = KernRe(r'TP_PROTO\((.*?)\)') 1559 if r.search(proto): 1560 tracepointargs = r.group(1) 1561 1562 if not tracepointname or not tracepointargs: 1563 self.emit_msg(ln, 1564 f"Unrecognized tracepoint format:\n{proto}\n") 1565 else: 1566 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1567 self.entry.identifier = f"trace_{self.entry.identifier}" 1568 1569 return proto 1570 1571 def process_proto_function(self, ln, line): 1572 """Ancillary routine to process a function prototype""" 1573 1574 # strip C99-style comments to end of line 1575 r = KernRe(r"\/\/.*$", re.S) 1576 line = r.sub('', line) 1577 1578 if KernRe(r'\s*#\s*define').match(line): 1579 self.entry.prototype = line 1580 elif line.startswith('#'): 1581 # Strip other macros like #ifdef/#ifndef/#endif/... 1582 pass 1583 else: 1584 r = KernRe(r'([^\{]*)') 1585 if r.match(line): 1586 self.entry.prototype += r.group(1) + " " 1587 1588 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1589 # strip comments 1590 r = KernRe(r'/\*.*?\*/') 1591 self.entry.prototype = r.sub('', self.entry.prototype) 1592 1593 # strip newlines/cr's 1594 r = KernRe(r'[\r\n]+') 1595 self.entry.prototype = r.sub(' ', self.entry.prototype) 1596 1597 # strip leading spaces 1598 r = KernRe(r'^\s+') 1599 self.entry.prototype = r.sub('', self.entry.prototype) 1600 1601 # Handle self.entry.prototypes for function pointers like: 1602 # int (*pcs_config)(struct foo) 1603 1604 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1605 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1606 1607 if 'SYSCALL_DEFINE' in self.entry.prototype: 1608 self.entry.prototype = self.syscall_munge(ln, 1609 self.entry.prototype) 1610 1611 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1612 if r.search(self.entry.prototype): 1613 self.entry.prototype = self.tracepoint_munge(ln, 1614 self.entry.prototype) 1615 1616 self.dump_function(ln, self.entry.prototype) 1617 self.reset_state(ln) 1618 1619 def process_proto_type(self, ln, line): 1620 """Ancillary routine to process a type""" 1621 1622 # Strip newlines/cr's. 1623 line = KernRe(r'[\r\n]+', re.S).sub(' ', line) 1624 1625 # Strip leading spaces 1626 line = KernRe(r'^\s+', re.S).sub('', line) 1627 1628 # Strip trailing spaces 1629 line = KernRe(r'\s+$', re.S).sub('', line) 1630 1631 # Strip C99-style comments to the end of the line 1632 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1633 1634 # To distinguish preprocessor directive from regular declaration later. 1635 if line.startswith('#'): 1636 line += ";" 1637 1638 r = KernRe(r'([^\{\};]*)([\{\};])(.*)') 1639 while True: 1640 if r.search(line): 1641 if self.entry.prototype: 1642 self.entry.prototype += " " 1643 self.entry.prototype += r.group(1) + r.group(2) 1644 1645 self.entry.brcount += r.group(2).count('{') 1646 self.entry.brcount -= r.group(2).count('}') 1647 1648 self.entry.brcount = max(self.entry.brcount, 0) 1649 1650 if r.group(2) == ';' and self.entry.brcount == 0: 1651 self.dump_declaration(ln, self.entry.prototype) 1652 self.reset_state(ln) 1653 break 1654 1655 line = r.group(3) 1656 else: 1657 self.entry.prototype += line 1658 break 1659 1660 def process_proto(self, ln, line): 1661 """STATE_PROTO: reading a function/whatever prototype.""" 1662 1663 if doc_inline_oneline.search(line): 1664 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1665 self.entry.add_text(doc_inline_oneline.group(2)) 1666 self.dump_section() 1667 1668 elif doc_inline_start.search(line): 1669 self.state = state.INLINE 1670 self.inline_doc_state = state.INLINE_NAME 1671 1672 elif self.entry.decl_type == 'function': 1673 self.process_proto_function(ln, line) 1674 1675 else: 1676 self.process_proto_type(ln, line) 1677 1678 def process_docblock(self, ln, line): 1679 """STATE_DOCBLOCK: within a DOC: block.""" 1680 1681 if doc_end.search(line): 1682 self.dump_section() 1683 self.output_declaration("doc", self.entry.identifier, 1684 sectionlist=self.entry.sectionlist, 1685 sections=self.entry.sections, 1686 section_start_lines=self.entry.section_start_lines) 1687 self.reset_state(ln) 1688 1689 elif doc_content.search(line): 1690 self.entry.add_text(doc_content.group(1)) 1691 1692 def parse_export(self): 1693 """ 1694 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1695 """ 1696 1697 export_table = set() 1698 1699 try: 1700 with open(self.fname, "r", encoding="utf8", 1701 errors="backslashreplace") as fp: 1702 1703 for line in fp: 1704 self.process_export(export_table, line) 1705 1706 except IOError: 1707 return None 1708 1709 return export_table 1710 1711 # 1712 # The state/action table telling us which function to invoke in 1713 # each state. 1714 # 1715 state_actions = { 1716 state.NORMAL: process_normal, 1717 state.NAME: process_name, 1718 state.BODY: process_body, 1719 state.DECLARATION: process_decl, 1720 state.SPECIAL_SECTION: process_special, 1721 state.INLINE: process_inline, 1722 state.PROTO: process_proto, 1723 state.DOCBLOCK: process_docblock, 1724 } 1725 1726 def parse_kdoc(self): 1727 """ 1728 Open and process each line of a C source file. 1729 The parsing is controlled via a state machine, and the line is passed 1730 to a different process function depending on the state. The process 1731 function may update the state as needed. 1732 1733 Besides parsing kernel-doc tags, it also parses export symbols. 1734 """ 1735 1736 prev = "" 1737 prev_ln = None 1738 export_table = set() 1739 1740 try: 1741 with open(self.fname, "r", encoding="utf8", 1742 errors="backslashreplace") as fp: 1743 for ln, line in enumerate(fp): 1744 1745 line = line.expandtabs().strip("\n") 1746 1747 # Group continuation lines on prototypes 1748 if self.state == state.PROTO: 1749 if line.endswith("\\"): 1750 prev += line.rstrip("\\") 1751 if not prev_ln: 1752 prev_ln = ln 1753 continue 1754 1755 if prev: 1756 ln = prev_ln 1757 line = prev + line 1758 prev = "" 1759 prev_ln = None 1760 1761 self.config.log.debug("%d %s%s: %s", 1762 ln, state.name[self.state], 1763 state.inline_name[self.inline_doc_state], 1764 line) 1765 1766 # This is an optimization over the original script. 1767 # There, when export_file was used for the same file, 1768 # it was read twice. Here, we use the already-existing 1769 # loop to parse exported symbols as well. 1770 # 1771 if (self.state != state.NORMAL) or \ 1772 not self.process_export(export_table, line): 1773 # Hand this line to the appropriate state handler 1774 self.state_actions[self.state](self, ln, line) 1775 1776 except OSError: 1777 self.config.log.error(f"Error: Cannot open file {self.fname}") 1778 1779 return export_table, self.entries 1780