1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45doc_sect = doc_com + \ 46 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 51doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 52doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 53doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 54attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 55 flags=re.I | re.S, cache=False) 56 57export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 58export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 59 60type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 61 62# 63# Tests for the beginning of a kerneldoc block in its various forms. 64# 65doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 66doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 67doc_begin_func = KernRe(str(doc_com) + # initial " * ' 68 r"(?:\w+\s*\*\s*)?" + # type (not captured) 69 r'(?:define\s+)?' + # possible "define" (not captured) 70 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 71 r'(?:[-:].*)?$', # description (not captured) 72 cache = False) 73 74# 75# A little helper to get rid of excess white space 76# 77multi_space = KernRe(r'\s\s+') 78def trim_whitespace(s): 79 return multi_space.sub(' ', s.strip()) 80 81class state: 82 """ 83 State machine enums 84 """ 85 86 # Parser states 87 NORMAL = 0 # normal code 88 NAME = 1 # looking for function name 89 DECLARATION = 2 # We have seen a declaration which might not be done 90 BODY = 3 # the body of the comment 91 SPECIAL_SECTION = 4 # doc section ending with a blank line 92 PROTO = 5 # scanning prototype 93 DOCBLOCK = 6 # documentation block 94 INLINE_NAME = 7 # gathering doc outside main block 95 INLINE_TEXT = 8 # reading the body of inline docs 96 97 name = [ 98 "NORMAL", 99 "NAME", 100 "DECLARATION", 101 "BODY", 102 "SPECIAL_SECTION", 103 "PROTO", 104 "DOCBLOCK", 105 "INLINE_NAME", 106 "INLINE_TEXT", 107 ] 108 109 110SECTION_DEFAULT = "Description" # default section 111 112class KernelEntry: 113 114 def __init__(self, config, ln): 115 self.config = config 116 117 self._contents = [] 118 self.sectcheck = "" 119 self.struct_actual = "" 120 self.prototype = "" 121 122 self.warnings = [] 123 124 self.parameterlist = [] 125 self.parameterdescs = {} 126 self.parametertypes = {} 127 self.parameterdesc_start_lines = {} 128 129 self.section_start_lines = {} 130 self.sectionlist = [] 131 self.sections = {} 132 133 self.anon_struct_union = False 134 135 self.leading_space = None 136 137 # State flags 138 self.brcount = 0 139 self.declaration_start_line = ln + 1 140 141 # 142 # Management of section contents 143 # 144 def add_text(self, text): 145 self._contents.append(text) 146 147 def contents(self): 148 return '\n'.join(self._contents) + '\n' 149 150 # TODO: rename to emit_message after removal of kernel-doc.pl 151 def emit_msg(self, log_msg, warning=True): 152 """Emit a message""" 153 154 if not warning: 155 self.config.log.info(log_msg) 156 return 157 158 # Delegate warning output to output logic, as this way it 159 # will report warnings/info only for symbols that are output 160 161 self.warnings.append(log_msg) 162 return 163 164 # 165 # Begin a new section. 166 # 167 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 168 if dump: 169 self.dump_section(start_new = True) 170 self.section = title 171 self.new_start_line = line_no 172 173 def dump_section(self, start_new=True): 174 """ 175 Dumps section contents to arrays/hashes intended for that purpose. 176 """ 177 # 178 # If we have accumulated no contents in the default ("description") 179 # section, don't bother. 180 # 181 if self.section == SECTION_DEFAULT and not self._contents: 182 return 183 name = self.section 184 contents = self.contents() 185 186 if type_param.match(name): 187 name = type_param.group(1) 188 189 self.parameterdescs[name] = contents 190 self.parameterdesc_start_lines[name] = self.new_start_line 191 192 self.sectcheck += name + " " 193 self.new_start_line = 0 194 195 else: 196 if name in self.sections and self.sections[name] != "": 197 # Only warn on user-specified duplicate section names 198 if name != SECTION_DEFAULT: 199 self.emit_msg(self.new_start_line, 200 f"duplicate section name '{name}'\n") 201 # Treat as a new paragraph - add a blank line 202 self.sections[name] += '\n' + contents 203 else: 204 self.sections[name] = contents 205 self.sectionlist.append(name) 206 self.section_start_lines[name] = self.new_start_line 207 self.new_start_line = 0 208 209# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 210 211 if start_new: 212 self.section = SECTION_DEFAULT 213 self._contents = [] 214 215 216class KernelDoc: 217 """ 218 Read a C language source or header FILE and extract embedded 219 documentation comments. 220 """ 221 222 # Section names 223 224 section_context = "Context" 225 section_return = "Return" 226 227 undescribed = "-- undescribed --" 228 229 def __init__(self, config, fname): 230 """Initialize internal variables""" 231 232 self.fname = fname 233 self.config = config 234 235 # Initial state for the state machines 236 self.state = state.NORMAL 237 238 # Store entry currently being processed 239 self.entry = None 240 241 # Place all potential outputs into an array 242 self.entries = [] 243 244 def emit_msg(self, ln, msg, warning=True): 245 """Emit a message""" 246 247 log_msg = f"{self.fname}:{ln} {msg}" 248 249 if self.entry: 250 self.entry.emit_msg(log_msg, warning) 251 return 252 253 if warning: 254 self.config.log.warning(log_msg) 255 else: 256 self.config.log.info(log_msg) 257 258 def dump_section(self, start_new=True): 259 """ 260 Dumps section contents to arrays/hashes intended for that purpose. 261 """ 262 263 if self.entry: 264 self.entry.dump_section(start_new) 265 266 # TODO: rename it to store_declaration after removal of kernel-doc.pl 267 def output_declaration(self, dtype, name, **args): 268 """ 269 Stores the entry into an entry array. 270 271 The actual output and output filters will be handled elsewhere 272 """ 273 274 # The implementation here is different than the original kernel-doc: 275 # instead of checking for output filters or actually output anything, 276 # it just stores the declaration content at self.entries, as the 277 # output will happen on a separate class. 278 # 279 # For now, we're keeping the same name of the function just to make 280 # easier to compare the source code of both scripts 281 282 args["declaration_start_line"] = self.entry.declaration_start_line 283 args["type"] = dtype 284 args["warnings"] = self.entry.warnings 285 286 # TODO: use colletions.OrderedDict to remove sectionlist 287 288 sections = args.get('sections', {}) 289 sectionlist = args.get('sectionlist', []) 290 291 # Drop empty sections 292 # TODO: improve empty sections logic to emit warnings 293 for section in ["Description", "Return"]: 294 if section in sectionlist: 295 if not sections[section].rstrip(): 296 del sections[section] 297 sectionlist.remove(section) 298 299 self.entries.append((name, args)) 300 301 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 302 303 def reset_state(self, ln): 304 """ 305 Ancillary routine to create a new entry. It initializes all 306 variables used by the state machine. 307 """ 308 309 self.entry = KernelEntry(self.config, ln) 310 311 # State flags 312 self.state = state.NORMAL 313 314 def push_parameter(self, ln, decl_type, param, dtype, 315 org_arg, declaration_name): 316 """ 317 Store parameters and their descriptions at self.entry. 318 """ 319 320 if self.entry.anon_struct_union and dtype == "" and param == "}": 321 return # Ignore the ending }; from anonymous struct/union 322 323 self.entry.anon_struct_union = False 324 325 param = KernRe(r'[\[\)].*').sub('', param, count=1) 326 327 if dtype == "" and param.endswith("..."): 328 if KernRe(r'\w\.\.\.$').search(param): 329 # For named variable parameters of the form `x...`, 330 # remove the dots 331 param = param[:-3] 332 else: 333 # Handles unnamed variable parameters 334 param = "..." 335 336 if param not in self.entry.parameterdescs or \ 337 not self.entry.parameterdescs[param]: 338 339 self.entry.parameterdescs[param] = "variable arguments" 340 341 elif dtype == "" and (not param or param == "void"): 342 param = "void" 343 self.entry.parameterdescs[param] = "no arguments" 344 345 elif dtype == "" and param in ["struct", "union"]: 346 # Handle unnamed (anonymous) union or struct 347 dtype = param 348 param = "{unnamed_" + param + "}" 349 self.entry.parameterdescs[param] = "anonymous\n" 350 self.entry.anon_struct_union = True 351 352 # Handle cache group enforcing variables: they do not need 353 # to be described in header files 354 elif "__cacheline_group" in param: 355 # Ignore __cacheline_group_begin and __cacheline_group_end 356 return 357 358 # Warn if parameter has no description 359 # (but ignore ones starting with # as these are not parameters 360 # but inline preprocessor statements) 361 if param not in self.entry.parameterdescs and not param.startswith("#"): 362 self.entry.parameterdescs[param] = self.undescribed 363 364 if "." not in param: 365 if decl_type == 'function': 366 dname = f"{decl_type} parameter" 367 else: 368 dname = f"{decl_type} member" 369 370 self.emit_msg(ln, 371 f"{dname} '{param}' not described in '{declaration_name}'") 372 373 # Strip spaces from param so that it is one continuous string on 374 # parameterlist. This fixes a problem where check_sections() 375 # cannot find a parameter like "addr[6 + 2]" because it actually 376 # appears as "addr[6", "+", "2]" on the parameter list. 377 # However, it's better to maintain the param string unchanged for 378 # output, so just weaken the string compare in check_sections() 379 # to ignore "[blah" in a parameter string. 380 381 self.entry.parameterlist.append(param) 382 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 383 self.entry.parametertypes[param] = org_arg 384 385 def save_struct_actual(self, actual): 386 """ 387 Strip all spaces from the actual param so that it looks like 388 one string item. 389 """ 390 391 actual = KernRe(r'\s*').sub("", actual, count=1) 392 393 self.entry.struct_actual += actual + " " 394 395 def create_parameter_list(self, ln, decl_type, args, 396 splitter, declaration_name): 397 """ 398 Creates a list of parameters, storing them at self.entry. 399 """ 400 401 # temporarily replace all commas inside function pointer definition 402 arg_expr = KernRe(r'(\([^\),]+),') 403 while arg_expr.search(args): 404 args = arg_expr.sub(r"\1#", args) 405 406 for arg in args.split(splitter): 407 # Strip comments 408 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 409 410 # Ignore argument attributes 411 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 412 413 # Strip leading/trailing spaces 414 arg = arg.strip() 415 arg = KernRe(r'\s+').sub(' ', arg, count=1) 416 417 if arg.startswith('#'): 418 # Treat preprocessor directive as a typeless variable just to fill 419 # corresponding data structures "correctly". Catch it later in 420 # output_* subs. 421 422 # Treat preprocessor directive as a typeless variable 423 self.push_parameter(ln, decl_type, arg, "", 424 "", declaration_name) 425 426 elif KernRe(r'\(.+\)\s*\(').search(arg): 427 # Pointer-to-function 428 429 arg = arg.replace('#', ',') 430 431 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 432 if r.match(arg): 433 param = r.group(1) 434 else: 435 self.emit_msg(ln, f"Invalid param: {arg}") 436 param = arg 437 438 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 439 self.save_struct_actual(param) 440 self.push_parameter(ln, decl_type, param, dtype, 441 arg, declaration_name) 442 443 elif KernRe(r'\(.+\)\s*\[').search(arg): 444 # Array-of-pointers 445 446 arg = arg.replace('#', ',') 447 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 448 if r.match(arg): 449 param = r.group(1) 450 else: 451 self.emit_msg(ln, f"Invalid param: {arg}") 452 param = arg 453 454 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 455 456 self.save_struct_actual(param) 457 self.push_parameter(ln, decl_type, param, dtype, 458 arg, declaration_name) 459 460 elif arg: 461 arg = KernRe(r'\s*:\s*').sub(":", arg) 462 arg = KernRe(r'\s*\[').sub('[', arg) 463 464 args = KernRe(r'\s*,\s*').split(arg) 465 if args[0] and '*' in args[0]: 466 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 467 468 first_arg = [] 469 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 470 if args[0] and r.match(args[0]): 471 args.pop(0) 472 first_arg.extend(r.group(1)) 473 first_arg.append(r.group(2)) 474 else: 475 first_arg = KernRe(r'\s+').split(args.pop(0)) 476 477 args.insert(0, first_arg.pop()) 478 dtype = ' '.join(first_arg) 479 480 for param in args: 481 if KernRe(r'^(\*+)\s*(.*)').match(param): 482 r = KernRe(r'^(\*+)\s*(.*)') 483 if not r.match(param): 484 self.emit_msg(ln, f"Invalid param: {param}") 485 continue 486 487 param = r.group(1) 488 489 self.save_struct_actual(r.group(2)) 490 self.push_parameter(ln, decl_type, r.group(2), 491 f"{dtype} {r.group(1)}", 492 arg, declaration_name) 493 494 elif KernRe(r'(.*?):(\w+)').search(param): 495 r = KernRe(r'(.*?):(\w+)') 496 if not r.match(param): 497 self.emit_msg(ln, f"Invalid param: {param}") 498 continue 499 500 if dtype != "": # Skip unnamed bit-fields 501 self.save_struct_actual(r.group(1)) 502 self.push_parameter(ln, decl_type, r.group(1), 503 f"{dtype}:{r.group(2)}", 504 arg, declaration_name) 505 else: 506 self.save_struct_actual(param) 507 self.push_parameter(ln, decl_type, param, dtype, 508 arg, declaration_name) 509 510 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): 511 """ 512 Check for errors inside sections, emitting warnings if not found 513 parameters are described. 514 """ 515 516 sects = sectcheck.split() 517 prms = prmscheck.split() 518 err = False 519 520 for sx in range(len(sects)): # pylint: disable=C0200 521 err = True 522 for px in range(len(prms)): # pylint: disable=C0200 523 prm_clean = prms[px] 524 prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) 525 prm_clean = attribute.sub('', prm_clean) 526 527 # ignore array size in a parameter string; 528 # however, the original param string may contain 529 # spaces, e.g.: addr[6 + 2] 530 # and this appears in @prms as "addr[6" since the 531 # parameter list is split at spaces; 532 # hence just ignore "[..." for the sections check; 533 prm_clean = KernRe(r'\[.*').sub('', prm_clean) 534 535 if prm_clean == sects[sx]: 536 err = False 537 break 538 539 if err: 540 if decl_type == 'function': 541 dname = f"{decl_type} parameter" 542 else: 543 dname = f"{decl_type} member" 544 545 self.emit_msg(ln, 546 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") 547 548 def check_return_section(self, ln, declaration_name, return_type): 549 """ 550 If the function doesn't return void, warns about the lack of a 551 return description. 552 """ 553 554 if not self.config.wreturn: 555 return 556 557 # Ignore an empty return type (It's a macro) 558 # Ignore functions with a "void" return type (but not "void *") 559 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 560 return 561 562 if not self.entry.sections.get("Return", None): 563 self.emit_msg(ln, 564 f"No description found for return value of '{declaration_name}'") 565 566 def dump_struct(self, ln, proto): 567 """ 568 Store an entry for an struct or union 569 """ 570 571 type_pattern = r'(struct|union)' 572 573 qualifiers = [ 574 "__attribute__", 575 "__packed", 576 "__aligned", 577 "____cacheline_aligned_in_smp", 578 "____cacheline_aligned", 579 ] 580 581 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 582 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') 583 584 # Extract struct/union definition 585 members = None 586 declaration_name = None 587 decl_type = None 588 589 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 590 if r.search(proto): 591 decl_type = r.group(1) 592 declaration_name = r.group(2) 593 members = r.group(3) 594 else: 595 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 596 597 if r.search(proto): 598 decl_type = r.group(1) 599 declaration_name = r.group(3) 600 members = r.group(2) 601 602 if not members: 603 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 604 return 605 606 if self.entry.identifier != declaration_name: 607 self.emit_msg(ln, 608 f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") 609 return 610 611 args_pattern = r'([^,)]+)' 612 613 sub_prefixes = [ 614 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), 615 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), 616 617 # Strip comments 618 (KernRe(r'\/\*.*?\*\/', re.S), ''), 619 620 # Strip attributes 621 (attribute, ' '), 622 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 623 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 624 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 625 (KernRe(r'\s*__packed\s*', re.S), ' '), 626 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 627 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 628 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 629 630 # Unwrap struct_group macros based on this definition: 631 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 632 # which has variants like: struct_group(NAME, MEMBERS...) 633 # Only MEMBERS arguments require documentation. 634 # 635 # Parsing them happens on two steps: 636 # 637 # 1. drop struct group arguments that aren't at MEMBERS, 638 # storing them as STRUCT_GROUP(MEMBERS) 639 # 640 # 2. remove STRUCT_GROUP() ancillary macro. 641 # 642 # The original logic used to remove STRUCT_GROUP() using an 643 # advanced regex: 644 # 645 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 646 # 647 # with two patterns that are incompatible with 648 # Python re module, as it has: 649 # 650 # - a recursive pattern: (?1) 651 # - an atomic grouping: (?>...) 652 # 653 # I tried a simpler version: but it didn't work either: 654 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 655 # 656 # As it doesn't properly match the end parenthesis on some cases. 657 # 658 # So, a better solution was crafted: there's now a NestedMatch 659 # class that ensures that delimiters after a search are properly 660 # matched. So, the implementation to drop STRUCT_GROUP() will be 661 # handled in separate. 662 663 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 664 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 665 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 666 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 667 668 # Replace macros 669 # 670 # TODO: use NestedMatch for FOO($1, $2, ...) matches 671 # 672 # it is better to also move those to the NestedMatch logic, 673 # to ensure that parenthesis will be properly matched. 674 675 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 676 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 677 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 678 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 679 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 680 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 681 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), 682 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), 683 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), 684 ] 685 686 # Regexes here are guaranteed to have the end limiter matching 687 # the start delimiter. Yet, right now, only one replace group 688 # is allowed. 689 690 sub_nested_prefixes = [ 691 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 692 ] 693 694 for search, sub in sub_prefixes: 695 members = search.sub(sub, members) 696 697 nested = NestedMatch() 698 699 for search, sub in sub_nested_prefixes: 700 members = nested.sub(search, sub, members) 701 702 # Keeps the original declaration as-is 703 declaration = members 704 705 # Split nested struct/union elements 706 # 707 # This loop was simpler at the original kernel-doc perl version, as 708 # while ($members =~ m/$struct_members/) { ... } 709 # reads 'members' string on each interaction. 710 # 711 # Python behavior is different: it parses 'members' only once, 712 # creating a list of tuples from the first interaction. 713 # 714 # On other words, this won't get nested structs. 715 # 716 # So, we need to have an extra loop on Python to override such 717 # re limitation. 718 719 while True: 720 tuples = struct_members.findall(members) 721 if not tuples: 722 break 723 724 for t in tuples: 725 newmember = "" 726 maintype = t[0] 727 s_ids = t[5] 728 content = t[3] 729 730 oldmember = "".join(t) 731 732 for s_id in s_ids.split(','): 733 s_id = s_id.strip() 734 735 newmember += f"{maintype} {s_id}; " 736 s_id = KernRe(r'[:\[].*').sub('', s_id) 737 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 738 739 for arg in content.split(';'): 740 arg = arg.strip() 741 742 if not arg: 743 continue 744 745 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') 746 if r.match(arg): 747 # Pointer-to-function 748 dtype = r.group(1) 749 name = r.group(2) 750 extra = r.group(3) 751 752 if not name: 753 continue 754 755 if not s_id: 756 # Anonymous struct/union 757 newmember += f"{dtype}{name}{extra}; " 758 else: 759 newmember += f"{dtype}{s_id}.{name}{extra}; " 760 761 else: 762 arg = arg.strip() 763 # Handle bitmaps 764 arg = KernRe(r':\s*\d+\s*').sub('', arg) 765 766 # Handle arrays 767 arg = KernRe(r'\[.*\]').sub('', arg) 768 769 # Handle multiple IDs 770 arg = KernRe(r'\s*,\s*').sub(',', arg) 771 772 r = KernRe(r'(.*)\s+([\S+,]+)') 773 774 if r.search(arg): 775 dtype = r.group(1) 776 names = r.group(2) 777 else: 778 newmember += f"{arg}; " 779 continue 780 781 for name in names.split(','): 782 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() 783 784 if not name: 785 continue 786 787 if not s_id: 788 # Anonymous struct/union 789 newmember += f"{dtype} {name}; " 790 else: 791 newmember += f"{dtype} {s_id}.{name}; " 792 793 members = members.replace(oldmember, newmember) 794 795 # Ignore other nested elements, like enums 796 members = re.sub(r'(\{[^\{\}]*\})', '', members) 797 798 self.create_parameter_list(ln, decl_type, members, ';', 799 declaration_name) 800 self.check_sections(ln, declaration_name, decl_type, 801 self.entry.sectcheck, self.entry.struct_actual) 802 803 # Adjust declaration for better display 804 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 805 declaration = KernRe(r'\}\s+;').sub('};', declaration) 806 807 # Better handle inlined enums 808 while True: 809 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 810 if not r.search(declaration): 811 break 812 813 declaration = r.sub(r'\1,\n\2', declaration) 814 815 def_args = declaration.split('\n') 816 level = 1 817 declaration = "" 818 for clause in def_args: 819 820 clause = clause.strip() 821 clause = KernRe(r'\s+').sub(' ', clause, count=1) 822 823 if not clause: 824 continue 825 826 if '}' in clause and level > 1: 827 level -= 1 828 829 if not KernRe(r'^\s*#').match(clause): 830 declaration += "\t" * level 831 832 declaration += "\t" + clause + "\n" 833 if "{" in clause and "}" not in clause: 834 level += 1 835 836 self.output_declaration(decl_type, declaration_name, 837 struct=declaration_name, 838 definition=declaration, 839 parameterlist=self.entry.parameterlist, 840 parameterdescs=self.entry.parameterdescs, 841 parametertypes=self.entry.parametertypes, 842 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 843 sectionlist=self.entry.sectionlist, 844 sections=self.entry.sections, 845 section_start_lines=self.entry.section_start_lines, 846 purpose=self.entry.declaration_purpose) 847 848 def dump_enum(self, ln, proto): 849 """ 850 Stores an enum inside self.entries array. 851 """ 852 853 # Ignore members marked private 854 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) 855 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) 856 857 # Strip comments 858 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) 859 860 # Strip #define macros inside enums 861 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 862 863 # 864 # Parse out the name and members of the enum. Typedef form first. 865 # 866 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 867 if r.search(proto): 868 declaration_name = r.group(2) 869 members = r.group(1).rstrip() 870 # 871 # Failing that, look for a straight enum 872 # 873 else: 874 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 875 if r.match(proto): 876 declaration_name = r.group(1) 877 members = r.group(2).rstrip() 878 # 879 # OK, this isn't going to work. 880 # 881 else: 882 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 883 return 884 # 885 # Make sure we found what we were expecting. 886 # 887 if self.entry.identifier != declaration_name: 888 if self.entry.identifier == "": 889 self.emit_msg(ln, 890 f"{proto}: wrong kernel-doc identifier on prototype") 891 else: 892 self.emit_msg(ln, 893 f"expecting prototype for enum {self.entry.identifier}. " 894 f"Prototype was for enum {declaration_name} instead") 895 return 896 897 if not declaration_name: 898 declaration_name = "(anonymous)" 899 # 900 # Parse out the name of each enum member, and verify that we 901 # have a description for it. 902 # 903 member_set = set() 904 members = KernRe(r'\([^;)]*\)').sub('', members) 905 for arg in members.split(','): 906 if not arg: 907 continue 908 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 909 self.entry.parameterlist.append(arg) 910 if arg not in self.entry.parameterdescs: 911 self.entry.parameterdescs[arg] = self.undescribed 912 self.emit_msg(ln, 913 f"Enum value '{arg}' not described in enum '{declaration_name}'") 914 member_set.add(arg) 915 # 916 # Ensure that every described member actually exists in the enum. 917 # 918 for k in self.entry.parameterdescs: 919 if k not in member_set: 920 self.emit_msg(ln, 921 f"Excess enum value '%{k}' description in '{declaration_name}'") 922 923 self.output_declaration('enum', declaration_name, 924 enum=declaration_name, 925 parameterlist=self.entry.parameterlist, 926 parameterdescs=self.entry.parameterdescs, 927 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 928 sectionlist=self.entry.sectionlist, 929 sections=self.entry.sections, 930 section_start_lines=self.entry.section_start_lines, 931 purpose=self.entry.declaration_purpose) 932 933 def dump_declaration(self, ln, prototype): 934 """ 935 Stores a data declaration inside self.entries array. 936 """ 937 938 if self.entry.decl_type == "enum": 939 self.dump_enum(ln, prototype) 940 return 941 942 if self.entry.decl_type == "typedef": 943 self.dump_typedef(ln, prototype) 944 return 945 946 if self.entry.decl_type in ["union", "struct"]: 947 self.dump_struct(ln, prototype) 948 return 949 950 self.output_declaration(self.entry.decl_type, prototype, 951 entry=self.entry) 952 953 def dump_function(self, ln, prototype): 954 """ 955 Stores a function of function macro inside self.entries array. 956 """ 957 958 func_macro = False 959 return_type = '' 960 decl_type = 'function' 961 962 # Prefixes that would be removed 963 sub_prefixes = [ 964 (r"^static +", "", 0), 965 (r"^extern +", "", 0), 966 (r"^asmlinkage +", "", 0), 967 (r"^inline +", "", 0), 968 (r"^__inline__ +", "", 0), 969 (r"^__inline +", "", 0), 970 (r"^__always_inline +", "", 0), 971 (r"^noinline +", "", 0), 972 (r"^__FORTIFY_INLINE +", "", 0), 973 (r"__init +", "", 0), 974 (r"__init_or_module +", "", 0), 975 (r"__deprecated +", "", 0), 976 (r"__flatten +", "", 0), 977 (r"__meminit +", "", 0), 978 (r"__must_check +", "", 0), 979 (r"__weak +", "", 0), 980 (r"__sched +", "", 0), 981 (r"_noprof", "", 0), 982 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 983 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 984 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 985 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 986 (r"__attribute_const__ +", "", 0), 987 988 # It seems that Python support for re.X is broken: 989 # At least for me (Python 3.13), this didn't work 990# (r""" 991# __attribute__\s*\(\( 992# (?: 993# [\w\s]+ # attribute name 994# (?:\([^)]*\))? # attribute arguments 995# \s*,? # optional comma at the end 996# )+ 997# \)\)\s+ 998# """, "", re.X), 999 1000 # So, remove whitespaces and comments from it 1001 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 1002 ] 1003 1004 for search, sub, flags in sub_prefixes: 1005 prototype = KernRe(search, flags).sub(sub, prototype) 1006 1007 # Macros are a special case, as they change the prototype format 1008 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1009 if new_proto != prototype: 1010 is_define_proto = True 1011 prototype = new_proto 1012 else: 1013 is_define_proto = False 1014 1015 # Yes, this truly is vile. We are looking for: 1016 # 1. Return type (may be nothing if we're looking at a macro) 1017 # 2. Function name 1018 # 3. Function parameters. 1019 # 1020 # All the while we have to watch out for function pointer parameters 1021 # (which IIRC is what the two sections are for), C types (these 1022 # regexps don't even start to express all the possibilities), and 1023 # so on. 1024 # 1025 # If you mess with these regexps, it's a good idea to check that 1026 # the following functions' documentation still comes out right: 1027 # - parport_register_device (function pointer parameters) 1028 # - atomic_set (macro) 1029 # - pci_match_device, __copy_to_user (long return type) 1030 1031 name = r'[a-zA-Z0-9_~:]+' 1032 prototype_end1 = r'[^\(]*' 1033 prototype_end2 = r'[^\{]*' 1034 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 1035 1036 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 1037 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 1038 1039 type1 = r'(?:[\w\s]+)?' 1040 type2 = r'(?:[\w\s]+\*+)+' 1041 1042 found = False 1043 1044 if is_define_proto: 1045 r = KernRe(r'^()(' + name + r')\s+') 1046 1047 if r.search(prototype): 1048 return_type = '' 1049 declaration_name = r.group(2) 1050 func_macro = True 1051 1052 found = True 1053 1054 if not found: 1055 patterns = [ 1056 rf'^()({name})\s*{prototype_end}', 1057 rf'^({type1})\s+({name})\s*{prototype_end}', 1058 rf'^({type2})\s*({name})\s*{prototype_end}', 1059 ] 1060 1061 for p in patterns: 1062 r = KernRe(p) 1063 1064 if r.match(prototype): 1065 1066 return_type = r.group(1) 1067 declaration_name = r.group(2) 1068 args = r.group(3) 1069 1070 self.create_parameter_list(ln, decl_type, args, ',', 1071 declaration_name) 1072 1073 found = True 1074 break 1075 if not found: 1076 self.emit_msg(ln, 1077 f"cannot understand function prototype: '{prototype}'") 1078 return 1079 1080 if self.entry.identifier != declaration_name: 1081 self.emit_msg(ln, 1082 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") 1083 return 1084 1085 prms = " ".join(self.entry.parameterlist) 1086 self.check_sections(ln, declaration_name, "function", 1087 self.entry.sectcheck, prms) 1088 1089 self.check_return_section(ln, declaration_name, return_type) 1090 1091 if 'typedef' in return_type: 1092 self.output_declaration(decl_type, declaration_name, 1093 function=declaration_name, 1094 typedef=True, 1095 functiontype=return_type, 1096 parameterlist=self.entry.parameterlist, 1097 parameterdescs=self.entry.parameterdescs, 1098 parametertypes=self.entry.parametertypes, 1099 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1100 sectionlist=self.entry.sectionlist, 1101 sections=self.entry.sections, 1102 section_start_lines=self.entry.section_start_lines, 1103 purpose=self.entry.declaration_purpose, 1104 func_macro=func_macro) 1105 else: 1106 self.output_declaration(decl_type, declaration_name, 1107 function=declaration_name, 1108 typedef=False, 1109 functiontype=return_type, 1110 parameterlist=self.entry.parameterlist, 1111 parameterdescs=self.entry.parameterdescs, 1112 parametertypes=self.entry.parametertypes, 1113 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1114 sectionlist=self.entry.sectionlist, 1115 sections=self.entry.sections, 1116 section_start_lines=self.entry.section_start_lines, 1117 purpose=self.entry.declaration_purpose, 1118 func_macro=func_macro) 1119 1120 def dump_typedef(self, ln, proto): 1121 """ 1122 Stores a typedef inside self.entries array. 1123 """ 1124 1125 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1126 typedef_ident = r'\*?\s*(\w\S+)\s*' 1127 typedef_args = r'\s*\((.*)\);' 1128 1129 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1130 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1131 1132 # Strip comments 1133 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1134 1135 # Parse function typedef prototypes 1136 for r in [typedef1, typedef2]: 1137 if not r.match(proto): 1138 continue 1139 1140 return_type = r.group(1).strip() 1141 declaration_name = r.group(2) 1142 args = r.group(3) 1143 1144 if self.entry.identifier != declaration_name: 1145 self.emit_msg(ln, 1146 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1147 return 1148 1149 decl_type = 'function' 1150 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1151 1152 self.output_declaration(decl_type, declaration_name, 1153 function=declaration_name, 1154 typedef=True, 1155 functiontype=return_type, 1156 parameterlist=self.entry.parameterlist, 1157 parameterdescs=self.entry.parameterdescs, 1158 parametertypes=self.entry.parametertypes, 1159 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1160 sectionlist=self.entry.sectionlist, 1161 sections=self.entry.sections, 1162 section_start_lines=self.entry.section_start_lines, 1163 purpose=self.entry.declaration_purpose) 1164 return 1165 1166 # Handle nested parentheses or brackets 1167 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1168 while r.search(proto): 1169 proto = r.sub('', proto) 1170 1171 # Parse simple typedefs 1172 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1173 if r.match(proto): 1174 declaration_name = r.group(1) 1175 1176 if self.entry.identifier != declaration_name: 1177 self.emit_msg(ln, 1178 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1179 return 1180 1181 self.output_declaration('typedef', declaration_name, 1182 typedef=declaration_name, 1183 sectionlist=self.entry.sectionlist, 1184 sections=self.entry.sections, 1185 section_start_lines=self.entry.section_start_lines, 1186 purpose=self.entry.declaration_purpose) 1187 return 1188 1189 self.emit_msg(ln, "error: Cannot parse typedef!") 1190 1191 @staticmethod 1192 def process_export(function_set, line): 1193 """ 1194 process EXPORT_SYMBOL* tags 1195 1196 This method doesn't use any variable from the class, so declare it 1197 with a staticmethod decorator. 1198 """ 1199 1200 # We support documenting some exported symbols with different 1201 # names. A horrible hack. 1202 suffixes = [ '_noprof' ] 1203 1204 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1205 # multiple export lines would violate Kernel coding style. 1206 1207 if export_symbol.search(line): 1208 symbol = export_symbol.group(2) 1209 elif export_symbol_ns.search(line): 1210 symbol = export_symbol_ns.group(2) 1211 else: 1212 return False 1213 # 1214 # Found an export, trim out any special suffixes 1215 # 1216 for suffix in suffixes: 1217 symbol = symbol.removesuffix(suffix) 1218 function_set.add(symbol) 1219 return True 1220 1221 def process_normal(self, ln, line): 1222 """ 1223 STATE_NORMAL: looking for the /** to begin everything. 1224 """ 1225 1226 if not doc_start.match(line): 1227 return 1228 1229 # start a new entry 1230 self.reset_state(ln) 1231 1232 # next line is always the function name 1233 self.state = state.NAME 1234 1235 def process_name(self, ln, line): 1236 """ 1237 STATE_NAME: Looking for the "name - description" line 1238 """ 1239 # 1240 # Check for a DOC: block and handle them specially. 1241 # 1242 if doc_block.search(line): 1243 1244 if not doc_block.group(1): 1245 self.entry.begin_section(ln, "Introduction") 1246 else: 1247 self.entry.begin_section(ln, doc_block.group(1)) 1248 1249 self.entry.identifier = self.entry.section 1250 self.state = state.DOCBLOCK 1251 # 1252 # Otherwise we're looking for a normal kerneldoc declaration line. 1253 # 1254 elif doc_decl.search(line): 1255 self.entry.identifier = doc_decl.group(1) 1256 1257 # Test for data declaration 1258 if doc_begin_data.search(line): 1259 self.entry.decl_type = doc_begin_data.group(1) 1260 self.entry.identifier = doc_begin_data.group(2) 1261 # 1262 # Look for a function description 1263 # 1264 elif doc_begin_func.search(line): 1265 self.entry.identifier = doc_begin_func.group(1) 1266 self.entry.decl_type = "function" 1267 # 1268 # We struck out. 1269 # 1270 else: 1271 self.emit_msg(ln, 1272 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1273 self.state = state.NORMAL 1274 return 1275 # 1276 # OK, set up for a new kerneldoc entry. 1277 # 1278 self.state = state.BODY 1279 self.entry.identifier = self.entry.identifier.strip(" ") 1280 # if there's no @param blocks need to set up default section here 1281 self.entry.begin_section(ln + 1) 1282 # 1283 # Find the description portion, which *should* be there but 1284 # isn't always. 1285 # (We should be able to capture this from the previous parsing - someday) 1286 # 1287 r = KernRe("[-:](.*)") 1288 if r.search(line): 1289 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1290 self.state = state.DECLARATION 1291 else: 1292 self.entry.declaration_purpose = "" 1293 1294 if not self.entry.declaration_purpose and self.config.wshort_desc: 1295 self.emit_msg(ln, 1296 f"missing initial short description on line:\n{line}") 1297 1298 if not self.entry.identifier and self.entry.decl_type != "enum": 1299 self.emit_msg(ln, 1300 f"wrong kernel-doc identifier on line:\n{line}") 1301 self.state = state.NORMAL 1302 1303 if self.config.verbose: 1304 self.emit_msg(ln, 1305 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1306 warning=False) 1307 # 1308 # Failed to find an identifier. Emit a warning 1309 # 1310 else: 1311 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1312 1313 # 1314 # Helper function to determine if a new section is being started. 1315 # 1316 def is_new_section(self, ln, line): 1317 if doc_sect.search(line): 1318 self.state = state.BODY 1319 # 1320 # Pick out the name of our new section, tweaking it if need be. 1321 # 1322 newsection = doc_sect.group(1) 1323 if newsection.lower() == 'description': 1324 newsection = 'Description' 1325 elif newsection.lower() == 'context': 1326 newsection = 'Context' 1327 self.state = state.SPECIAL_SECTION 1328 elif newsection.lower() in ["@return", "@returns", 1329 "return", "returns"]: 1330 newsection = "Return" 1331 self.state = state.SPECIAL_SECTION 1332 elif newsection[0] == '@': 1333 self.state = state.SPECIAL_SECTION 1334 # 1335 # Initialize the contents, and get the new section going. 1336 # 1337 newcontents = doc_sect.group(2) 1338 if not newcontents: 1339 newcontents = "" 1340 self.dump_section() 1341 self.entry.begin_section(ln, newsection) 1342 self.entry.leading_space = None 1343 1344 self.entry.add_text(newcontents.lstrip()) 1345 return True 1346 return False 1347 1348 # 1349 # Helper function to detect (and effect) the end of a kerneldoc comment. 1350 # 1351 def is_comment_end(self, ln, line): 1352 if doc_end.search(line): 1353 self.dump_section() 1354 1355 # Look for doc_com + <text> + doc_end: 1356 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1357 if r.match(line): 1358 self.emit_msg(ln, f"suspicious ending line: {line}") 1359 1360 self.entry.prototype = "" 1361 self.entry.new_start_line = ln + 1 1362 1363 self.state = state.PROTO 1364 return True 1365 return False 1366 1367 1368 def process_decl(self, ln, line): 1369 """ 1370 STATE_DECLARATION: We've seen the beginning of a declaration 1371 """ 1372 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1373 return 1374 # 1375 # Look for anything with the " * " line beginning. 1376 # 1377 if doc_content.search(line): 1378 cont = doc_content.group(1) 1379 # 1380 # A blank line means that we have moved out of the declaration 1381 # part of the comment (without any "special section" parameter 1382 # descriptions). 1383 # 1384 if cont == "": 1385 self.state = state.BODY 1386 # 1387 # Otherwise we have more of the declaration section to soak up. 1388 # 1389 else: 1390 self.entry.declaration_purpose = \ 1391 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1392 else: 1393 # Unknown line, ignore 1394 self.emit_msg(ln, f"bad line: {line}") 1395 1396 1397 def process_special(self, ln, line): 1398 """ 1399 STATE_SPECIAL_SECTION: a section ending with a blank line 1400 """ 1401 # 1402 # If we have hit a blank line (only the " * " marker), then this 1403 # section is done. 1404 # 1405 if KernRe(r"\s*\*\s*$").match(line): 1406 self.entry.begin_section(ln, dump = True) 1407 self.state = state.BODY 1408 return 1409 # 1410 # Not a blank line, look for the other ways to end the section. 1411 # 1412 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1413 return 1414 # 1415 # OK, we should have a continuation of the text for this section. 1416 # 1417 if doc_content.search(line): 1418 cont = doc_content.group(1) 1419 # 1420 # If the lines of text after the first in a special section have 1421 # leading white space, we need to trim it out or Sphinx will get 1422 # confused. For the second line (the None case), see what we 1423 # find there and remember it. 1424 # 1425 if self.entry.leading_space is None: 1426 r = KernRe(r'^(\s+)') 1427 if r.match(cont): 1428 self.entry.leading_space = len(r.group(1)) 1429 else: 1430 self.entry.leading_space = 0 1431 # 1432 # Otherwise, before trimming any leading chars, be *sure* 1433 # that they are white space. We should maybe warn if this 1434 # isn't the case. 1435 # 1436 for i in range(0, self.entry.leading_space): 1437 if cont[i] != " ": 1438 self.entry.leading_space = i 1439 break 1440 # 1441 # Add the trimmed result to the section and we're done. 1442 # 1443 self.entry.add_text(cont[self.entry.leading_space:]) 1444 else: 1445 # Unknown line, ignore 1446 self.emit_msg(ln, f"bad line: {line}") 1447 1448 def process_body(self, ln, line): 1449 """ 1450 STATE_BODY: the bulk of a kerneldoc comment. 1451 """ 1452 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1453 return 1454 1455 if doc_content.search(line): 1456 cont = doc_content.group(1) 1457 self.entry.add_text(cont) 1458 else: 1459 # Unknown line, ignore 1460 self.emit_msg(ln, f"bad line: {line}") 1461 1462 def process_inline_name(self, ln, line): 1463 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1464 1465 if doc_inline_sect.search(line): 1466 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1467 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1468 self.state = state.INLINE_TEXT 1469 elif doc_inline_end.search(line): 1470 self.dump_section() 1471 self.state = state.PROTO 1472 elif doc_content.search(line): 1473 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1474 self.state = state.PROTO 1475 # else ... ?? 1476 1477 def process_inline_text(self, ln, line): 1478 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1479 1480 if doc_inline_end.search(line): 1481 self.dump_section() 1482 self.state = state.PROTO 1483 elif doc_content.search(line): 1484 self.entry.add_text(doc_content.group(1)) 1485 # else ... ?? 1486 1487 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1488 """ 1489 Handle syscall definitions 1490 """ 1491 1492 is_void = False 1493 1494 # Strip newlines/CR's 1495 proto = re.sub(r'[\r\n]+', ' ', proto) 1496 1497 # Check if it's a SYSCALL_DEFINE0 1498 if 'SYSCALL_DEFINE0' in proto: 1499 is_void = True 1500 1501 # Replace SYSCALL_DEFINE with correct return type & function name 1502 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1503 1504 r = KernRe(r'long\s+(sys_.*?),') 1505 if r.search(proto): 1506 proto = KernRe(',').sub('(', proto, count=1) 1507 elif is_void: 1508 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1509 1510 # Now delete all of the odd-numbered commas in the proto 1511 # so that argument types & names don't have a comma between them 1512 count = 0 1513 length = len(proto) 1514 1515 if is_void: 1516 length = 0 # skip the loop if is_void 1517 1518 for ix in range(length): 1519 if proto[ix] == ',': 1520 count += 1 1521 if count % 2 == 1: 1522 proto = proto[:ix] + ' ' + proto[ix + 1:] 1523 1524 return proto 1525 1526 def tracepoint_munge(self, ln, proto): 1527 """ 1528 Handle tracepoint definitions 1529 """ 1530 1531 tracepointname = None 1532 tracepointargs = None 1533 1534 # Match tracepoint name based on different patterns 1535 r = KernRe(r'TRACE_EVENT\((.*?),') 1536 if r.search(proto): 1537 tracepointname = r.group(1) 1538 1539 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1540 if r.search(proto): 1541 tracepointname = r.group(1) 1542 1543 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1544 if r.search(proto): 1545 tracepointname = r.group(2) 1546 1547 if tracepointname: 1548 tracepointname = tracepointname.lstrip() 1549 1550 r = KernRe(r'TP_PROTO\((.*?)\)') 1551 if r.search(proto): 1552 tracepointargs = r.group(1) 1553 1554 if not tracepointname or not tracepointargs: 1555 self.emit_msg(ln, 1556 f"Unrecognized tracepoint format:\n{proto}\n") 1557 else: 1558 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1559 self.entry.identifier = f"trace_{self.entry.identifier}" 1560 1561 return proto 1562 1563 def process_proto_function(self, ln, line): 1564 """Ancillary routine to process a function prototype""" 1565 1566 # strip C99-style comments to end of line 1567 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1568 # 1569 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1570 # 1571 if KernRe(r'\s*#\s*define').match(line): 1572 self.entry.prototype = line 1573 elif not line.startswith('#'): # skip other preprocessor stuff 1574 r = KernRe(r'([^\{]*)') 1575 if r.match(line): 1576 self.entry.prototype += r.group(1) + " " 1577 # 1578 # If we now have the whole prototype, clean it up and declare victory. 1579 # 1580 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1581 # strip comments and surrounding spaces 1582 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1583 # 1584 # Handle self.entry.prototypes for function pointers like: 1585 # int (*pcs_config)(struct foo) 1586 # by turning it into 1587 # int pcs_config(struct foo) 1588 # 1589 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1590 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1591 # 1592 # Handle special declaration syntaxes 1593 # 1594 if 'SYSCALL_DEFINE' in self.entry.prototype: 1595 self.entry.prototype = self.syscall_munge(ln, 1596 self.entry.prototype) 1597 else: 1598 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1599 if r.search(self.entry.prototype): 1600 self.entry.prototype = self.tracepoint_munge(ln, 1601 self.entry.prototype) 1602 # 1603 # ... and we're done 1604 # 1605 self.dump_function(ln, self.entry.prototype) 1606 self.reset_state(ln) 1607 1608 def process_proto_type(self, ln, line): 1609 """Ancillary routine to process a type""" 1610 1611 # Strip C99-style comments and surrounding whitespace 1612 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1613 if not line: 1614 return # nothing to see here 1615 1616 # To distinguish preprocessor directive from regular declaration later. 1617 if line.startswith('#'): 1618 line += ";" 1619 # 1620 # Split the declaration on any of { } or ;, and accumulate pieces 1621 # until we hit a semicolon while not inside {brackets} 1622 # 1623 r = KernRe(r'(.*?)([{};])') 1624 for chunk in r.split(line): 1625 if chunk: # Ignore empty matches 1626 self.entry.prototype += chunk 1627 # 1628 # This cries out for a match statement ... someday after we can 1629 # drop Python 3.9 ... 1630 # 1631 if chunk == '{': 1632 self.entry.brcount += 1 1633 elif chunk == '}': 1634 self.entry.brcount -= 1 1635 elif chunk == ';' and self.entry.brcount <= 0: 1636 self.dump_declaration(ln, self.entry.prototype) 1637 self.reset_state(ln) 1638 return 1639 # 1640 # We hit the end of the line while still in the declaration; put 1641 # in a space to represent the newline. 1642 # 1643 self.entry.prototype += ' ' 1644 1645 def process_proto(self, ln, line): 1646 """STATE_PROTO: reading a function/whatever prototype.""" 1647 1648 if doc_inline_oneline.search(line): 1649 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1650 self.entry.add_text(doc_inline_oneline.group(2)) 1651 self.dump_section() 1652 1653 elif doc_inline_start.search(line): 1654 self.state = state.INLINE_NAME 1655 1656 elif self.entry.decl_type == 'function': 1657 self.process_proto_function(ln, line) 1658 1659 else: 1660 self.process_proto_type(ln, line) 1661 1662 def process_docblock(self, ln, line): 1663 """STATE_DOCBLOCK: within a DOC: block.""" 1664 1665 if doc_end.search(line): 1666 self.dump_section() 1667 self.output_declaration("doc", self.entry.identifier, 1668 sectionlist=self.entry.sectionlist, 1669 sections=self.entry.sections, 1670 section_start_lines=self.entry.section_start_lines) 1671 self.reset_state(ln) 1672 1673 elif doc_content.search(line): 1674 self.entry.add_text(doc_content.group(1)) 1675 1676 def parse_export(self): 1677 """ 1678 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1679 """ 1680 1681 export_table = set() 1682 1683 try: 1684 with open(self.fname, "r", encoding="utf8", 1685 errors="backslashreplace") as fp: 1686 1687 for line in fp: 1688 self.process_export(export_table, line) 1689 1690 except IOError: 1691 return None 1692 1693 return export_table 1694 1695 # 1696 # The state/action table telling us which function to invoke in 1697 # each state. 1698 # 1699 state_actions = { 1700 state.NORMAL: process_normal, 1701 state.NAME: process_name, 1702 state.BODY: process_body, 1703 state.DECLARATION: process_decl, 1704 state.SPECIAL_SECTION: process_special, 1705 state.INLINE_NAME: process_inline_name, 1706 state.INLINE_TEXT: process_inline_text, 1707 state.PROTO: process_proto, 1708 state.DOCBLOCK: process_docblock, 1709 } 1710 1711 def parse_kdoc(self): 1712 """ 1713 Open and process each line of a C source file. 1714 The parsing is controlled via a state machine, and the line is passed 1715 to a different process function depending on the state. The process 1716 function may update the state as needed. 1717 1718 Besides parsing kernel-doc tags, it also parses export symbols. 1719 """ 1720 1721 prev = "" 1722 prev_ln = None 1723 export_table = set() 1724 1725 try: 1726 with open(self.fname, "r", encoding="utf8", 1727 errors="backslashreplace") as fp: 1728 for ln, line in enumerate(fp): 1729 1730 line = line.expandtabs().strip("\n") 1731 1732 # Group continuation lines on prototypes 1733 if self.state == state.PROTO: 1734 if line.endswith("\\"): 1735 prev += line.rstrip("\\") 1736 if not prev_ln: 1737 prev_ln = ln 1738 continue 1739 1740 if prev: 1741 ln = prev_ln 1742 line = prev + line 1743 prev = "" 1744 prev_ln = None 1745 1746 self.config.log.debug("%d %s: %s", 1747 ln, state.name[self.state], 1748 line) 1749 1750 # This is an optimization over the original script. 1751 # There, when export_file was used for the same file, 1752 # it was read twice. Here, we use the already-existing 1753 # loop to parse exported symbols as well. 1754 # 1755 if (self.state != state.NORMAL) or \ 1756 not self.process_export(export_table, line): 1757 # Hand this line to the appropriate state handler 1758 self.state_actions[self.state](self, ln, line) 1759 1760 except OSError: 1761 self.config.log.error(f"Error: Cannot open file {self.fname}") 1762 1763 return export_table, self.entries 1764