1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19from kdoc_item import KdocItem 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45doc_sect = doc_com + \ 46 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 51doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 52doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 53doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 54attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 55 flags=re.I | re.S, cache=False) 56 57export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 58export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 59 60type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 61 62# 63# Tests for the beginning of a kerneldoc block in its various forms. 64# 65doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 66doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 67doc_begin_func = KernRe(str(doc_com) + # initial " * ' 68 r"(?:\w+\s*\*\s*)?" + # type (not captured) 69 r'(?:define\s+)?' + # possible "define" (not captured) 70 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 71 r'(?:[-:].*)?$', # description (not captured) 72 cache = False) 73 74# 75# A little helper to get rid of excess white space 76# 77multi_space = KernRe(r'\s\s+') 78def trim_whitespace(s): 79 return multi_space.sub(' ', s.strip()) 80 81class state: 82 """ 83 State machine enums 84 """ 85 86 # Parser states 87 NORMAL = 0 # normal code 88 NAME = 1 # looking for function name 89 DECLARATION = 2 # We have seen a declaration which might not be done 90 BODY = 3 # the body of the comment 91 SPECIAL_SECTION = 4 # doc section ending with a blank line 92 PROTO = 5 # scanning prototype 93 DOCBLOCK = 6 # documentation block 94 INLINE_NAME = 7 # gathering doc outside main block 95 INLINE_TEXT = 8 # reading the body of inline docs 96 97 name = [ 98 "NORMAL", 99 "NAME", 100 "DECLARATION", 101 "BODY", 102 "SPECIAL_SECTION", 103 "PROTO", 104 "DOCBLOCK", 105 "INLINE_NAME", 106 "INLINE_TEXT", 107 ] 108 109 110SECTION_DEFAULT = "Description" # default section 111 112class KernelEntry: 113 114 def __init__(self, config, ln): 115 self.config = config 116 117 self._contents = [] 118 self.sectcheck = "" 119 self.prototype = "" 120 121 self.warnings = [] 122 123 self.parameterlist = [] 124 self.parameterdescs = {} 125 self.parametertypes = {} 126 self.parameterdesc_start_lines = {} 127 128 self.section_start_lines = {} 129 self.sections = {} 130 131 self.anon_struct_union = False 132 133 self.leading_space = None 134 135 # State flags 136 self.brcount = 0 137 self.declaration_start_line = ln + 1 138 139 # 140 # Management of section contents 141 # 142 def add_text(self, text): 143 self._contents.append(text) 144 145 def contents(self): 146 return '\n'.join(self._contents) + '\n' 147 148 # TODO: rename to emit_message after removal of kernel-doc.pl 149 def emit_msg(self, log_msg, warning=True): 150 """Emit a message""" 151 152 if not warning: 153 self.config.log.info(log_msg) 154 return 155 156 # Delegate warning output to output logic, as this way it 157 # will report warnings/info only for symbols that are output 158 159 self.warnings.append(log_msg) 160 return 161 162 # 163 # Begin a new section. 164 # 165 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 166 if dump: 167 self.dump_section(start_new = True) 168 self.section = title 169 self.new_start_line = line_no 170 171 def dump_section(self, start_new=True): 172 """ 173 Dumps section contents to arrays/hashes intended for that purpose. 174 """ 175 # 176 # If we have accumulated no contents in the default ("description") 177 # section, don't bother. 178 # 179 if self.section == SECTION_DEFAULT and not self._contents: 180 return 181 name = self.section 182 contents = self.contents() 183 184 if type_param.match(name): 185 name = type_param.group(1) 186 187 self.parameterdescs[name] = contents 188 self.parameterdesc_start_lines[name] = self.new_start_line 189 190 self.sectcheck += name + " " 191 self.new_start_line = 0 192 193 else: 194 if name in self.sections and self.sections[name] != "": 195 # Only warn on user-specified duplicate section names 196 if name != SECTION_DEFAULT: 197 self.emit_msg(self.new_start_line, 198 f"duplicate section name '{name}'\n") 199 # Treat as a new paragraph - add a blank line 200 self.sections[name] += '\n' + contents 201 else: 202 self.sections[name] = contents 203 self.section_start_lines[name] = self.new_start_line 204 self.new_start_line = 0 205 206# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 207 208 if start_new: 209 self.section = SECTION_DEFAULT 210 self._contents = [] 211 212 213class KernelDoc: 214 """ 215 Read a C language source or header FILE and extract embedded 216 documentation comments. 217 """ 218 219 # Section names 220 221 section_context = "Context" 222 section_return = "Return" 223 224 undescribed = "-- undescribed --" 225 226 def __init__(self, config, fname): 227 """Initialize internal variables""" 228 229 self.fname = fname 230 self.config = config 231 232 # Initial state for the state machines 233 self.state = state.NORMAL 234 235 # Store entry currently being processed 236 self.entry = None 237 238 # Place all potential outputs into an array 239 self.entries = [] 240 241 def emit_msg(self, ln, msg, warning=True): 242 """Emit a message""" 243 244 log_msg = f"{self.fname}:{ln} {msg}" 245 246 if self.entry: 247 self.entry.emit_msg(log_msg, warning) 248 return 249 250 if warning: 251 self.config.log.warning(log_msg) 252 else: 253 self.config.log.info(log_msg) 254 255 def dump_section(self, start_new=True): 256 """ 257 Dumps section contents to arrays/hashes intended for that purpose. 258 """ 259 260 if self.entry: 261 self.entry.dump_section(start_new) 262 263 # TODO: rename it to store_declaration after removal of kernel-doc.pl 264 def output_declaration(self, dtype, name, **args): 265 """ 266 Stores the entry into an entry array. 267 268 The actual output and output filters will be handled elsewhere 269 """ 270 271 item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) 272 item.warnings = self.entry.warnings 273 274 # Drop empty sections 275 # TODO: improve empty sections logic to emit warnings 276 sections = self.entry.sections 277 for section in ["Description", "Return"]: 278 if section in sections and not sections[section].rstrip(): 279 del sections[section] 280 item.set_sections(sections, self.entry.section_start_lines) 281 282 self.entries.append(item) 283 284 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 285 286 def reset_state(self, ln): 287 """ 288 Ancillary routine to create a new entry. It initializes all 289 variables used by the state machine. 290 """ 291 292 self.entry = KernelEntry(self.config, ln) 293 294 # State flags 295 self.state = state.NORMAL 296 297 def push_parameter(self, ln, decl_type, param, dtype, 298 org_arg, declaration_name): 299 """ 300 Store parameters and their descriptions at self.entry. 301 """ 302 303 if self.entry.anon_struct_union and dtype == "" and param == "}": 304 return # Ignore the ending }; from anonymous struct/union 305 306 self.entry.anon_struct_union = False 307 308 param = KernRe(r'[\[\)].*').sub('', param, count=1) 309 310 if dtype == "" and param.endswith("..."): 311 if KernRe(r'\w\.\.\.$').search(param): 312 # For named variable parameters of the form `x...`, 313 # remove the dots 314 param = param[:-3] 315 else: 316 # Handles unnamed variable parameters 317 param = "..." 318 319 if param not in self.entry.parameterdescs or \ 320 not self.entry.parameterdescs[param]: 321 322 self.entry.parameterdescs[param] = "variable arguments" 323 324 elif dtype == "" and (not param or param == "void"): 325 param = "void" 326 self.entry.parameterdescs[param] = "no arguments" 327 328 elif dtype == "" and param in ["struct", "union"]: 329 # Handle unnamed (anonymous) union or struct 330 dtype = param 331 param = "{unnamed_" + param + "}" 332 self.entry.parameterdescs[param] = "anonymous\n" 333 self.entry.anon_struct_union = True 334 335 # Handle cache group enforcing variables: they do not need 336 # to be described in header files 337 elif "__cacheline_group" in param: 338 # Ignore __cacheline_group_begin and __cacheline_group_end 339 return 340 341 # Warn if parameter has no description 342 # (but ignore ones starting with # as these are not parameters 343 # but inline preprocessor statements) 344 if param not in self.entry.parameterdescs and not param.startswith("#"): 345 self.entry.parameterdescs[param] = self.undescribed 346 347 if "." not in param: 348 if decl_type == 'function': 349 dname = f"{decl_type} parameter" 350 else: 351 dname = f"{decl_type} member" 352 353 self.emit_msg(ln, 354 f"{dname} '{param}' not described in '{declaration_name}'") 355 356 # Strip spaces from param so that it is one continuous string on 357 # parameterlist. This fixes a problem where check_sections() 358 # cannot find a parameter like "addr[6 + 2]" because it actually 359 # appears as "addr[6", "+", "2]" on the parameter list. 360 # However, it's better to maintain the param string unchanged for 361 # output, so just weaken the string compare in check_sections() 362 # to ignore "[blah" in a parameter string. 363 364 self.entry.parameterlist.append(param) 365 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 366 self.entry.parametertypes[param] = org_arg 367 368 369 def create_parameter_list(self, ln, decl_type, args, 370 splitter, declaration_name): 371 """ 372 Creates a list of parameters, storing them at self.entry. 373 """ 374 375 # temporarily replace all commas inside function pointer definition 376 arg_expr = KernRe(r'(\([^\),]+),') 377 while arg_expr.search(args): 378 args = arg_expr.sub(r"\1#", args) 379 380 for arg in args.split(splitter): 381 # Strip comments 382 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 383 384 # Ignore argument attributes 385 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 386 387 # Strip leading/trailing spaces 388 arg = arg.strip() 389 arg = KernRe(r'\s+').sub(' ', arg, count=1) 390 391 if arg.startswith('#'): 392 # Treat preprocessor directive as a typeless variable just to fill 393 # corresponding data structures "correctly". Catch it later in 394 # output_* subs. 395 396 # Treat preprocessor directive as a typeless variable 397 self.push_parameter(ln, decl_type, arg, "", 398 "", declaration_name) 399 400 elif KernRe(r'\(.+\)\s*\(').search(arg): 401 # Pointer-to-function 402 403 arg = arg.replace('#', ',') 404 405 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 406 if r.match(arg): 407 param = r.group(1) 408 else: 409 self.emit_msg(ln, f"Invalid param: {arg}") 410 param = arg 411 412 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 413 self.push_parameter(ln, decl_type, param, dtype, 414 arg, declaration_name) 415 416 elif KernRe(r'\(.+\)\s*\[').search(arg): 417 # Array-of-pointers 418 419 arg = arg.replace('#', ',') 420 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 421 if r.match(arg): 422 param = r.group(1) 423 else: 424 self.emit_msg(ln, f"Invalid param: {arg}") 425 param = arg 426 427 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 428 429 self.push_parameter(ln, decl_type, param, dtype, 430 arg, declaration_name) 431 432 elif arg: 433 arg = KernRe(r'\s*:\s*').sub(":", arg) 434 arg = KernRe(r'\s*\[').sub('[', arg) 435 436 args = KernRe(r'\s*,\s*').split(arg) 437 if args[0] and '*' in args[0]: 438 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 439 440 first_arg = [] 441 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 442 if args[0] and r.match(args[0]): 443 args.pop(0) 444 first_arg.extend(r.group(1)) 445 first_arg.append(r.group(2)) 446 else: 447 first_arg = KernRe(r'\s+').split(args.pop(0)) 448 449 args.insert(0, first_arg.pop()) 450 dtype = ' '.join(first_arg) 451 452 for param in args: 453 if KernRe(r'^(\*+)\s*(.*)').match(param): 454 r = KernRe(r'^(\*+)\s*(.*)') 455 if not r.match(param): 456 self.emit_msg(ln, f"Invalid param: {param}") 457 continue 458 459 param = r.group(1) 460 461 self.push_parameter(ln, decl_type, r.group(2), 462 f"{dtype} {r.group(1)}", 463 arg, declaration_name) 464 465 elif KernRe(r'(.*?):(\w+)').search(param): 466 r = KernRe(r'(.*?):(\w+)') 467 if not r.match(param): 468 self.emit_msg(ln, f"Invalid param: {param}") 469 continue 470 471 if dtype != "": # Skip unnamed bit-fields 472 self.push_parameter(ln, decl_type, r.group(1), 473 f"{dtype}:{r.group(2)}", 474 arg, declaration_name) 475 else: 476 self.push_parameter(ln, decl_type, param, dtype, 477 arg, declaration_name) 478 479 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): 480 """ 481 Check for errors inside sections, emitting warnings if not found 482 parameters are described. 483 """ 484 485 sects = sectcheck.split() 486 prms = prmscheck.split() 487 488 for sx in range(len(sects)): # pylint: disable=C0200 489 err = True 490 for px in range(len(prms)): # pylint: disable=C0200 491 if prms[px] == sects[sx]: 492 err = False 493 break 494 495 if err: 496 if decl_type == 'function': 497 dname = f"{decl_type} parameter" 498 else: 499 dname = f"{decl_type} member" 500 501 self.emit_msg(ln, 502 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") 503 504 def check_return_section(self, ln, declaration_name, return_type): 505 """ 506 If the function doesn't return void, warns about the lack of a 507 return description. 508 """ 509 510 if not self.config.wreturn: 511 return 512 513 # Ignore an empty return type (It's a macro) 514 # Ignore functions with a "void" return type (but not "void *") 515 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 516 return 517 518 if not self.entry.sections.get("Return", None): 519 self.emit_msg(ln, 520 f"No description found for return value of '{declaration_name}'") 521 522 def dump_struct(self, ln, proto): 523 """ 524 Store an entry for an struct or union 525 """ 526 527 type_pattern = r'(struct|union)' 528 529 qualifiers = [ 530 "__attribute__", 531 "__packed", 532 "__aligned", 533 "____cacheline_aligned_in_smp", 534 "____cacheline_aligned", 535 ] 536 537 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 538 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') 539 540 # Extract struct/union definition 541 members = None 542 declaration_name = None 543 decl_type = None 544 545 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 546 if r.search(proto): 547 decl_type = r.group(1) 548 declaration_name = r.group(2) 549 members = r.group(3) 550 else: 551 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 552 553 if r.search(proto): 554 decl_type = r.group(1) 555 declaration_name = r.group(3) 556 members = r.group(2) 557 558 if not members: 559 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 560 return 561 562 if self.entry.identifier != declaration_name: 563 self.emit_msg(ln, 564 f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") 565 return 566 567 args_pattern = r'([^,)]+)' 568 569 sub_prefixes = [ 570 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), 571 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), 572 573 # Strip comments 574 (KernRe(r'\/\*.*?\*\/', re.S), ''), 575 576 # Strip attributes 577 (attribute, ' '), 578 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 579 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 580 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 581 (KernRe(r'\s*__packed\s*', re.S), ' '), 582 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 583 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 584 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 585 586 # Unwrap struct_group macros based on this definition: 587 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 588 # which has variants like: struct_group(NAME, MEMBERS...) 589 # Only MEMBERS arguments require documentation. 590 # 591 # Parsing them happens on two steps: 592 # 593 # 1. drop struct group arguments that aren't at MEMBERS, 594 # storing them as STRUCT_GROUP(MEMBERS) 595 # 596 # 2. remove STRUCT_GROUP() ancillary macro. 597 # 598 # The original logic used to remove STRUCT_GROUP() using an 599 # advanced regex: 600 # 601 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 602 # 603 # with two patterns that are incompatible with 604 # Python re module, as it has: 605 # 606 # - a recursive pattern: (?1) 607 # - an atomic grouping: (?>...) 608 # 609 # I tried a simpler version: but it didn't work either: 610 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 611 # 612 # As it doesn't properly match the end parenthesis on some cases. 613 # 614 # So, a better solution was crafted: there's now a NestedMatch 615 # class that ensures that delimiters after a search are properly 616 # matched. So, the implementation to drop STRUCT_GROUP() will be 617 # handled in separate. 618 619 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 620 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 621 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 622 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 623 624 # Replace macros 625 # 626 # TODO: use NestedMatch for FOO($1, $2, ...) matches 627 # 628 # it is better to also move those to the NestedMatch logic, 629 # to ensure that parenthesis will be properly matched. 630 631 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 632 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 633 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 634 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 635 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 636 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 637 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), 638 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), 639 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), 640 ] 641 642 # Regexes here are guaranteed to have the end limiter matching 643 # the start delimiter. Yet, right now, only one replace group 644 # is allowed. 645 646 sub_nested_prefixes = [ 647 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 648 ] 649 650 for search, sub in sub_prefixes: 651 members = search.sub(sub, members) 652 653 nested = NestedMatch() 654 655 for search, sub in sub_nested_prefixes: 656 members = nested.sub(search, sub, members) 657 658 # Keeps the original declaration as-is 659 declaration = members 660 661 # Split nested struct/union elements 662 # 663 # This loop was simpler at the original kernel-doc perl version, as 664 # while ($members =~ m/$struct_members/) { ... } 665 # reads 'members' string on each interaction. 666 # 667 # Python behavior is different: it parses 'members' only once, 668 # creating a list of tuples from the first interaction. 669 # 670 # On other words, this won't get nested structs. 671 # 672 # So, we need to have an extra loop on Python to override such 673 # re limitation. 674 675 while True: 676 tuples = struct_members.findall(members) 677 if not tuples: 678 break 679 680 for t in tuples: 681 newmember = "" 682 maintype = t[0] 683 s_ids = t[5] 684 content = t[3] 685 686 oldmember = "".join(t) 687 688 for s_id in s_ids.split(','): 689 s_id = s_id.strip() 690 691 newmember += f"{maintype} {s_id}; " 692 s_id = KernRe(r'[:\[].*').sub('', s_id) 693 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 694 695 for arg in content.split(';'): 696 arg = arg.strip() 697 698 if not arg: 699 continue 700 701 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') 702 if r.match(arg): 703 # Pointer-to-function 704 dtype = r.group(1) 705 name = r.group(2) 706 extra = r.group(3) 707 708 if not name: 709 continue 710 711 if not s_id: 712 # Anonymous struct/union 713 newmember += f"{dtype}{name}{extra}; " 714 else: 715 newmember += f"{dtype}{s_id}.{name}{extra}; " 716 717 else: 718 arg = arg.strip() 719 # Handle bitmaps 720 arg = KernRe(r':\s*\d+\s*').sub('', arg) 721 722 # Handle arrays 723 arg = KernRe(r'\[.*\]').sub('', arg) 724 725 # Handle multiple IDs 726 arg = KernRe(r'\s*,\s*').sub(',', arg) 727 728 r = KernRe(r'(.*)\s+([\S+,]+)') 729 730 if r.search(arg): 731 dtype = r.group(1) 732 names = r.group(2) 733 else: 734 newmember += f"{arg}; " 735 continue 736 737 for name in names.split(','): 738 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() 739 740 if not name: 741 continue 742 743 if not s_id: 744 # Anonymous struct/union 745 newmember += f"{dtype} {name}; " 746 else: 747 newmember += f"{dtype} {s_id}.{name}; " 748 749 members = members.replace(oldmember, newmember) 750 751 # Ignore other nested elements, like enums 752 members = re.sub(r'(\{[^\{\}]*\})', '', members) 753 754 self.create_parameter_list(ln, decl_type, members, ';', 755 declaration_name) 756 self.check_sections(ln, declaration_name, decl_type, 757 self.entry.sectcheck, ' '.join(self.entry.parameterlist)) 758 759 # Adjust declaration for better display 760 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 761 declaration = KernRe(r'\}\s+;').sub('};', declaration) 762 763 # Better handle inlined enums 764 while True: 765 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 766 if not r.search(declaration): 767 break 768 769 declaration = r.sub(r'\1,\n\2', declaration) 770 771 def_args = declaration.split('\n') 772 level = 1 773 declaration = "" 774 for clause in def_args: 775 776 clause = clause.strip() 777 clause = KernRe(r'\s+').sub(' ', clause, count=1) 778 779 if not clause: 780 continue 781 782 if '}' in clause and level > 1: 783 level -= 1 784 785 if not KernRe(r'^\s*#').match(clause): 786 declaration += "\t" * level 787 788 declaration += "\t" + clause + "\n" 789 if "{" in clause and "}" not in clause: 790 level += 1 791 792 self.output_declaration(decl_type, declaration_name, 793 struct=declaration_name, 794 definition=declaration, 795 parameterlist=self.entry.parameterlist, 796 parameterdescs=self.entry.parameterdescs, 797 parametertypes=self.entry.parametertypes, 798 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 799 purpose=self.entry.declaration_purpose) 800 801 def dump_enum(self, ln, proto): 802 """ 803 Stores an enum inside self.entries array. 804 """ 805 806 # Ignore members marked private 807 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) 808 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) 809 810 # Strip comments 811 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) 812 813 # Strip #define macros inside enums 814 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 815 816 # 817 # Parse out the name and members of the enum. Typedef form first. 818 # 819 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 820 if r.search(proto): 821 declaration_name = r.group(2) 822 members = r.group(1).rstrip() 823 # 824 # Failing that, look for a straight enum 825 # 826 else: 827 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 828 if r.match(proto): 829 declaration_name = r.group(1) 830 members = r.group(2).rstrip() 831 # 832 # OK, this isn't going to work. 833 # 834 else: 835 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 836 return 837 # 838 # Make sure we found what we were expecting. 839 # 840 if self.entry.identifier != declaration_name: 841 if self.entry.identifier == "": 842 self.emit_msg(ln, 843 f"{proto}: wrong kernel-doc identifier on prototype") 844 else: 845 self.emit_msg(ln, 846 f"expecting prototype for enum {self.entry.identifier}. " 847 f"Prototype was for enum {declaration_name} instead") 848 return 849 850 if not declaration_name: 851 declaration_name = "(anonymous)" 852 # 853 # Parse out the name of each enum member, and verify that we 854 # have a description for it. 855 # 856 member_set = set() 857 members = KernRe(r'\([^;)]*\)').sub('', members) 858 for arg in members.split(','): 859 if not arg: 860 continue 861 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 862 self.entry.parameterlist.append(arg) 863 if arg not in self.entry.parameterdescs: 864 self.entry.parameterdescs[arg] = self.undescribed 865 self.emit_msg(ln, 866 f"Enum value '{arg}' not described in enum '{declaration_name}'") 867 member_set.add(arg) 868 # 869 # Ensure that every described member actually exists in the enum. 870 # 871 for k in self.entry.parameterdescs: 872 if k not in member_set: 873 self.emit_msg(ln, 874 f"Excess enum value '%{k}' description in '{declaration_name}'") 875 876 self.output_declaration('enum', declaration_name, 877 enum=declaration_name, 878 parameterlist=self.entry.parameterlist, 879 parameterdescs=self.entry.parameterdescs, 880 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 881 purpose=self.entry.declaration_purpose) 882 883 def dump_declaration(self, ln, prototype): 884 """ 885 Stores a data declaration inside self.entries array. 886 """ 887 888 if self.entry.decl_type == "enum": 889 self.dump_enum(ln, prototype) 890 return 891 892 if self.entry.decl_type == "typedef": 893 self.dump_typedef(ln, prototype) 894 return 895 896 if self.entry.decl_type in ["union", "struct"]: 897 self.dump_struct(ln, prototype) 898 return 899 900 self.output_declaration(self.entry.decl_type, prototype, 901 entry=self.entry) 902 903 def dump_function(self, ln, prototype): 904 """ 905 Stores a function of function macro inside self.entries array. 906 """ 907 908 func_macro = False 909 return_type = '' 910 decl_type = 'function' 911 912 # Prefixes that would be removed 913 sub_prefixes = [ 914 (r"^static +", "", 0), 915 (r"^extern +", "", 0), 916 (r"^asmlinkage +", "", 0), 917 (r"^inline +", "", 0), 918 (r"^__inline__ +", "", 0), 919 (r"^__inline +", "", 0), 920 (r"^__always_inline +", "", 0), 921 (r"^noinline +", "", 0), 922 (r"^__FORTIFY_INLINE +", "", 0), 923 (r"__init +", "", 0), 924 (r"__init_or_module +", "", 0), 925 (r"__deprecated +", "", 0), 926 (r"__flatten +", "", 0), 927 (r"__meminit +", "", 0), 928 (r"__must_check +", "", 0), 929 (r"__weak +", "", 0), 930 (r"__sched +", "", 0), 931 (r"_noprof", "", 0), 932 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 933 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 934 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 935 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 936 (r"__attribute_const__ +", "", 0), 937 938 # It seems that Python support for re.X is broken: 939 # At least for me (Python 3.13), this didn't work 940# (r""" 941# __attribute__\s*\(\( 942# (?: 943# [\w\s]+ # attribute name 944# (?:\([^)]*\))? # attribute arguments 945# \s*,? # optional comma at the end 946# )+ 947# \)\)\s+ 948# """, "", re.X), 949 950 # So, remove whitespaces and comments from it 951 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 952 ] 953 954 for search, sub, flags in sub_prefixes: 955 prototype = KernRe(search, flags).sub(sub, prototype) 956 957 # Macros are a special case, as they change the prototype format 958 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 959 if new_proto != prototype: 960 is_define_proto = True 961 prototype = new_proto 962 else: 963 is_define_proto = False 964 965 # Yes, this truly is vile. We are looking for: 966 # 1. Return type (may be nothing if we're looking at a macro) 967 # 2. Function name 968 # 3. Function parameters. 969 # 970 # All the while we have to watch out for function pointer parameters 971 # (which IIRC is what the two sections are for), C types (these 972 # regexps don't even start to express all the possibilities), and 973 # so on. 974 # 975 # If you mess with these regexps, it's a good idea to check that 976 # the following functions' documentation still comes out right: 977 # - parport_register_device (function pointer parameters) 978 # - atomic_set (macro) 979 # - pci_match_device, __copy_to_user (long return type) 980 981 name = r'[a-zA-Z0-9_~:]+' 982 prototype_end1 = r'[^\(]*' 983 prototype_end2 = r'[^\{]*' 984 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 985 986 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 987 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 988 989 type1 = r'(?:[\w\s]+)?' 990 type2 = r'(?:[\w\s]+\*+)+' 991 992 found = False 993 994 if is_define_proto: 995 r = KernRe(r'^()(' + name + r')\s+') 996 997 if r.search(prototype): 998 return_type = '' 999 declaration_name = r.group(2) 1000 func_macro = True 1001 1002 found = True 1003 1004 if not found: 1005 patterns = [ 1006 rf'^()({name})\s*{prototype_end}', 1007 rf'^({type1})\s+({name})\s*{prototype_end}', 1008 rf'^({type2})\s*({name})\s*{prototype_end}', 1009 ] 1010 1011 for p in patterns: 1012 r = KernRe(p) 1013 1014 if r.match(prototype): 1015 1016 return_type = r.group(1) 1017 declaration_name = r.group(2) 1018 args = r.group(3) 1019 1020 self.create_parameter_list(ln, decl_type, args, ',', 1021 declaration_name) 1022 1023 found = True 1024 break 1025 if not found: 1026 self.emit_msg(ln, 1027 f"cannot understand function prototype: '{prototype}'") 1028 return 1029 1030 if self.entry.identifier != declaration_name: 1031 self.emit_msg(ln, 1032 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") 1033 return 1034 1035 prms = " ".join(self.entry.parameterlist) 1036 self.check_sections(ln, declaration_name, "function", 1037 self.entry.sectcheck, prms) 1038 1039 self.check_return_section(ln, declaration_name, return_type) 1040 1041 if 'typedef' in return_type: 1042 self.output_declaration(decl_type, declaration_name, 1043 function=declaration_name, 1044 typedef=True, 1045 functiontype=return_type, 1046 parameterlist=self.entry.parameterlist, 1047 parameterdescs=self.entry.parameterdescs, 1048 parametertypes=self.entry.parametertypes, 1049 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1050 purpose=self.entry.declaration_purpose, 1051 func_macro=func_macro) 1052 else: 1053 self.output_declaration(decl_type, declaration_name, 1054 function=declaration_name, 1055 typedef=False, 1056 functiontype=return_type, 1057 parameterlist=self.entry.parameterlist, 1058 parameterdescs=self.entry.parameterdescs, 1059 parametertypes=self.entry.parametertypes, 1060 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1061 purpose=self.entry.declaration_purpose, 1062 func_macro=func_macro) 1063 1064 def dump_typedef(self, ln, proto): 1065 """ 1066 Stores a typedef inside self.entries array. 1067 """ 1068 1069 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1070 typedef_ident = r'\*?\s*(\w\S+)\s*' 1071 typedef_args = r'\s*\((.*)\);' 1072 1073 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1074 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1075 1076 # Strip comments 1077 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1078 1079 # Parse function typedef prototypes 1080 for r in [typedef1, typedef2]: 1081 if not r.match(proto): 1082 continue 1083 1084 return_type = r.group(1).strip() 1085 declaration_name = r.group(2) 1086 args = r.group(3) 1087 1088 if self.entry.identifier != declaration_name: 1089 self.emit_msg(ln, 1090 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1091 return 1092 1093 decl_type = 'function' 1094 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1095 1096 self.output_declaration(decl_type, declaration_name, 1097 function=declaration_name, 1098 typedef=True, 1099 functiontype=return_type, 1100 parameterlist=self.entry.parameterlist, 1101 parameterdescs=self.entry.parameterdescs, 1102 parametertypes=self.entry.parametertypes, 1103 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1104 purpose=self.entry.declaration_purpose) 1105 return 1106 1107 # Handle nested parentheses or brackets 1108 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1109 while r.search(proto): 1110 proto = r.sub('', proto) 1111 1112 # Parse simple typedefs 1113 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1114 if r.match(proto): 1115 declaration_name = r.group(1) 1116 1117 if self.entry.identifier != declaration_name: 1118 self.emit_msg(ln, 1119 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1120 return 1121 1122 self.output_declaration('typedef', declaration_name, 1123 typedef=declaration_name, 1124 purpose=self.entry.declaration_purpose) 1125 return 1126 1127 self.emit_msg(ln, "error: Cannot parse typedef!") 1128 1129 @staticmethod 1130 def process_export(function_set, line): 1131 """ 1132 process EXPORT_SYMBOL* tags 1133 1134 This method doesn't use any variable from the class, so declare it 1135 with a staticmethod decorator. 1136 """ 1137 1138 # We support documenting some exported symbols with different 1139 # names. A horrible hack. 1140 suffixes = [ '_noprof' ] 1141 1142 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1143 # multiple export lines would violate Kernel coding style. 1144 1145 if export_symbol.search(line): 1146 symbol = export_symbol.group(2) 1147 elif export_symbol_ns.search(line): 1148 symbol = export_symbol_ns.group(2) 1149 else: 1150 return False 1151 # 1152 # Found an export, trim out any special suffixes 1153 # 1154 for suffix in suffixes: 1155 symbol = symbol.removesuffix(suffix) 1156 function_set.add(symbol) 1157 return True 1158 1159 def process_normal(self, ln, line): 1160 """ 1161 STATE_NORMAL: looking for the /** to begin everything. 1162 """ 1163 1164 if not doc_start.match(line): 1165 return 1166 1167 # start a new entry 1168 self.reset_state(ln) 1169 1170 # next line is always the function name 1171 self.state = state.NAME 1172 1173 def process_name(self, ln, line): 1174 """ 1175 STATE_NAME: Looking for the "name - description" line 1176 """ 1177 # 1178 # Check for a DOC: block and handle them specially. 1179 # 1180 if doc_block.search(line): 1181 1182 if not doc_block.group(1): 1183 self.entry.begin_section(ln, "Introduction") 1184 else: 1185 self.entry.begin_section(ln, doc_block.group(1)) 1186 1187 self.entry.identifier = self.entry.section 1188 self.state = state.DOCBLOCK 1189 # 1190 # Otherwise we're looking for a normal kerneldoc declaration line. 1191 # 1192 elif doc_decl.search(line): 1193 self.entry.identifier = doc_decl.group(1) 1194 1195 # Test for data declaration 1196 if doc_begin_data.search(line): 1197 self.entry.decl_type = doc_begin_data.group(1) 1198 self.entry.identifier = doc_begin_data.group(2) 1199 # 1200 # Look for a function description 1201 # 1202 elif doc_begin_func.search(line): 1203 self.entry.identifier = doc_begin_func.group(1) 1204 self.entry.decl_type = "function" 1205 # 1206 # We struck out. 1207 # 1208 else: 1209 self.emit_msg(ln, 1210 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1211 self.state = state.NORMAL 1212 return 1213 # 1214 # OK, set up for a new kerneldoc entry. 1215 # 1216 self.state = state.BODY 1217 self.entry.identifier = self.entry.identifier.strip(" ") 1218 # if there's no @param blocks need to set up default section here 1219 self.entry.begin_section(ln + 1) 1220 # 1221 # Find the description portion, which *should* be there but 1222 # isn't always. 1223 # (We should be able to capture this from the previous parsing - someday) 1224 # 1225 r = KernRe("[-:](.*)") 1226 if r.search(line): 1227 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1228 self.state = state.DECLARATION 1229 else: 1230 self.entry.declaration_purpose = "" 1231 1232 if not self.entry.declaration_purpose and self.config.wshort_desc: 1233 self.emit_msg(ln, 1234 f"missing initial short description on line:\n{line}") 1235 1236 if not self.entry.identifier and self.entry.decl_type != "enum": 1237 self.emit_msg(ln, 1238 f"wrong kernel-doc identifier on line:\n{line}") 1239 self.state = state.NORMAL 1240 1241 if self.config.verbose: 1242 self.emit_msg(ln, 1243 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1244 warning=False) 1245 # 1246 # Failed to find an identifier. Emit a warning 1247 # 1248 else: 1249 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1250 1251 # 1252 # Helper function to determine if a new section is being started. 1253 # 1254 def is_new_section(self, ln, line): 1255 if doc_sect.search(line): 1256 self.state = state.BODY 1257 # 1258 # Pick out the name of our new section, tweaking it if need be. 1259 # 1260 newsection = doc_sect.group(1) 1261 if newsection.lower() == 'description': 1262 newsection = 'Description' 1263 elif newsection.lower() == 'context': 1264 newsection = 'Context' 1265 self.state = state.SPECIAL_SECTION 1266 elif newsection.lower() in ["@return", "@returns", 1267 "return", "returns"]: 1268 newsection = "Return" 1269 self.state = state.SPECIAL_SECTION 1270 elif newsection[0] == '@': 1271 self.state = state.SPECIAL_SECTION 1272 # 1273 # Initialize the contents, and get the new section going. 1274 # 1275 newcontents = doc_sect.group(2) 1276 if not newcontents: 1277 newcontents = "" 1278 self.dump_section() 1279 self.entry.begin_section(ln, newsection) 1280 self.entry.leading_space = None 1281 1282 self.entry.add_text(newcontents.lstrip()) 1283 return True 1284 return False 1285 1286 # 1287 # Helper function to detect (and effect) the end of a kerneldoc comment. 1288 # 1289 def is_comment_end(self, ln, line): 1290 if doc_end.search(line): 1291 self.dump_section() 1292 1293 # Look for doc_com + <text> + doc_end: 1294 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1295 if r.match(line): 1296 self.emit_msg(ln, f"suspicious ending line: {line}") 1297 1298 self.entry.prototype = "" 1299 self.entry.new_start_line = ln + 1 1300 1301 self.state = state.PROTO 1302 return True 1303 return False 1304 1305 1306 def process_decl(self, ln, line): 1307 """ 1308 STATE_DECLARATION: We've seen the beginning of a declaration 1309 """ 1310 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1311 return 1312 # 1313 # Look for anything with the " * " line beginning. 1314 # 1315 if doc_content.search(line): 1316 cont = doc_content.group(1) 1317 # 1318 # A blank line means that we have moved out of the declaration 1319 # part of the comment (without any "special section" parameter 1320 # descriptions). 1321 # 1322 if cont == "": 1323 self.state = state.BODY 1324 # 1325 # Otherwise we have more of the declaration section to soak up. 1326 # 1327 else: 1328 self.entry.declaration_purpose = \ 1329 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1330 else: 1331 # Unknown line, ignore 1332 self.emit_msg(ln, f"bad line: {line}") 1333 1334 1335 def process_special(self, ln, line): 1336 """ 1337 STATE_SPECIAL_SECTION: a section ending with a blank line 1338 """ 1339 # 1340 # If we have hit a blank line (only the " * " marker), then this 1341 # section is done. 1342 # 1343 if KernRe(r"\s*\*\s*$").match(line): 1344 self.entry.begin_section(ln, dump = True) 1345 self.state = state.BODY 1346 return 1347 # 1348 # Not a blank line, look for the other ways to end the section. 1349 # 1350 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1351 return 1352 # 1353 # OK, we should have a continuation of the text for this section. 1354 # 1355 if doc_content.search(line): 1356 cont = doc_content.group(1) 1357 # 1358 # If the lines of text after the first in a special section have 1359 # leading white space, we need to trim it out or Sphinx will get 1360 # confused. For the second line (the None case), see what we 1361 # find there and remember it. 1362 # 1363 if self.entry.leading_space is None: 1364 r = KernRe(r'^(\s+)') 1365 if r.match(cont): 1366 self.entry.leading_space = len(r.group(1)) 1367 else: 1368 self.entry.leading_space = 0 1369 # 1370 # Otherwise, before trimming any leading chars, be *sure* 1371 # that they are white space. We should maybe warn if this 1372 # isn't the case. 1373 # 1374 for i in range(0, self.entry.leading_space): 1375 if cont[i] != " ": 1376 self.entry.leading_space = i 1377 break 1378 # 1379 # Add the trimmed result to the section and we're done. 1380 # 1381 self.entry.add_text(cont[self.entry.leading_space:]) 1382 else: 1383 # Unknown line, ignore 1384 self.emit_msg(ln, f"bad line: {line}") 1385 1386 def process_body(self, ln, line): 1387 """ 1388 STATE_BODY: the bulk of a kerneldoc comment. 1389 """ 1390 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1391 return 1392 1393 if doc_content.search(line): 1394 cont = doc_content.group(1) 1395 self.entry.add_text(cont) 1396 else: 1397 # Unknown line, ignore 1398 self.emit_msg(ln, f"bad line: {line}") 1399 1400 def process_inline_name(self, ln, line): 1401 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1402 1403 if doc_inline_sect.search(line): 1404 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1405 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1406 self.state = state.INLINE_TEXT 1407 elif doc_inline_end.search(line): 1408 self.dump_section() 1409 self.state = state.PROTO 1410 elif doc_content.search(line): 1411 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1412 self.state = state.PROTO 1413 # else ... ?? 1414 1415 def process_inline_text(self, ln, line): 1416 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1417 1418 if doc_inline_end.search(line): 1419 self.dump_section() 1420 self.state = state.PROTO 1421 elif doc_content.search(line): 1422 self.entry.add_text(doc_content.group(1)) 1423 # else ... ?? 1424 1425 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1426 """ 1427 Handle syscall definitions 1428 """ 1429 1430 is_void = False 1431 1432 # Strip newlines/CR's 1433 proto = re.sub(r'[\r\n]+', ' ', proto) 1434 1435 # Check if it's a SYSCALL_DEFINE0 1436 if 'SYSCALL_DEFINE0' in proto: 1437 is_void = True 1438 1439 # Replace SYSCALL_DEFINE with correct return type & function name 1440 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1441 1442 r = KernRe(r'long\s+(sys_.*?),') 1443 if r.search(proto): 1444 proto = KernRe(',').sub('(', proto, count=1) 1445 elif is_void: 1446 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1447 1448 # Now delete all of the odd-numbered commas in the proto 1449 # so that argument types & names don't have a comma between them 1450 count = 0 1451 length = len(proto) 1452 1453 if is_void: 1454 length = 0 # skip the loop if is_void 1455 1456 for ix in range(length): 1457 if proto[ix] == ',': 1458 count += 1 1459 if count % 2 == 1: 1460 proto = proto[:ix] + ' ' + proto[ix + 1:] 1461 1462 return proto 1463 1464 def tracepoint_munge(self, ln, proto): 1465 """ 1466 Handle tracepoint definitions 1467 """ 1468 1469 tracepointname = None 1470 tracepointargs = None 1471 1472 # Match tracepoint name based on different patterns 1473 r = KernRe(r'TRACE_EVENT\((.*?),') 1474 if r.search(proto): 1475 tracepointname = r.group(1) 1476 1477 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1478 if r.search(proto): 1479 tracepointname = r.group(1) 1480 1481 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1482 if r.search(proto): 1483 tracepointname = r.group(2) 1484 1485 if tracepointname: 1486 tracepointname = tracepointname.lstrip() 1487 1488 r = KernRe(r'TP_PROTO\((.*?)\)') 1489 if r.search(proto): 1490 tracepointargs = r.group(1) 1491 1492 if not tracepointname or not tracepointargs: 1493 self.emit_msg(ln, 1494 f"Unrecognized tracepoint format:\n{proto}\n") 1495 else: 1496 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1497 self.entry.identifier = f"trace_{self.entry.identifier}" 1498 1499 return proto 1500 1501 def process_proto_function(self, ln, line): 1502 """Ancillary routine to process a function prototype""" 1503 1504 # strip C99-style comments to end of line 1505 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1506 # 1507 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1508 # 1509 if KernRe(r'\s*#\s*define').match(line): 1510 self.entry.prototype = line 1511 elif not line.startswith('#'): # skip other preprocessor stuff 1512 r = KernRe(r'([^\{]*)') 1513 if r.match(line): 1514 self.entry.prototype += r.group(1) + " " 1515 # 1516 # If we now have the whole prototype, clean it up and declare victory. 1517 # 1518 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1519 # strip comments and surrounding spaces 1520 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1521 # 1522 # Handle self.entry.prototypes for function pointers like: 1523 # int (*pcs_config)(struct foo) 1524 # by turning it into 1525 # int pcs_config(struct foo) 1526 # 1527 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1528 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1529 # 1530 # Handle special declaration syntaxes 1531 # 1532 if 'SYSCALL_DEFINE' in self.entry.prototype: 1533 self.entry.prototype = self.syscall_munge(ln, 1534 self.entry.prototype) 1535 else: 1536 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1537 if r.search(self.entry.prototype): 1538 self.entry.prototype = self.tracepoint_munge(ln, 1539 self.entry.prototype) 1540 # 1541 # ... and we're done 1542 # 1543 self.dump_function(ln, self.entry.prototype) 1544 self.reset_state(ln) 1545 1546 def process_proto_type(self, ln, line): 1547 """Ancillary routine to process a type""" 1548 1549 # Strip C99-style comments and surrounding whitespace 1550 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1551 if not line: 1552 return # nothing to see here 1553 1554 # To distinguish preprocessor directive from regular declaration later. 1555 if line.startswith('#'): 1556 line += ";" 1557 # 1558 # Split the declaration on any of { } or ;, and accumulate pieces 1559 # until we hit a semicolon while not inside {brackets} 1560 # 1561 r = KernRe(r'(.*?)([{};])') 1562 for chunk in r.split(line): 1563 if chunk: # Ignore empty matches 1564 self.entry.prototype += chunk 1565 # 1566 # This cries out for a match statement ... someday after we can 1567 # drop Python 3.9 ... 1568 # 1569 if chunk == '{': 1570 self.entry.brcount += 1 1571 elif chunk == '}': 1572 self.entry.brcount -= 1 1573 elif chunk == ';' and self.entry.brcount <= 0: 1574 self.dump_declaration(ln, self.entry.prototype) 1575 self.reset_state(ln) 1576 return 1577 # 1578 # We hit the end of the line while still in the declaration; put 1579 # in a space to represent the newline. 1580 # 1581 self.entry.prototype += ' ' 1582 1583 def process_proto(self, ln, line): 1584 """STATE_PROTO: reading a function/whatever prototype.""" 1585 1586 if doc_inline_oneline.search(line): 1587 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1588 self.entry.add_text(doc_inline_oneline.group(2)) 1589 self.dump_section() 1590 1591 elif doc_inline_start.search(line): 1592 self.state = state.INLINE_NAME 1593 1594 elif self.entry.decl_type == 'function': 1595 self.process_proto_function(ln, line) 1596 1597 else: 1598 self.process_proto_type(ln, line) 1599 1600 def process_docblock(self, ln, line): 1601 """STATE_DOCBLOCK: within a DOC: block.""" 1602 1603 if doc_end.search(line): 1604 self.dump_section() 1605 self.output_declaration("doc", self.entry.identifier) 1606 self.reset_state(ln) 1607 1608 elif doc_content.search(line): 1609 self.entry.add_text(doc_content.group(1)) 1610 1611 def parse_export(self): 1612 """ 1613 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1614 """ 1615 1616 export_table = set() 1617 1618 try: 1619 with open(self.fname, "r", encoding="utf8", 1620 errors="backslashreplace") as fp: 1621 1622 for line in fp: 1623 self.process_export(export_table, line) 1624 1625 except IOError: 1626 return None 1627 1628 return export_table 1629 1630 # 1631 # The state/action table telling us which function to invoke in 1632 # each state. 1633 # 1634 state_actions = { 1635 state.NORMAL: process_normal, 1636 state.NAME: process_name, 1637 state.BODY: process_body, 1638 state.DECLARATION: process_decl, 1639 state.SPECIAL_SECTION: process_special, 1640 state.INLINE_NAME: process_inline_name, 1641 state.INLINE_TEXT: process_inline_text, 1642 state.PROTO: process_proto, 1643 state.DOCBLOCK: process_docblock, 1644 } 1645 1646 def parse_kdoc(self): 1647 """ 1648 Open and process each line of a C source file. 1649 The parsing is controlled via a state machine, and the line is passed 1650 to a different process function depending on the state. The process 1651 function may update the state as needed. 1652 1653 Besides parsing kernel-doc tags, it also parses export symbols. 1654 """ 1655 1656 prev = "" 1657 prev_ln = None 1658 export_table = set() 1659 1660 try: 1661 with open(self.fname, "r", encoding="utf8", 1662 errors="backslashreplace") as fp: 1663 for ln, line in enumerate(fp): 1664 1665 line = line.expandtabs().strip("\n") 1666 1667 # Group continuation lines on prototypes 1668 if self.state == state.PROTO: 1669 if line.endswith("\\"): 1670 prev += line.rstrip("\\") 1671 if not prev_ln: 1672 prev_ln = ln 1673 continue 1674 1675 if prev: 1676 ln = prev_ln 1677 line = prev + line 1678 prev = "" 1679 prev_ln = None 1680 1681 self.config.log.debug("%d %s: %s", 1682 ln, state.name[self.state], 1683 line) 1684 1685 # This is an optimization over the original script. 1686 # There, when export_file was used for the same file, 1687 # it was read twice. Here, we use the already-existing 1688 # loop to parse exported symbols as well. 1689 # 1690 if (self.state != state.NORMAL) or \ 1691 not self.process_export(export_table, line): 1692 # Hand this line to the appropriate state handler 1693 self.state_actions[self.state](self, ln, line) 1694 1695 except OSError: 1696 self.config.log.error(f"Error: Cannot open file {self.fname}") 1697 1698 return export_table, self.entries 1699