1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19from kdoc_item import KdocItem 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45known_section_names = 'description|context|returns?|notes?|examples?' 
46known_sections = KernRe(known_section_names, flags = re.I) 47doc_sect = doc_com + \ 48 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', 49 flags=re.I, cache=False) 50 51doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 52doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 53doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 54doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 55doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 56attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 57 flags=re.I | re.S, cache=False) 58 59export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 60export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 61 62type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 63 64# 65# Tests for the beginning of a kerneldoc block in its various forms. 66# 67doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 68doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 69doc_begin_func = KernRe(str(doc_com) + # initial " * ' 70 r"(?:\w+\s*\*\s*)?" + # type (not captured) 71 r'(?:define\s+)?' 
+ # possible "define" (not captured) 72 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 73 r'(?:[-:].*)?$', # description (not captured) 74 cache = False) 75 76# 77# A little helper to get rid of excess white space 78# 79multi_space = KernRe(r'\s\s+') 80def trim_whitespace(s): 81 return multi_space.sub(' ', s.strip()) 82 83class state: 84 """ 85 State machine enums 86 """ 87 88 # Parser states 89 NORMAL = 0 # normal code 90 NAME = 1 # looking for function name 91 DECLARATION = 2 # We have seen a declaration which might not be done 92 BODY = 3 # the body of the comment 93 SPECIAL_SECTION = 4 # doc section ending with a blank line 94 PROTO = 5 # scanning prototype 95 DOCBLOCK = 6 # documentation block 96 INLINE_NAME = 7 # gathering doc outside main block 97 INLINE_TEXT = 8 # reading the body of inline docs 98 99 name = [ 100 "NORMAL", 101 "NAME", 102 "DECLARATION", 103 "BODY", 104 "SPECIAL_SECTION", 105 "PROTO", 106 "DOCBLOCK", 107 "INLINE_NAME", 108 "INLINE_TEXT", 109 ] 110 111 112SECTION_DEFAULT = "Description" # default section 113 114class KernelEntry: 115 116 def __init__(self, config, ln): 117 self.config = config 118 119 self._contents = [] 120 self.prototype = "" 121 122 self.warnings = [] 123 124 self.parameterlist = [] 125 self.parameterdescs = {} 126 self.parametertypes = {} 127 self.parameterdesc_start_lines = {} 128 129 self.section_start_lines = {} 130 self.sections = {} 131 132 self.anon_struct_union = False 133 134 self.leading_space = None 135 136 # State flags 137 self.brcount = 0 138 self.declaration_start_line = ln + 1 139 140 # 141 # Management of section contents 142 # 143 def add_text(self, text): 144 self._contents.append(text) 145 146 def contents(self): 147 return '\n'.join(self._contents) + '\n' 148 149 # TODO: rename to emit_message after removal of kernel-doc.pl 150 def emit_msg(self, log_msg, warning=True): 151 """Emit a message""" 152 153 if not warning: 154 self.config.log.info(log_msg) 155 return 156 157 # Delegate warning 
output to output logic, as this way it 158 # will report warnings/info only for symbols that are output 159 160 self.warnings.append(log_msg) 161 return 162 163 # 164 # Begin a new section. 165 # 166 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 167 if dump: 168 self.dump_section(start_new = True) 169 self.section = title 170 self.new_start_line = line_no 171 172 def dump_section(self, start_new=True): 173 """ 174 Dumps section contents to arrays/hashes intended for that purpose. 175 """ 176 # 177 # If we have accumulated no contents in the default ("description") 178 # section, don't bother. 179 # 180 if self.section == SECTION_DEFAULT and not self._contents: 181 return 182 name = self.section 183 contents = self.contents() 184 185 if type_param.match(name): 186 name = type_param.group(1) 187 188 self.parameterdescs[name] = contents 189 self.parameterdesc_start_lines[name] = self.new_start_line 190 191 self.new_start_line = 0 192 193 else: 194 if name in self.sections and self.sections[name] != "": 195 # Only warn on user-specified duplicate section names 196 if name != SECTION_DEFAULT: 197 self.emit_msg(self.new_start_line, 198 f"duplicate section name '{name}'\n") 199 # Treat as a new paragraph - add a blank line 200 self.sections[name] += '\n' + contents 201 else: 202 self.sections[name] = contents 203 self.section_start_lines[name] = self.new_start_line 204 self.new_start_line = 0 205 206# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 207 208 if start_new: 209 self.section = SECTION_DEFAULT 210 self._contents = [] 211 212 213class KernelDoc: 214 """ 215 Read a C language source or header FILE and extract embedded 216 documentation comments. 
"""

    # Section names

    section_context = "Context"
    section_return = "Return"

    # Placeholder description stored for parameters/members lacking one
    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

    def emit_msg(self, ln, msg, warning=True):
        """
        Emit a message, prefixed with "<fname>:<ln> ".

        While an entry is being processed the message is handed to that
        entry; otherwise it is logged directly through config.log as a
        warning or as info.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if self.entry:
            self.entry.emit_msg(log_msg, warning)
            return

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Does nothing when there is no current entry.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Stores the entry into an entry array.

        The actual output and output filters will be handled elsewhere
        """

        item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        self.entry = KernelEntry(self.config, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        ``param`` is the parameter/member name and ``dtype`` its type text;
        an empty ``dtype`` selects the special cases handled below
        (varargs, "void", anonymous struct/union). ``org_arg`` is the
        argument text that gets recorded as the parameter type, and
        ``decl_type`` / ``declaration_name`` are only used to word the
        "not described" warning.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        # Drop everything from the first '[' or ')' onwards
        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        if dtype == "" and param.endswith("..."):
            if KernRe(r'\w\.\.\.$').search(param):
                # For named variable parameters of the form `x...`,
                # remove the dots
                param = param[:-3]
            else:
                # Handles unnamed variable parameters
                param = "..."

            if param not in self.entry.parameterdescs or \
               not self.entry.parameterdescs[param]:

                self.entry.parameterdescs[param] = "variable arguments"

        elif dtype == "" and (not param or param == "void"):
            param = "void"
            self.entry.parameterdescs[param] = "no arguments"

        elif dtype == "" and param in ["struct", "union"]:
            # Handle unnamed (anonymous) union or struct
            dtype = param
            param = "{unnamed_" + param + "}"
            self.entry.parameterdescs[param] = "anonymous\n"
            self.entry.anon_struct_union = True

        # Handle cache group enforcing variables: they do not need
        # to be described in header files
        elif "__cacheline_group" in param:
            # Ignore __cacheline_group_begin and __cacheline_group_end
            return

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            # Names containing a '.' get the placeholder silently
            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.
365 366 self.entry.parameterlist.append(param) 367 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 368 self.entry.parametertypes[param] = org_arg 369 370 371 def create_parameter_list(self, ln, decl_type, args, 372 splitter, declaration_name): 373 """ 374 Creates a list of parameters, storing them at self.entry. 375 """ 376 377 # temporarily replace all commas inside function pointer definition 378 arg_expr = KernRe(r'(\([^\),]+),') 379 while arg_expr.search(args): 380 args = arg_expr.sub(r"\1#", args) 381 382 for arg in args.split(splitter): 383 # Strip comments 384 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 385 386 # Ignore argument attributes 387 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 388 389 # Strip leading/trailing spaces 390 arg = arg.strip() 391 arg = KernRe(r'\s+').sub(' ', arg, count=1) 392 393 if arg.startswith('#'): 394 # Treat preprocessor directive as a typeless variable just to fill 395 # corresponding data structures "correctly". Catch it later in 396 # output_* subs. 397 398 # Treat preprocessor directive as a typeless variable 399 self.push_parameter(ln, decl_type, arg, "", 400 "", declaration_name) 401 402 elif KernRe(r'\(.+\)\s*\(').search(arg): 403 # Pointer-to-function 404 405 arg = arg.replace('#', ',') 406 407 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 408 if r.match(arg): 409 param = r.group(1) 410 else: 411 self.emit_msg(ln, f"Invalid param: {arg}") 412 param = arg 413 414 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 415 self.push_parameter(ln, decl_type, param, dtype, 416 arg, declaration_name) 417 418 elif KernRe(r'\(.+\)\s*\[').search(arg): 419 # Array-of-pointers 420 421 arg = arg.replace('#', ',') 422 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 423 if r.match(arg): 424 param = r.group(1) 425 else: 426 self.emit_msg(ln, f"Invalid param: {arg}") 427 param = arg 428 429 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 430 431 self.push_parameter(ln, 
decl_type, param, dtype, 432 arg, declaration_name) 433 434 elif arg: 435 arg = KernRe(r'\s*:\s*').sub(":", arg) 436 arg = KernRe(r'\s*\[').sub('[', arg) 437 438 args = KernRe(r'\s*,\s*').split(arg) 439 if args[0] and '*' in args[0]: 440 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 441 442 first_arg = [] 443 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 444 if args[0] and r.match(args[0]): 445 args.pop(0) 446 first_arg.extend(r.group(1)) 447 first_arg.append(r.group(2)) 448 else: 449 first_arg = KernRe(r'\s+').split(args.pop(0)) 450 451 args.insert(0, first_arg.pop()) 452 dtype = ' '.join(first_arg) 453 454 for param in args: 455 if KernRe(r'^(\*+)\s*(.*)').match(param): 456 r = KernRe(r'^(\*+)\s*(.*)') 457 if not r.match(param): 458 self.emit_msg(ln, f"Invalid param: {param}") 459 continue 460 461 param = r.group(1) 462 463 self.push_parameter(ln, decl_type, r.group(2), 464 f"{dtype} {r.group(1)}", 465 arg, declaration_name) 466 467 elif KernRe(r'(.*?):(\w+)').search(param): 468 r = KernRe(r'(.*?):(\w+)') 469 if not r.match(param): 470 self.emit_msg(ln, f"Invalid param: {param}") 471 continue 472 473 if dtype != "": # Skip unnamed bit-fields 474 self.push_parameter(ln, decl_type, r.group(1), 475 f"{dtype}:{r.group(2)}", 476 arg, declaration_name) 477 else: 478 self.push_parameter(ln, decl_type, param, dtype, 479 arg, declaration_name) 480 481 def check_sections(self, ln, decl_name, decl_type): 482 """ 483 Check for errors inside sections, emitting warnings if not found 484 parameters are described. 
"""
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    def dump_struct(self, ln, proto):
        """
        Store an entry for a struct or union.

        Parses ``proto`` (either "struct|union NAME { ... }" or the
        "typedef struct|union { ... } NAME;" form), checks the name against
        the identifier from the kernel-doc comment, strips private
        members, comments and attributes, expands a set of known macros,
        flattens nested struct/union members into "outer.inner" names,
        builds the parameter list and finally stores the declaration
        together with a re-indented copy of its body.
        """

        type_pattern = r'(struct|union)'

        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]

        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
        struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)')

        # Extract struct/union definition
        members = None
        declaration_name = None
        decl_type = None

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            decl_type = r.group(1)
            declaration_name = r.group(2)
            members = r.group(3)
        else:
            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')

            if r.search(proto):
                decl_type = r.group(1)
                declaration_name = r.group(3)
                members = r.group(2)

        if not members:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
            return

        args_pattern = r'([^,)]+)'

        # NOTE(review): the regex flags below are passed positionally,
        # while the module-level patterns use flags=...; confirm that the
        # second positional parameter of KernRe really is 'flags'.
        sub_prefixes = [
            (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''),
            (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''),

            # Strip comments
            (KernRe(r'\/\*.*?\*\/', re.S), ''),

            # Strip attributes
            (attribute, ' '),
            (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
            (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
            (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
            (KernRe(r'\s*__packed\s*', re.S), ' '),
            (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
            (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
            (KernRe(r'\s*____cacheline_aligned', re.S), ' '),

            # Unwrap struct_group macros based on this definition:
            # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
            # which has variants like: struct_group(NAME, MEMBERS...)
            # Only MEMBERS arguments require documentation.
            #
            # Parsing them happens on two steps:
            #
            # 1. drop struct group arguments that aren't at MEMBERS,
            #    storing them as STRUCT_GROUP(MEMBERS)
            #
            # 2. remove STRUCT_GROUP() ancillary macro.
            #
            # The original logic used to remove STRUCT_GROUP() using an
            # advanced regex:
            #
            #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
            #
            # with two patterns that are incompatible with
            # Python re module, as it has:
            #
            #   - a recursive pattern: (?1)
            #   - an atomic grouping: (?>...)
            #
            # I tried a simpler version: but it didn't work either:
            #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
            #
            # As it doesn't properly match the end parenthesis on some cases.
            #
            # So, a better solution was crafted: there's now a NestedMatch
            # class that ensures that delimiters after a search are properly
            # matched. So, the implementation to drop STRUCT_GROUP() is
            # handled separately.

            (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
            (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
            (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
            (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),

            # Replace macros
            #
            # TODO: use NestedMatch for FOO($1, $2, ...) matches
            #
            # it is better to also move those to the NestedMatch logic,
            # to ensure that parenthesis will be properly matched.

            (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
            (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
            (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
            (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
            (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
            (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
            (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),
            (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),
            (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'),
        ]

        # Regexes here are guaranteed to have the end limiter matching
        # the start delimiter. Yet, right now, only one replace group
        # is allowed.

        sub_nested_prefixes = [
            (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
        ]

        for search, sub in sub_prefixes:
            members = search.sub(sub, members)

        nested = NestedMatch()

        for search, sub in sub_nested_prefixes:
            members = nested.sub(search, sub, members)

        # Keeps the original declaration as-is
        declaration = members

        # Split nested struct/union elements
        #
        # This loop was simpler at the original kernel-doc perl version, as
        #   while ($members =~ m/$struct_members/) { ... }
        # reads 'members' string on each iteration.
        #
        # Python behavior is different: it parses 'members' only once,
        # creating a list of tuples from the first iteration.
        #
        # In other words, this won't get nested structs.
        #
        # So, we need to have an extra loop on Python to override such
        # re limitation.

        while True:
            tuples = struct_members.findall(members)
            if not tuples:
                break

            for t in tuples:
                # t holds the six capture groups of struct_members:
                # keyword, text before '{', '{', body, '}', ids, ';'
                newmember = ""
                maintype = t[0]
                s_ids = t[5]
                content = t[3]

                oldmember = "".join(t)

                for s_id in s_ids.split(','):
                    s_id = s_id.strip()

                    newmember += f"{maintype} {s_id}; "
                    # Reduce the id to its bare name: drop any bit-field or
                    # array suffix and any leading '*'
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)

                    for arg in content.split(';'):
                        arg = arg.strip()

                        if not arg:
                            continue

                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
                        if r.match(arg):
                            # Pointer-to-function
                            dtype = r.group(1)
                            name = r.group(2)
                            extra = r.group(3)

                            if not name:
                                continue

                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{dtype}{name}{extra}; "
                            else:
                                newmember += f"{dtype}{s_id}.{name}{extra}; "

                        else:
                            arg = arg.strip()
                            # Handle bitmaps
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)

                            # Handle arrays
                            arg = KernRe(r'\[.*\]').sub('', arg)

                            # Handle multiple IDs
                            arg = KernRe(r'\s*,\s*').sub(',', arg)

                            r = KernRe(r'(.*)\s+([\S+,]+)')

                            if r.search(arg):
                                dtype = r.group(1)
                                names = r.group(2)
                            else:
                                newmember += f"{arg}; "
                                continue

                            for name in names.split(','):
                                name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()

                                if not name:
                                    continue

                                if not s_id:
                                    # Anonymous struct/union
                                    newmember += f"{dtype} {name}; "
                                else:
                                    newmember += f"{dtype} {s_id}.{name}; "

                # Swap the nested definition for its flattened members
                members = members.replace(oldmember, newmember)

        # Ignore other nested elements, like enums
        members = re.sub(r'(\{[^\{\}]*\})', '', members)

        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)

        # Adjust declaration for better display
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)

        # Better handle inlined enums
        while True:
            r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
            if not r.search(declaration):
                break

            declaration = r.sub(r'\1,\n\2', declaration)

        # Re-indent the declaration, one clause per line, using tabs
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:

            clause = clause.strip()
            clause = KernRe(r'\s+').sub(' ', clause, count=1)

            if not clause:
                continue

            if '}' in clause and level > 1:
                level -= 1

            # Preprocessor lines do not get the nesting indentation
            if not KernRe(r'^\s*#').match(clause):
                declaration += "\t" * level

            declaration += "\t" + clause + "\n"
            if "{" in clause and "}" not in clause:
                level += 1

        self.output_declaration(decl_type, declaration_name,
                                definition=declaration,
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.
        """

        # Ignore members marked private
        proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto)
        proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto)

        # Strip comments
        proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto)

        # Strip #define macros inside enums
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)

        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = r.group(1).rstrip()
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = r.group(2).rstrip()
            #
            # OK, this isn't going to work.
819 # 820 else: 821 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 822 return 823 # 824 # Make sure we found what we were expecting. 825 # 826 if self.entry.identifier != declaration_name: 827 if self.entry.identifier == "": 828 self.emit_msg(ln, 829 f"{proto}: wrong kernel-doc identifier on prototype") 830 else: 831 self.emit_msg(ln, 832 f"expecting prototype for enum {self.entry.identifier}. " 833 f"Prototype was for enum {declaration_name} instead") 834 return 835 836 if not declaration_name: 837 declaration_name = "(anonymous)" 838 # 839 # Parse out the name of each enum member, and verify that we 840 # have a description for it. 841 # 842 member_set = set() 843 members = KernRe(r'\([^;)]*\)').sub('', members) 844 for arg in members.split(','): 845 if not arg: 846 continue 847 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 848 self.entry.parameterlist.append(arg) 849 if arg not in self.entry.parameterdescs: 850 self.entry.parameterdescs[arg] = self.undescribed 851 self.emit_msg(ln, 852 f"Enum value '{arg}' not described in enum '{declaration_name}'") 853 member_set.add(arg) 854 # 855 # Ensure that every described member actually exists in the enum. 856 # 857 for k in self.entry.parameterdescs: 858 if k not in member_set: 859 self.emit_msg(ln, 860 f"Excess enum value '%{k}' description in '{declaration_name}'") 861 862 self.output_declaration('enum', declaration_name, 863 purpose=self.entry.declaration_purpose) 864 865 def dump_declaration(self, ln, prototype): 866 """ 867 Stores a data declaration inside self.entries array. 
868 """ 869 870 if self.entry.decl_type == "enum": 871 self.dump_enum(ln, prototype) 872 elif self.entry.decl_type == "typedef": 873 self.dump_typedef(ln, prototype) 874 elif self.entry.decl_type in ["union", "struct"]: 875 self.dump_struct(ln, prototype) 876 else: 877 # This would be a bug 878 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 879 880 def dump_function(self, ln, prototype): 881 """ 882 Stores a function of function macro inside self.entries array. 883 """ 884 885 func_macro = False 886 return_type = '' 887 decl_type = 'function' 888 889 # Prefixes that would be removed 890 sub_prefixes = [ 891 (r"^static +", "", 0), 892 (r"^extern +", "", 0), 893 (r"^asmlinkage +", "", 0), 894 (r"^inline +", "", 0), 895 (r"^__inline__ +", "", 0), 896 (r"^__inline +", "", 0), 897 (r"^__always_inline +", "", 0), 898 (r"^noinline +", "", 0), 899 (r"^__FORTIFY_INLINE +", "", 0), 900 (r"__init +", "", 0), 901 (r"__init_or_module +", "", 0), 902 (r"__deprecated +", "", 0), 903 (r"__flatten +", "", 0), 904 (r"__meminit +", "", 0), 905 (r"__must_check +", "", 0), 906 (r"__weak +", "", 0), 907 (r"__sched +", "", 0), 908 (r"_noprof", "", 0), 909 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 910 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 911 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 912 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 913 (r"__attribute_const__ +", "", 0), 914 915 # It seems that Python support for re.X is broken: 916 # At least for me (Python 3.13), this didn't work 917# (r""" 918# __attribute__\s*\(\( 919# (?: 920# [\w\s]+ # attribute name 921# (?:\([^)]*\))? # attribute arguments 922# \s*,? 
# optional comma at the end 923# )+ 924# \)\)\s+ 925# """, "", re.X), 926 927 # So, remove whitespaces and comments from it 928 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 929 ] 930 931 for search, sub, flags in sub_prefixes: 932 prototype = KernRe(search, flags).sub(sub, prototype) 933 934 # Macros are a special case, as they change the prototype format 935 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 936 if new_proto != prototype: 937 is_define_proto = True 938 prototype = new_proto 939 else: 940 is_define_proto = False 941 942 # Yes, this truly is vile. We are looking for: 943 # 1. Return type (may be nothing if we're looking at a macro) 944 # 2. Function name 945 # 3. Function parameters. 946 # 947 # All the while we have to watch out for function pointer parameters 948 # (which IIRC is what the two sections are for), C types (these 949 # regexps don't even start to express all the possibilities), and 950 # so on. 951 # 952 # If you mess with these regexps, it's a good idea to check that 953 # the following functions' documentation still comes out right: 954 # - parport_register_device (function pointer parameters) 955 # - atomic_set (macro) 956 # - pci_match_device, __copy_to_user (long return type) 957 958 name = r'[a-zA-Z0-9_~:]+' 959 prototype_end1 = r'[^\(]*' 960 prototype_end2 = r'[^\{]*' 961 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 962 963 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 964 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 965 966 type1 = r'(?:[\w\s]+)?' 
967 type2 = r'(?:[\w\s]+\*+)+' 968 969 found = False 970 971 if is_define_proto: 972 r = KernRe(r'^()(' + name + r')\s+') 973 974 if r.search(prototype): 975 return_type = '' 976 declaration_name = r.group(2) 977 func_macro = True 978 979 found = True 980 981 if not found: 982 patterns = [ 983 rf'^()({name})\s*{prototype_end}', 984 rf'^({type1})\s+({name})\s*{prototype_end}', 985 rf'^({type2})\s*({name})\s*{prototype_end}', 986 ] 987 988 for p in patterns: 989 r = KernRe(p) 990 991 if r.match(prototype): 992 993 return_type = r.group(1) 994 declaration_name = r.group(2) 995 args = r.group(3) 996 997 self.create_parameter_list(ln, decl_type, args, ',', 998 declaration_name) 999 1000 found = True 1001 break 1002 if not found: 1003 self.emit_msg(ln, 1004 f"cannot understand function prototype: '{prototype}'") 1005 return 1006 1007 if self.entry.identifier != declaration_name: 1008 self.emit_msg(ln, 1009 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") 1010 return 1011 1012 self.check_sections(ln, declaration_name, "function") 1013 1014 self.check_return_section(ln, declaration_name, return_type) 1015 1016 if 'typedef' in return_type: 1017 self.output_declaration(decl_type, declaration_name, 1018 typedef=True, 1019 functiontype=return_type, 1020 purpose=self.entry.declaration_purpose, 1021 func_macro=func_macro) 1022 else: 1023 self.output_declaration(decl_type, declaration_name, 1024 typedef=False, 1025 functiontype=return_type, 1026 purpose=self.entry.declaration_purpose, 1027 func_macro=func_macro) 1028 1029 def dump_typedef(self, ln, proto): 1030 """ 1031 Stores a typedef inside self.entries array. 
1032 """ 1033 1034 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1035 typedef_ident = r'\*?\s*(\w\S+)\s*' 1036 typedef_args = r'\s*\((.*)\);' 1037 1038 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1039 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1040 1041 # Strip comments 1042 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1043 1044 # Parse function typedef prototypes 1045 for r in [typedef1, typedef2]: 1046 if not r.match(proto): 1047 continue 1048 1049 return_type = r.group(1).strip() 1050 declaration_name = r.group(2) 1051 args = r.group(3) 1052 1053 if self.entry.identifier != declaration_name: 1054 self.emit_msg(ln, 1055 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1056 return 1057 1058 decl_type = 'function' 1059 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1060 1061 self.output_declaration(decl_type, declaration_name, 1062 typedef=True, 1063 functiontype=return_type, 1064 purpose=self.entry.declaration_purpose) 1065 return 1066 1067 # Handle nested parentheses or brackets 1068 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1069 while r.search(proto): 1070 proto = r.sub('', proto) 1071 1072 # Parse simple typedefs 1073 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1074 if r.match(proto): 1075 declaration_name = r.group(1) 1076 1077 if self.entry.identifier != declaration_name: 1078 self.emit_msg(ln, 1079 f"expecting prototype for typedef {self.entry.identifier}. 
Prototype was for typedef {declaration_name} instead\n") 1080 return 1081 1082 self.output_declaration('typedef', declaration_name, 1083 purpose=self.entry.declaration_purpose) 1084 return 1085 1086 self.emit_msg(ln, "error: Cannot parse typedef!") 1087 1088 @staticmethod 1089 def process_export(function_set, line): 1090 """ 1091 process EXPORT_SYMBOL* tags 1092 1093 This method doesn't use any variable from the class, so declare it 1094 with a staticmethod decorator. 1095 """ 1096 1097 # We support documenting some exported symbols with different 1098 # names. A horrible hack. 1099 suffixes = [ '_noprof' ] 1100 1101 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1102 # multiple export lines would violate Kernel coding style. 1103 1104 if export_symbol.search(line): 1105 symbol = export_symbol.group(2) 1106 elif export_symbol_ns.search(line): 1107 symbol = export_symbol_ns.group(2) 1108 else: 1109 return False 1110 # 1111 # Found an export, trim out any special suffixes 1112 # 1113 for suffix in suffixes: 1114 symbol = symbol.removesuffix(suffix) 1115 function_set.add(symbol) 1116 return True 1117 1118 def process_normal(self, ln, line): 1119 """ 1120 STATE_NORMAL: looking for the /** to begin everything. 1121 """ 1122 1123 if not doc_start.match(line): 1124 return 1125 1126 # start a new entry 1127 self.reset_state(ln) 1128 1129 # next line is always the function name 1130 self.state = state.NAME 1131 1132 def process_name(self, ln, line): 1133 """ 1134 STATE_NAME: Looking for the "name - description" line 1135 """ 1136 # 1137 # Check for a DOC: block and handle them specially. 1138 # 1139 if doc_block.search(line): 1140 1141 if not doc_block.group(1): 1142 self.entry.begin_section(ln, "Introduction") 1143 else: 1144 self.entry.begin_section(ln, doc_block.group(1)) 1145 1146 self.entry.identifier = self.entry.section 1147 self.state = state.DOCBLOCK 1148 # 1149 # Otherwise we're looking for a normal kerneldoc declaration line. 
1150 # 1151 elif doc_decl.search(line): 1152 self.entry.identifier = doc_decl.group(1) 1153 1154 # Test for data declaration 1155 if doc_begin_data.search(line): 1156 self.entry.decl_type = doc_begin_data.group(1) 1157 self.entry.identifier = doc_begin_data.group(2) 1158 # 1159 # Look for a function description 1160 # 1161 elif doc_begin_func.search(line): 1162 self.entry.identifier = doc_begin_func.group(1) 1163 self.entry.decl_type = "function" 1164 # 1165 # We struck out. 1166 # 1167 else: 1168 self.emit_msg(ln, 1169 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1170 self.state = state.NORMAL 1171 return 1172 # 1173 # OK, set up for a new kerneldoc entry. 1174 # 1175 self.state = state.BODY 1176 self.entry.identifier = self.entry.identifier.strip(" ") 1177 # if there's no @param blocks need to set up default section here 1178 self.entry.begin_section(ln + 1) 1179 # 1180 # Find the description portion, which *should* be there but 1181 # isn't always. 1182 # (We should be able to capture this from the previous parsing - someday) 1183 # 1184 r = KernRe("[-:](.*)") 1185 if r.search(line): 1186 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1187 self.state = state.DECLARATION 1188 else: 1189 self.entry.declaration_purpose = "" 1190 1191 if not self.entry.declaration_purpose and self.config.wshort_desc: 1192 self.emit_msg(ln, 1193 f"missing initial short description on line:\n{line}") 1194 1195 if not self.entry.identifier and self.entry.decl_type != "enum": 1196 self.emit_msg(ln, 1197 f"wrong kernel-doc identifier on line:\n{line}") 1198 self.state = state.NORMAL 1199 1200 if self.config.verbose: 1201 self.emit_msg(ln, 1202 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1203 warning=False) 1204 # 1205 # Failed to find an identifier. 
Emit a warning 1206 # 1207 else: 1208 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1209 1210 # 1211 # Helper function to determine if a new section is being started. 1212 # 1213 def is_new_section(self, ln, line): 1214 if doc_sect.search(line): 1215 self.state = state.BODY 1216 # 1217 # Pick out the name of our new section, tweaking it if need be. 1218 # 1219 newsection = doc_sect.group(1) 1220 if newsection.lower() == 'description': 1221 newsection = 'Description' 1222 elif newsection.lower() == 'context': 1223 newsection = 'Context' 1224 self.state = state.SPECIAL_SECTION 1225 elif newsection.lower() in ["@return", "@returns", 1226 "return", "returns"]: 1227 newsection = "Return" 1228 self.state = state.SPECIAL_SECTION 1229 elif newsection[0] == '@': 1230 self.state = state.SPECIAL_SECTION 1231 # 1232 # Initialize the contents, and get the new section going. 1233 # 1234 newcontents = doc_sect.group(2) 1235 if not newcontents: 1236 newcontents = "" 1237 self.dump_section() 1238 self.entry.begin_section(ln, newsection) 1239 self.entry.leading_space = None 1240 1241 self.entry.add_text(newcontents.lstrip()) 1242 return True 1243 return False 1244 1245 # 1246 # Helper function to detect (and effect) the end of a kerneldoc comment. 1247 # 1248 def is_comment_end(self, ln, line): 1249 if doc_end.search(line): 1250 self.dump_section() 1251 1252 # Look for doc_com + <text> + doc_end: 1253 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1254 if r.match(line): 1255 self.emit_msg(ln, f"suspicious ending line: {line}") 1256 1257 self.entry.prototype = "" 1258 self.entry.new_start_line = ln + 1 1259 1260 self.state = state.PROTO 1261 return True 1262 return False 1263 1264 1265 def process_decl(self, ln, line): 1266 """ 1267 STATE_DECLARATION: We've seen the beginning of a declaration 1268 """ 1269 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1270 return 1271 # 1272 # Look for anything with the " * " line beginning. 
1273 # 1274 if doc_content.search(line): 1275 cont = doc_content.group(1) 1276 # 1277 # A blank line means that we have moved out of the declaration 1278 # part of the comment (without any "special section" parameter 1279 # descriptions). 1280 # 1281 if cont == "": 1282 self.state = state.BODY 1283 # 1284 # Otherwise we have more of the declaration section to soak up. 1285 # 1286 else: 1287 self.entry.declaration_purpose = \ 1288 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1289 else: 1290 # Unknown line, ignore 1291 self.emit_msg(ln, f"bad line: {line}") 1292 1293 1294 def process_special(self, ln, line): 1295 """ 1296 STATE_SPECIAL_SECTION: a section ending with a blank line 1297 """ 1298 # 1299 # If we have hit a blank line (only the " * " marker), then this 1300 # section is done. 1301 # 1302 if KernRe(r"\s*\*\s*$").match(line): 1303 self.entry.begin_section(ln, dump = True) 1304 self.state = state.BODY 1305 return 1306 # 1307 # Not a blank line, look for the other ways to end the section. 1308 # 1309 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1310 return 1311 # 1312 # OK, we should have a continuation of the text for this section. 1313 # 1314 if doc_content.search(line): 1315 cont = doc_content.group(1) 1316 # 1317 # If the lines of text after the first in a special section have 1318 # leading white space, we need to trim it out or Sphinx will get 1319 # confused. For the second line (the None case), see what we 1320 # find there and remember it. 1321 # 1322 if self.entry.leading_space is None: 1323 r = KernRe(r'^(\s+)') 1324 if r.match(cont): 1325 self.entry.leading_space = len(r.group(1)) 1326 else: 1327 self.entry.leading_space = 0 1328 # 1329 # Otherwise, before trimming any leading chars, be *sure* 1330 # that they are white space. We should maybe warn if this 1331 # isn't the case. 
1332 # 1333 for i in range(0, self.entry.leading_space): 1334 if cont[i] != " ": 1335 self.entry.leading_space = i 1336 break 1337 # 1338 # Add the trimmed result to the section and we're done. 1339 # 1340 self.entry.add_text(cont[self.entry.leading_space:]) 1341 else: 1342 # Unknown line, ignore 1343 self.emit_msg(ln, f"bad line: {line}") 1344 1345 def process_body(self, ln, line): 1346 """ 1347 STATE_BODY: the bulk of a kerneldoc comment. 1348 """ 1349 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1350 return 1351 1352 if doc_content.search(line): 1353 cont = doc_content.group(1) 1354 self.entry.add_text(cont) 1355 else: 1356 # Unknown line, ignore 1357 self.emit_msg(ln, f"bad line: {line}") 1358 1359 def process_inline_name(self, ln, line): 1360 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1361 1362 if doc_inline_sect.search(line): 1363 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1364 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1365 self.state = state.INLINE_TEXT 1366 elif doc_inline_end.search(line): 1367 self.dump_section() 1368 self.state = state.PROTO 1369 elif doc_content.search(line): 1370 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1371 self.state = state.PROTO 1372 # else ... ?? 1373 1374 def process_inline_text(self, ln, line): 1375 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1376 1377 if doc_inline_end.search(line): 1378 self.dump_section() 1379 self.state = state.PROTO 1380 elif doc_content.search(line): 1381 self.entry.add_text(doc_content.group(1)) 1382 # else ... ?? 
1383 1384 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1385 """ 1386 Handle syscall definitions 1387 """ 1388 1389 is_void = False 1390 1391 # Strip newlines/CR's 1392 proto = re.sub(r'[\r\n]+', ' ', proto) 1393 1394 # Check if it's a SYSCALL_DEFINE0 1395 if 'SYSCALL_DEFINE0' in proto: 1396 is_void = True 1397 1398 # Replace SYSCALL_DEFINE with correct return type & function name 1399 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1400 1401 r = KernRe(r'long\s+(sys_.*?),') 1402 if r.search(proto): 1403 proto = KernRe(',').sub('(', proto, count=1) 1404 elif is_void: 1405 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1406 1407 # Now delete all of the odd-numbered commas in the proto 1408 # so that argument types & names don't have a comma between them 1409 count = 0 1410 length = len(proto) 1411 1412 if is_void: 1413 length = 0 # skip the loop if is_void 1414 1415 for ix in range(length): 1416 if proto[ix] == ',': 1417 count += 1 1418 if count % 2 == 1: 1419 proto = proto[:ix] + ' ' + proto[ix + 1:] 1420 1421 return proto 1422 1423 def tracepoint_munge(self, ln, proto): 1424 """ 1425 Handle tracepoint definitions 1426 """ 1427 1428 tracepointname = None 1429 tracepointargs = None 1430 1431 # Match tracepoint name based on different patterns 1432 r = KernRe(r'TRACE_EVENT\((.*?),') 1433 if r.search(proto): 1434 tracepointname = r.group(1) 1435 1436 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1437 if r.search(proto): 1438 tracepointname = r.group(1) 1439 1440 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1441 if r.search(proto): 1442 tracepointname = r.group(2) 1443 1444 if tracepointname: 1445 tracepointname = tracepointname.lstrip() 1446 1447 r = KernRe(r'TP_PROTO\((.*?)\)') 1448 if r.search(proto): 1449 tracepointargs = r.group(1) 1450 1451 if not tracepointname or not tracepointargs: 1452 self.emit_msg(ln, 1453 f"Unrecognized tracepoint format:\n{proto}\n") 1454 else: 1455 proto = f"static inline void 
trace_{tracepointname}({tracepointargs})" 1456 self.entry.identifier = f"trace_{self.entry.identifier}" 1457 1458 return proto 1459 1460 def process_proto_function(self, ln, line): 1461 """Ancillary routine to process a function prototype""" 1462 1463 # strip C99-style comments to end of line 1464 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1465 # 1466 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1467 # 1468 if KernRe(r'\s*#\s*define').match(line): 1469 self.entry.prototype = line 1470 elif not line.startswith('#'): # skip other preprocessor stuff 1471 r = KernRe(r'([^\{]*)') 1472 if r.match(line): 1473 self.entry.prototype += r.group(1) + " " 1474 # 1475 # If we now have the whole prototype, clean it up and declare victory. 1476 # 1477 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1478 # strip comments and surrounding spaces 1479 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1480 # 1481 # Handle self.entry.prototypes for function pointers like: 1482 # int (*pcs_config)(struct foo) 1483 # by turning it into 1484 # int pcs_config(struct foo) 1485 # 1486 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1487 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1488 # 1489 # Handle special declaration syntaxes 1490 # 1491 if 'SYSCALL_DEFINE' in self.entry.prototype: 1492 self.entry.prototype = self.syscall_munge(ln, 1493 self.entry.prototype) 1494 else: 1495 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1496 if r.search(self.entry.prototype): 1497 self.entry.prototype = self.tracepoint_munge(ln, 1498 self.entry.prototype) 1499 # 1500 # ... 
and we're done 1501 # 1502 self.dump_function(ln, self.entry.prototype) 1503 self.reset_state(ln) 1504 1505 def process_proto_type(self, ln, line): 1506 """Ancillary routine to process a type""" 1507 1508 # Strip C99-style comments and surrounding whitespace 1509 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1510 if not line: 1511 return # nothing to see here 1512 1513 # To distinguish preprocessor directive from regular declaration later. 1514 if line.startswith('#'): 1515 line += ";" 1516 # 1517 # Split the declaration on any of { } or ;, and accumulate pieces 1518 # until we hit a semicolon while not inside {brackets} 1519 # 1520 r = KernRe(r'(.*?)([{};])') 1521 for chunk in r.split(line): 1522 if chunk: # Ignore empty matches 1523 self.entry.prototype += chunk 1524 # 1525 # This cries out for a match statement ... someday after we can 1526 # drop Python 3.9 ... 1527 # 1528 if chunk == '{': 1529 self.entry.brcount += 1 1530 elif chunk == '}': 1531 self.entry.brcount -= 1 1532 elif chunk == ';' and self.entry.brcount <= 0: 1533 self.dump_declaration(ln, self.entry.prototype) 1534 self.reset_state(ln) 1535 return 1536 # 1537 # We hit the end of the line while still in the declaration; put 1538 # in a space to represent the newline. 
1539 # 1540 self.entry.prototype += ' ' 1541 1542 def process_proto(self, ln, line): 1543 """STATE_PROTO: reading a function/whatever prototype.""" 1544 1545 if doc_inline_oneline.search(line): 1546 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1547 self.entry.add_text(doc_inline_oneline.group(2)) 1548 self.dump_section() 1549 1550 elif doc_inline_start.search(line): 1551 self.state = state.INLINE_NAME 1552 1553 elif self.entry.decl_type == 'function': 1554 self.process_proto_function(ln, line) 1555 1556 else: 1557 self.process_proto_type(ln, line) 1558 1559 def process_docblock(self, ln, line): 1560 """STATE_DOCBLOCK: within a DOC: block.""" 1561 1562 if doc_end.search(line): 1563 self.dump_section() 1564 self.output_declaration("doc", self.entry.identifier) 1565 self.reset_state(ln) 1566 1567 elif doc_content.search(line): 1568 self.entry.add_text(doc_content.group(1)) 1569 1570 def parse_export(self): 1571 """ 1572 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1573 """ 1574 1575 export_table = set() 1576 1577 try: 1578 with open(self.fname, "r", encoding="utf8", 1579 errors="backslashreplace") as fp: 1580 1581 for line in fp: 1582 self.process_export(export_table, line) 1583 1584 except IOError: 1585 return None 1586 1587 return export_table 1588 1589 # 1590 # The state/action table telling us which function to invoke in 1591 # each state. 1592 # 1593 state_actions = { 1594 state.NORMAL: process_normal, 1595 state.NAME: process_name, 1596 state.BODY: process_body, 1597 state.DECLARATION: process_decl, 1598 state.SPECIAL_SECTION: process_special, 1599 state.INLINE_NAME: process_inline_name, 1600 state.INLINE_TEXT: process_inline_text, 1601 state.PROTO: process_proto, 1602 state.DOCBLOCK: process_docblock, 1603 } 1604 1605 def parse_kdoc(self): 1606 """ 1607 Open and process each line of a C source file. 
1608 The parsing is controlled via a state machine, and the line is passed 1609 to a different process function depending on the state. The process 1610 function may update the state as needed. 1611 1612 Besides parsing kernel-doc tags, it also parses export symbols. 1613 """ 1614 1615 prev = "" 1616 prev_ln = None 1617 export_table = set() 1618 1619 try: 1620 with open(self.fname, "r", encoding="utf8", 1621 errors="backslashreplace") as fp: 1622 for ln, line in enumerate(fp): 1623 1624 line = line.expandtabs().strip("\n") 1625 1626 # Group continuation lines on prototypes 1627 if self.state == state.PROTO: 1628 if line.endswith("\\"): 1629 prev += line.rstrip("\\") 1630 if not prev_ln: 1631 prev_ln = ln 1632 continue 1633 1634 if prev: 1635 ln = prev_ln 1636 line = prev + line 1637 prev = "" 1638 prev_ln = None 1639 1640 self.config.log.debug("%d %s: %s", 1641 ln, state.name[self.state], 1642 line) 1643 1644 # This is an optimization over the original script. 1645 # There, when export_file was used for the same file, 1646 # it was read twice. Here, we use the already-existing 1647 # loop to parse exported symbols as well. 1648 # 1649 if (self.state != state.NORMAL) or \ 1650 not self.process_export(export_table, line): 1651 # Hand this line to the appropriate state handler 1652 self.state_actions[self.state](self, ln, line) 1653 1654 except OSError: 1655 self.config.log.error(f"Error: Cannot open file {self.fname}") 1656 1657 return export_table, self.entries 1658