1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45doc_sect = doc_com + \ 46 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 51doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 52doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 53doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 54doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 55attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 56 flags=re.I | re.S, cache=False) 57 58export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 59export_symbol_ns = 
KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 60 61type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 62 63class state: 64 """ 65 State machine enums 66 """ 67 68 # Parser states 69 NORMAL = 0 # normal code 70 NAME = 1 # looking for function name 71 BODY_MAYBE = 2 # body - or maybe more description 72 BODY = 3 # the body of the comment 73 BODY_WITH_BLANK_LINE = 4 # the body which has a blank line 74 PROTO = 5 # scanning prototype 75 DOCBLOCK = 6 # documentation block 76 INLINE = 7 # gathering doc outside main block 77 78 name = [ 79 "NORMAL", 80 "NAME", 81 "BODY_MAYBE", 82 "BODY", 83 "BODY_WITH_BLANK_LINE", 84 "PROTO", 85 "DOCBLOCK", 86 "INLINE", 87 ] 88 89 # Inline documentation state 90 INLINE_NA = 0 # not applicable ($state != INLINE) 91 INLINE_NAME = 1 # looking for member name (@foo:) 92 INLINE_TEXT = 2 # looking for member documentation 93 INLINE_END = 3 # done 94 INLINE_ERROR = 4 # error - Comment without header was found. 95 # Spit a warning as it's not 96 # proper kernel-doc and ignore the rest. 
97 98 inline_name = [ 99 "", 100 "_NAME", 101 "_TEXT", 102 "_END", 103 "_ERROR", 104 ] 105 106SECTION_DEFAULT = "Description" # default section 107 108class KernelEntry: 109 110 def __init__(self, config, ln): 111 self.config = config 112 113 self.contents = "" 114 self.function = "" 115 self.sectcheck = "" 116 self.struct_actual = "" 117 self.prototype = "" 118 119 self.warnings = [] 120 121 self.parameterlist = [] 122 self.parameterdescs = {} 123 self.parametertypes = {} 124 self.parameterdesc_start_lines = {} 125 126 self.section_start_lines = {} 127 self.sectionlist = [] 128 self.sections = {} 129 130 self.anon_struct_union = False 131 132 self.leading_space = None 133 134 # State flags 135 self.brcount = 0 136 137 self.in_doc_sect = False 138 self.declaration_start_line = ln + 1 139 140 # TODO: rename to emit_message after removal of kernel-doc.pl 141 def emit_msg(self, log_msg, warning=True): 142 """Emit a message""" 143 144 if not warning: 145 self.config.log.info(log_msg) 146 return 147 148 # Delegate warning output to output logic, as this way it 149 # will report warnings/info only for symbols that are output 150 151 self.warnings.append(log_msg) 152 return 153 154 def dump_section(self, start_new=True): 155 """ 156 Dumps section contents to arrays/hashes intended for that purpose. 157 """ 158 159 name = self.section 160 contents = self.contents 161 162 if type_param.match(name): 163 name = type_param.group(1) 164 165 self.parameterdescs[name] = contents 166 self.parameterdesc_start_lines[name] = self.new_start_line 167 168 self.sectcheck += name + " " 169 self.new_start_line = 0 170 171 elif name == "@...": 172 name = "..." 
173 self.parameterdescs[name] = contents 174 self.sectcheck += name + " " 175 self.parameterdesc_start_lines[name] = self.new_start_line 176 self.new_start_line = 0 177 178 else: 179 if name in self.sections and self.sections[name] != "": 180 # Only warn on user-specified duplicate section names 181 if name != SECTION_DEFAULT: 182 self.emit_msg(self.new_start_line, 183 f"duplicate section name '{name}'\n") 184 self.sections[name] += contents 185 else: 186 self.sections[name] = contents 187 self.sectionlist.append(name) 188 self.section_start_lines[name] = self.new_start_line 189 self.new_start_line = 0 190 191# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 192 193 if start_new: 194 self.section = SECTION_DEFAULT 195 self.contents = "" 196 197 198class KernelDoc: 199 """ 200 Read a C language source or header FILE and extract embedded 201 documentation comments. 202 """ 203 204 # Section names 205 206 section_intro = "Introduction" 207 section_context = "Context" 208 section_return = "Return" 209 210 undescribed = "-- undescribed --" 211 212 def __init__(self, config, fname): 213 """Initialize internal variables""" 214 215 self.fname = fname 216 self.config = config 217 218 # Initial state for the state machines 219 self.state = state.NORMAL 220 self.inline_doc_state = state.INLINE_NA 221 222 # Store entry currently being processed 223 self.entry = None 224 225 # Place all potential outputs into an array 226 self.entries = [] 227 228 def emit_msg(self, ln, msg, warning=True): 229 """Emit a message""" 230 231 log_msg = f"{self.fname}:{ln} {msg}" 232 233 if self.entry: 234 self.entry.emit_msg(log_msg, warning) 235 return 236 237 if warning: 238 self.config.log.warning(log_msg) 239 else: 240 self.config.log.info(log_msg) 241 242 def dump_section(self, start_new=True): 243 """ 244 Dumps section contents to arrays/hashes intended for that purpose. 
245 """ 246 247 if self.entry: 248 self.entry.dump_section(start_new) 249 250 # TODO: rename it to store_declaration after removal of kernel-doc.pl 251 def output_declaration(self, dtype, name, **args): 252 """ 253 Stores the entry into an entry array. 254 255 The actual output and output filters will be handled elsewhere 256 """ 257 258 # The implementation here is different than the original kernel-doc: 259 # instead of checking for output filters or actually output anything, 260 # it just stores the declaration content at self.entries, as the 261 # output will happen on a separate class. 262 # 263 # For now, we're keeping the same name of the function just to make 264 # easier to compare the source code of both scripts 265 266 args["declaration_start_line"] = self.entry.declaration_start_line 267 args["type"] = dtype 268 args["warnings"] = self.entry.warnings 269 270 # TODO: use colletions.OrderedDict to remove sectionlist 271 272 sections = args.get('sections', {}) 273 sectionlist = args.get('sectionlist', []) 274 275 # Drop empty sections 276 # TODO: improve empty sections logic to emit warnings 277 for section in ["Description", "Return"]: 278 if section in sectionlist: 279 if not sections[section].rstrip(): 280 del sections[section] 281 sectionlist.remove(section) 282 283 self.entries.append((name, args)) 284 285 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 286 287 def reset_state(self, ln): 288 """ 289 Ancillary routine to create a new entry. It initializes all 290 variables used by the state machine. 291 """ 292 293 self.entry = KernelEntry(self.config, ln) 294 295 # State flags 296 self.state = state.NORMAL 297 self.inline_doc_state = state.INLINE_NA 298 299 def push_parameter(self, ln, decl_type, param, dtype, 300 org_arg, declaration_name): 301 """ 302 Store parameters and their descriptions at self.entry. 
303 """ 304 305 if self.entry.anon_struct_union and dtype == "" and param == "}": 306 return # Ignore the ending }; from anonymous struct/union 307 308 self.entry.anon_struct_union = False 309 310 param = KernRe(r'[\[\)].*').sub('', param, count=1) 311 312 if dtype == "" and param.endswith("..."): 313 if KernRe(r'\w\.\.\.$').search(param): 314 # For named variable parameters of the form `x...`, 315 # remove the dots 316 param = param[:-3] 317 else: 318 # Handles unnamed variable parameters 319 param = "..." 320 321 if param not in self.entry.parameterdescs or \ 322 not self.entry.parameterdescs[param]: 323 324 self.entry.parameterdescs[param] = "variable arguments" 325 326 elif dtype == "" and (not param or param == "void"): 327 param = "void" 328 self.entry.parameterdescs[param] = "no arguments" 329 330 elif dtype == "" and param in ["struct", "union"]: 331 # Handle unnamed (anonymous) union or struct 332 dtype = param 333 param = "{unnamed_" + param + "}" 334 self.entry.parameterdescs[param] = "anonymous\n" 335 self.entry.anon_struct_union = True 336 337 # Handle cache group enforcing variables: they do not need 338 # to be described in header files 339 elif "__cacheline_group" in param: 340 # Ignore __cacheline_group_begin and __cacheline_group_end 341 return 342 343 # Warn if parameter has no description 344 # (but ignore ones starting with # as these are not parameters 345 # but inline preprocessor statements) 346 if param not in self.entry.parameterdescs and not param.startswith("#"): 347 self.entry.parameterdescs[param] = self.undescribed 348 349 if "." not in param: 350 if decl_type == 'function': 351 dname = f"{decl_type} parameter" 352 else: 353 dname = f"{decl_type} member" 354 355 self.emit_msg(ln, 356 f"{dname} '{param}' not described in '{declaration_name}'") 357 358 # Strip spaces from param so that it is one continuous string on 359 # parameterlist. 
This fixes a problem where check_sections() 360 # cannot find a parameter like "addr[6 + 2]" because it actually 361 # appears as "addr[6", "+", "2]" on the parameter list. 362 # However, it's better to maintain the param string unchanged for 363 # output, so just weaken the string compare in check_sections() 364 # to ignore "[blah" in a parameter string. 365 366 self.entry.parameterlist.append(param) 367 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 368 self.entry.parametertypes[param] = org_arg 369 370 def save_struct_actual(self, actual): 371 """ 372 Strip all spaces from the actual param so that it looks like 373 one string item. 374 """ 375 376 actual = KernRe(r'\s*').sub("", actual, count=1) 377 378 self.entry.struct_actual += actual + " " 379 380 def create_parameter_list(self, ln, decl_type, args, 381 splitter, declaration_name): 382 """ 383 Creates a list of parameters, storing them at self.entry. 384 """ 385 386 # temporarily replace all commas inside function pointer definition 387 arg_expr = KernRe(r'(\([^\),]+),') 388 while arg_expr.search(args): 389 args = arg_expr.sub(r"\1#", args) 390 391 for arg in args.split(splitter): 392 # Strip comments 393 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 394 395 # Ignore argument attributes 396 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 397 398 # Strip leading/trailing spaces 399 arg = arg.strip() 400 arg = KernRe(r'\s+').sub(' ', arg, count=1) 401 402 if arg.startswith('#'): 403 # Treat preprocessor directive as a typeless variable just to fill 404 # corresponding data structures "correctly". Catch it later in 405 # output_* subs. 
406 407 # Treat preprocessor directive as a typeless variable 408 self.push_parameter(ln, decl_type, arg, "", 409 "", declaration_name) 410 411 elif KernRe(r'\(.+\)\s*\(').search(arg): 412 # Pointer-to-function 413 414 arg = arg.replace('#', ',') 415 416 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 417 if r.match(arg): 418 param = r.group(1) 419 else: 420 self.emit_msg(ln, f"Invalid param: {arg}") 421 param = arg 422 423 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 424 self.save_struct_actual(param) 425 self.push_parameter(ln, decl_type, param, dtype, 426 arg, declaration_name) 427 428 elif KernRe(r'\(.+\)\s*\[').search(arg): 429 # Array-of-pointers 430 431 arg = arg.replace('#', ',') 432 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 433 if r.match(arg): 434 param = r.group(1) 435 else: 436 self.emit_msg(ln, f"Invalid param: {arg}") 437 param = arg 438 439 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 440 441 self.save_struct_actual(param) 442 self.push_parameter(ln, decl_type, param, dtype, 443 arg, declaration_name) 444 445 elif arg: 446 arg = KernRe(r'\s*:\s*').sub(":", arg) 447 arg = KernRe(r'\s*\[').sub('[', arg) 448 449 args = KernRe(r'\s*,\s*').split(arg) 450 if args[0] and '*' in args[0]: 451 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 452 453 first_arg = [] 454 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 455 if args[0] and r.match(args[0]): 456 args.pop(0) 457 first_arg.extend(r.group(1)) 458 first_arg.append(r.group(2)) 459 else: 460 first_arg = KernRe(r'\s+').split(args.pop(0)) 461 462 args.insert(0, first_arg.pop()) 463 dtype = ' '.join(first_arg) 464 465 for param in args: 466 if KernRe(r'^(\*+)\s*(.*)').match(param): 467 r = KernRe(r'^(\*+)\s*(.*)') 468 if not r.match(param): 469 self.emit_msg(ln, f"Invalid param: {param}") 470 continue 471 472 param = r.group(1) 473 474 self.save_struct_actual(r.group(2)) 475 self.push_parameter(ln, decl_type, r.group(2), 476 f"{dtype} 
{r.group(1)}", 477 arg, declaration_name) 478 479 elif KernRe(r'(.*?):(\w+)').search(param): 480 r = KernRe(r'(.*?):(\w+)') 481 if not r.match(param): 482 self.emit_msg(ln, f"Invalid param: {param}") 483 continue 484 485 if dtype != "": # Skip unnamed bit-fields 486 self.save_struct_actual(r.group(1)) 487 self.push_parameter(ln, decl_type, r.group(1), 488 f"{dtype}:{r.group(2)}", 489 arg, declaration_name) 490 else: 491 self.save_struct_actual(param) 492 self.push_parameter(ln, decl_type, param, dtype, 493 arg, declaration_name) 494 495 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): 496 """ 497 Check for errors inside sections, emitting warnings if not found 498 parameters are described. 499 """ 500 501 sects = sectcheck.split() 502 prms = prmscheck.split() 503 err = False 504 505 for sx in range(len(sects)): # pylint: disable=C0200 506 err = True 507 for px in range(len(prms)): # pylint: disable=C0200 508 prm_clean = prms[px] 509 prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) 510 prm_clean = attribute.sub('', prm_clean) 511 512 # ignore array size in a parameter string; 513 # however, the original param string may contain 514 # spaces, e.g.: addr[6 + 2] 515 # and this appears in @prms as "addr[6" since the 516 # parameter list is split at spaces; 517 # hence just ignore "[..." for the sections check; 518 prm_clean = KernRe(r'\[.*').sub('', prm_clean) 519 520 if prm_clean == sects[sx]: 521 err = False 522 break 523 524 if err: 525 if decl_type == 'function': 526 dname = f"{decl_type} parameter" 527 else: 528 dname = f"{decl_type} member" 529 530 self.emit_msg(ln, 531 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") 532 533 def check_return_section(self, ln, declaration_name, return_type): 534 """ 535 If the function doesn't return void, warns about the lack of a 536 return description. 
537 """ 538 539 if not self.config.wreturn: 540 return 541 542 # Ignore an empty return type (It's a macro) 543 # Ignore functions with a "void" return type (but not "void *") 544 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 545 return 546 547 if not self.entry.sections.get("Return", None): 548 self.emit_msg(ln, 549 f"No description found for return value of '{declaration_name}'") 550 551 def dump_struct(self, ln, proto): 552 """ 553 Store an entry for an struct or union 554 """ 555 556 type_pattern = r'(struct|union)' 557 558 qualifiers = [ 559 "__attribute__", 560 "__packed", 561 "__aligned", 562 "____cacheline_aligned_in_smp", 563 "____cacheline_aligned", 564 ] 565 566 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 567 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') 568 569 # Extract struct/union definition 570 members = None 571 declaration_name = None 572 decl_type = None 573 574 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 575 if r.search(proto): 576 decl_type = r.group(1) 577 declaration_name = r.group(2) 578 members = r.group(3) 579 else: 580 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 581 582 if r.search(proto): 583 decl_type = r.group(1) 584 declaration_name = r.group(3) 585 members = r.group(2) 586 587 if not members: 588 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 589 return 590 591 if self.entry.identifier != declaration_name: 592 self.emit_msg(ln, 593 f"expecting prototype for {decl_type} {self.entry.identifier}. 
Prototype was for {decl_type} {declaration_name} instead\n") 594 return 595 596 args_pattern = r'([^,)]+)' 597 598 sub_prefixes = [ 599 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), 600 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), 601 602 # Strip comments 603 (KernRe(r'\/\*.*?\*\/', re.S), ''), 604 605 # Strip attributes 606 (attribute, ' '), 607 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 608 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 609 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 610 (KernRe(r'\s*__packed\s*', re.S), ' '), 611 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 612 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 613 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 614 615 # Unwrap struct_group macros based on this definition: 616 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 617 # which has variants like: struct_group(NAME, MEMBERS...) 618 # Only MEMBERS arguments require documentation. 619 # 620 # Parsing them happens on two steps: 621 # 622 # 1. drop struct group arguments that aren't at MEMBERS, 623 # storing them as STRUCT_GROUP(MEMBERS) 624 # 625 # 2. remove STRUCT_GROUP() ancillary macro. 626 # 627 # The original logic used to remove STRUCT_GROUP() using an 628 # advanced regex: 629 # 630 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 631 # 632 # with two patterns that are incompatible with 633 # Python re module, as it has: 634 # 635 # - a recursive pattern: (?1) 636 # - an atomic grouping: (?>...) 637 # 638 # I tried a simpler version: but it didn't work either: 639 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 640 # 641 # As it doesn't properly match the end parenthesis on some cases. 642 # 643 # So, a better solution was crafted: there's now a NestedMatch 644 # class that ensures that delimiters after a search are properly 645 # matched. So, the implementation to drop STRUCT_GROUP() will be 646 # handled in separate. 
647 648 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 649 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 650 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 651 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 652 653 # Replace macros 654 # 655 # TODO: use NestedMatch for FOO($1, $2, ...) matches 656 # 657 # it is better to also move those to the NestedMatch logic, 658 # to ensure that parenthesis will be properly matched. 659 660 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 661 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 662 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 663 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 664 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 665 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 666 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), 667 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), 668 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), 669 (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'), 670 ] 671 672 # Regexes here are guaranteed to have the end limiter matching 673 # the start delimiter. Yet, right now, only one replace group 674 # is allowed. 
675 676 sub_nested_prefixes = [ 677 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 678 ] 679 680 for search, sub in sub_prefixes: 681 members = search.sub(sub, members) 682 683 nested = NestedMatch() 684 685 for search, sub in sub_nested_prefixes: 686 members = nested.sub(search, sub, members) 687 688 # Keeps the original declaration as-is 689 declaration = members 690 691 # Split nested struct/union elements 692 # 693 # This loop was simpler at the original kernel-doc perl version, as 694 # while ($members =~ m/$struct_members/) { ... } 695 # reads 'members' string on each interaction. 696 # 697 # Python behavior is different: it parses 'members' only once, 698 # creating a list of tuples from the first interaction. 699 # 700 # On other words, this won't get nested structs. 701 # 702 # So, we need to have an extra loop on Python to override such 703 # re limitation. 704 705 while True: 706 tuples = struct_members.findall(members) 707 if not tuples: 708 break 709 710 for t in tuples: 711 newmember = "" 712 maintype = t[0] 713 s_ids = t[5] 714 content = t[3] 715 716 oldmember = "".join(t) 717 718 for s_id in s_ids.split(','): 719 s_id = s_id.strip() 720 721 newmember += f"{maintype} {s_id}; " 722 s_id = KernRe(r'[:\[].*').sub('', s_id) 723 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 724 725 for arg in content.split(';'): 726 arg = arg.strip() 727 728 if not arg: 729 continue 730 731 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') 732 if r.match(arg): 733 # Pointer-to-function 734 dtype = r.group(1) 735 name = r.group(2) 736 extra = r.group(3) 737 738 if not name: 739 continue 740 741 if not s_id: 742 # Anonymous struct/union 743 newmember += f"{dtype}{name}{extra}; " 744 else: 745 newmember += f"{dtype}{s_id}.{name}{extra}; " 746 747 else: 748 arg = arg.strip() 749 # Handle bitmaps 750 arg = KernRe(r':\s*\d+\s*').sub('', arg) 751 752 # Handle arrays 753 arg = KernRe(r'\[.*\]').sub('', arg) 754 755 # Handle multiple IDs 756 arg = KernRe(r'\s*,\s*').sub(',', arg) 
757 758 r = KernRe(r'(.*)\s+([\S+,]+)') 759 760 if r.search(arg): 761 dtype = r.group(1) 762 names = r.group(2) 763 else: 764 newmember += f"{arg}; " 765 continue 766 767 for name in names.split(','): 768 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() 769 770 if not name: 771 continue 772 773 if not s_id: 774 # Anonymous struct/union 775 newmember += f"{dtype} {name}; " 776 else: 777 newmember += f"{dtype} {s_id}.{name}; " 778 779 members = members.replace(oldmember, newmember) 780 781 # Ignore other nested elements, like enums 782 members = re.sub(r'(\{[^\{\}]*\})', '', members) 783 784 self.create_parameter_list(ln, decl_type, members, ';', 785 declaration_name) 786 self.check_sections(ln, declaration_name, decl_type, 787 self.entry.sectcheck, self.entry.struct_actual) 788 789 # Adjust declaration for better display 790 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 791 declaration = KernRe(r'\}\s+;').sub('};', declaration) 792 793 # Better handle inlined enums 794 while True: 795 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 796 if not r.search(declaration): 797 break 798 799 declaration = r.sub(r'\1,\n\2', declaration) 800 801 def_args = declaration.split('\n') 802 level = 1 803 declaration = "" 804 for clause in def_args: 805 806 clause = clause.strip() 807 clause = KernRe(r'\s+').sub(' ', clause, count=1) 808 809 if not clause: 810 continue 811 812 if '}' in clause and level > 1: 813 level -= 1 814 815 if not KernRe(r'^\s*#').match(clause): 816 declaration += "\t" * level 817 818 declaration += "\t" + clause + "\n" 819 if "{" in clause and "}" not in clause: 820 level += 1 821 822 self.output_declaration(decl_type, declaration_name, 823 struct=declaration_name, 824 definition=declaration, 825 parameterlist=self.entry.parameterlist, 826 parameterdescs=self.entry.parameterdescs, 827 parametertypes=self.entry.parametertypes, 828 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 829 sectionlist=self.entry.sectionlist, 830 
sections=self.entry.sections, 831 section_start_lines=self.entry.section_start_lines, 832 purpose=self.entry.declaration_purpose) 833 834 def dump_enum(self, ln, proto): 835 """ 836 Stores an enum inside self.entries array. 837 """ 838 839 # Ignore members marked private 840 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) 841 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) 842 843 # Strip comments 844 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) 845 846 # Strip #define macros inside enums 847 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 848 849 members = None 850 declaration_name = None 851 852 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 853 if r.search(proto): 854 declaration_name = r.group(2) 855 members = r.group(1).rstrip() 856 else: 857 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 858 if r.match(proto): 859 declaration_name = r.group(1) 860 members = r.group(2).rstrip() 861 862 if not members: 863 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 864 return 865 866 if self.entry.identifier != declaration_name: 867 if self.entry.identifier == "": 868 self.emit_msg(ln, 869 f"{proto}: wrong kernel-doc identifier on prototype") 870 else: 871 self.emit_msg(ln, 872 f"expecting prototype for enum {self.entry.identifier}. 
Prototype was for enum {declaration_name} instead") 873 return 874 875 if not declaration_name: 876 declaration_name = "(anonymous)" 877 878 member_set = set() 879 880 members = KernRe(r'\([^;]*?[\)]').sub('', members) 881 882 for arg in members.split(','): 883 if not arg: 884 continue 885 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 886 self.entry.parameterlist.append(arg) 887 if arg not in self.entry.parameterdescs: 888 self.entry.parameterdescs[arg] = self.undescribed 889 self.emit_msg(ln, 890 f"Enum value '{arg}' not described in enum '{declaration_name}'") 891 member_set.add(arg) 892 893 for k in self.entry.parameterdescs: 894 if k not in member_set: 895 self.emit_msg(ln, 896 f"Excess enum value '%{k}' description in '{declaration_name}'") 897 898 self.output_declaration('enum', declaration_name, 899 enum=declaration_name, 900 parameterlist=self.entry.parameterlist, 901 parameterdescs=self.entry.parameterdescs, 902 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 903 sectionlist=self.entry.sectionlist, 904 sections=self.entry.sections, 905 section_start_lines=self.entry.section_start_lines, 906 purpose=self.entry.declaration_purpose) 907 908 def dump_declaration(self, ln, prototype): 909 """ 910 Stores a data declaration inside self.entries array. 911 """ 912 913 if self.entry.decl_type == "enum": 914 self.dump_enum(ln, prototype) 915 return 916 917 if self.entry.decl_type == "typedef": 918 self.dump_typedef(ln, prototype) 919 return 920 921 if self.entry.decl_type in ["union", "struct"]: 922 self.dump_struct(ln, prototype) 923 return 924 925 self.output_declaration(self.entry.decl_type, prototype, 926 entry=self.entry) 927 928 def dump_function(self, ln, prototype): 929 """ 930 Stores a function of function macro inside self.entries array. 
931 """ 932 933 func_macro = False 934 return_type = '' 935 decl_type = 'function' 936 937 # Prefixes that would be removed 938 sub_prefixes = [ 939 (r"^static +", "", 0), 940 (r"^extern +", "", 0), 941 (r"^asmlinkage +", "", 0), 942 (r"^inline +", "", 0), 943 (r"^__inline__ +", "", 0), 944 (r"^__inline +", "", 0), 945 (r"^__always_inline +", "", 0), 946 (r"^noinline +", "", 0), 947 (r"^__FORTIFY_INLINE +", "", 0), 948 (r"__init +", "", 0), 949 (r"__init_or_module +", "", 0), 950 (r"__deprecated +", "", 0), 951 (r"__flatten +", "", 0), 952 (r"__meminit +", "", 0), 953 (r"__must_check +", "", 0), 954 (r"__weak +", "", 0), 955 (r"__sched +", "", 0), 956 (r"_noprof", "", 0), 957 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 958 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 959 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 960 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 961 (r"__attribute_const__ +", "", 0), 962 963 # It seems that Python support for re.X is broken: 964 # At least for me (Python 3.13), this didn't work 965# (r""" 966# __attribute__\s*\(\( 967# (?: 968# [\w\s]+ # attribute name 969# (?:\([^)]*\))? # attribute arguments 970# \s*,? # optional comma at the end 971# )+ 972# \)\)\s+ 973# """, "", re.X), 974 975 # So, remove whitespaces and comments from it 976 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 977 ] 978 979 for search, sub, flags in sub_prefixes: 980 prototype = KernRe(search, flags).sub(sub, prototype) 981 982 # Macros are a special case, as they change the prototype format 983 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 984 if new_proto != prototype: 985 is_define_proto = True 986 prototype = new_proto 987 else: 988 is_define_proto = False 989 990 # Yes, this truly is vile. We are looking for: 991 # 1. Return type (may be nothing if we're looking at a macro) 992 # 2. Function name 993 # 3. Function parameters. 
994 # 995 # All the while we have to watch out for function pointer parameters 996 # (which IIRC is what the two sections are for), C types (these 997 # regexps don't even start to express all the possibilities), and 998 # so on. 999 # 1000 # If you mess with these regexps, it's a good idea to check that 1001 # the following functions' documentation still comes out right: 1002 # - parport_register_device (function pointer parameters) 1003 # - atomic_set (macro) 1004 # - pci_match_device, __copy_to_user (long return type) 1005 1006 name = r'[a-zA-Z0-9_~:]+' 1007 prototype_end1 = r'[^\(]*' 1008 prototype_end2 = r'[^\{]*' 1009 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 1010 1011 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 1012 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 1013 1014 type1 = r'(?:[\w\s]+)?' 1015 type2 = r'(?:[\w\s]+\*+)+' 1016 1017 found = False 1018 1019 if is_define_proto: 1020 r = KernRe(r'^()(' + name + r')\s+') 1021 1022 if r.search(prototype): 1023 return_type = '' 1024 declaration_name = r.group(2) 1025 func_macro = True 1026 1027 found = True 1028 1029 if not found: 1030 patterns = [ 1031 rf'^()({name})\s*{prototype_end}', 1032 rf'^({type1})\s+({name})\s*{prototype_end}', 1033 rf'^({type2})\s*({name})\s*{prototype_end}', 1034 ] 1035 1036 for p in patterns: 1037 r = KernRe(p) 1038 1039 if r.match(prototype): 1040 1041 return_type = r.group(1) 1042 declaration_name = r.group(2) 1043 args = r.group(3) 1044 1045 self.create_parameter_list(ln, decl_type, args, ',', 1046 declaration_name) 1047 1048 found = True 1049 break 1050 if not found: 1051 self.emit_msg(ln, 1052 f"cannot understand function prototype: '{prototype}'") 1053 return 1054 1055 if self.entry.identifier != declaration_name: 1056 self.emit_msg(ln, 1057 f"expecting prototype for {self.entry.identifier}(). 
def dump_typedef(self, ln, proto):
    """
    Stores a typedef inside self.entries array.

    Function typedefs (with or without parentheses around the name) are
    emitted as 'function' declarations flagged with typedef=True.
    Anything else is tried as a simple typedef.

    ln is the line number used on emitted messages and proto is the
    typedef prototype text. Nothing is returned: the result is handed to
    output_declaration() and problems are reported via emit_msg().
    """

    typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
    typedef_ident = r'\*?\s*(\w\S+)\s*'
    typedef_args = r'\s*\((.*)\);'

    typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
    typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args)

    # Strip comments
    proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto)

    # Parse function typedef prototypes
    for r in [typedef1, typedef2]:
        if not r.match(proto):
            continue

        return_type = r.group(1).strip()
        declaration_name = r.group(2)
        args = r.group(3)

        # The documented name must be the one being declared
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        decl_type = 'function'
        self.create_parameter_list(ln, decl_type, args, ',', declaration_name)

        self.output_declaration(decl_type, declaration_name,
                                function=declaration_name,
                                typedef=True,
                                functiontype=return_type,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parametertypes=self.entry.parametertypes,
                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)
        return

    # Handle nested parentheses or brackets.
    #
    # The trailing group is replaced by ';' instead of being dropped:
    # the simple typedef expression below needs the terminating ';',
    # so removing it made declarations such as "typedef int foo[4];"
    # impossible to parse. Each pass shortens the string, so the loop
    # always finishes.
    r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$')
    while r.search(proto):
        proto = r.sub(';', proto)

    # Parse simple typedefs
    r = KernRe(r'typedef.*\s+(\w+)\s*;')
    if r.match(proto):
        declaration_name = r.group(1)

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        self.output_declaration('typedef', declaration_name,
                                typedef=declaration_name,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)
        return

    self.emit_msg(ln, "error: Cannot parse typedef!")
1190 """ 1191 1192 if not doc_start.match(line): 1193 return 1194 1195 # start a new entry 1196 self.reset_state(ln) 1197 self.entry.in_doc_sect = False 1198 1199 # next line is always the function name 1200 self.state = state.NAME 1201 1202 def process_name(self, ln, line): 1203 """ 1204 STATE_NAME: Looking for the "name - description" line 1205 """ 1206 1207 if doc_block.search(line): 1208 self.entry.new_start_line = ln 1209 1210 if not doc_block.group(1): 1211 self.entry.section = self.section_intro 1212 else: 1213 self.entry.section = doc_block.group(1) 1214 1215 self.entry.identifier = self.entry.section 1216 self.state = state.DOCBLOCK 1217 return 1218 1219 if doc_decl.search(line): 1220 self.entry.identifier = doc_decl.group(1) 1221 self.entry.is_kernel_comment = False 1222 1223 decl_start = str(doc_com) # comment block asterisk 1224 fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions) 1225 parenthesis = r"(?:\(\w*\))?" # optional parenthesis on function 1226 decl_end = r"(?:[-:].*)" # end of the name part 1227 1228 # test for pointer declaration type, foo * bar() - desc 1229 r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") 1230 if r.search(line): 1231 self.entry.identifier = r.group(1) 1232 1233 # Test for data declaration 1234 r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") 1235 if r.search(line): 1236 self.entry.decl_type = r.group(1) 1237 self.entry.identifier = r.group(2) 1238 self.entry.is_kernel_comment = True 1239 else: 1240 # Look for foo() or static void foo() - description; 1241 # or misspelt identifier 1242 1243 r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") 1244 r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") 1245 1246 for r in [r1, r2]: 1247 if r.search(line): 1248 self.entry.identifier = r.group(1) 1249 self.entry.decl_type = "function" 1250 1251 r = KernRe(r"define\s+") 1252 self.entry.identifier = r.sub("", self.entry.identifier) 1253 
def process_body(self, ln, line):
    """
    STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment.

    Also handles STATE_BODY_WITH_BLANK_LINE. Depending on the line, it
    either starts a new section, finishes the comment (moving to
    STATE_PROTO) or appends the text to the current section or to the
    declaration purpose.

    ln is the line number used for section start lines and messages;
    line is the raw comment line.
    """

    if self.state == state.BODY_WITH_BLANK_LINE:
        # Text after a blank line closes the current section and goes
        # to the default one
        r = KernRe(r"\s*\*\s?\S")
        if r.match(line):
            self.dump_section()
            self.entry.section = SECTION_DEFAULT
            self.entry.new_start_line = ln
            self.entry.contents = ""

    if doc_sect.search(line):
        self.entry.in_doc_sect = True
        newsection = doc_sect.group(1)

        if newsection.lower() in ["description", "context"]:
            newsection = newsection.title()

        # Special case: @return is a section, not a param description
        if newsection.lower() in ["@return", "@returns",
                                  "return", "returns"]:
            newsection = "Return"

        # Perl kernel-doc has a check here for contents before sections.
        # the logic there is always false, as in_doc_sect variable is
        # always true. So, just don't implement Wcontents_before_sections

        # .title()
        newcontents = doc_sect.group(2)
        if not newcontents:
            newcontents = ""

        # Flush whatever was collected for the previous section
        if self.entry.contents.strip("\n"):
            self.dump_section()

        self.entry.new_start_line = ln
        self.entry.section = newsection
        self.entry.leading_space = None

        self.entry.contents = newcontents.lstrip()
        if self.entry.contents:
            self.entry.contents += "\n"

        self.state = state.BODY
        return

    if doc_end.search(line):
        self.dump_section()

        # Look for doc_com + <text> + doc_end:
        r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/')
        if r.match(line):
            self.emit_msg(ln, f"suspicious ending line: {line}")

        self.entry.prototype = ""
        self.entry.new_start_line = ln + 1

        self.state = state.PROTO
        return

    if doc_content.search(line):
        cont = doc_content.group(1)

        if cont == "":
            if self.entry.section == self.section_context:
                self.dump_section()

                self.entry.new_start_line = ln
                self.state = state.BODY
            else:
                if self.entry.section != SECTION_DEFAULT:
                    self.state = state.BODY_WITH_BLANK_LINE
                else:
                    self.state = state.BODY

                self.entry.contents += "\n"

        elif self.state == state.BODY_MAYBE:

            # Continued declaration purpose
            self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip()
            self.entry.declaration_purpose += " " + cont

            r = KernRe(r"\s+")
            self.entry.declaration_purpose = r.sub(' ',
                                                   self.entry.declaration_purpose)

        else:
            if self.entry.section.startswith('@') or \
               self.entry.section == self.section_context:
                if self.entry.leading_space is None:
                    r = KernRe(r'^(\s+)')
                    if r.match(cont):
                        self.entry.leading_space = len(r.group(1))
                    else:
                        self.entry.leading_space = 0

                # Double-check if leading spaces are really spaces.
                #
                # The scan is bounded by the line length: a line with
                # only a few blanks may be shorter than the recorded
                # indentation, and indexing past its end used to raise
                # IndexError.
                expected = self.entry.leading_space
                limit = min(expected, len(cont))

                pos = 0
                while pos < limit and cont[pos] == " ":
                    pos += 1

                # True when the line ended before the expected
                # indentation was reached, i.e. it has only blanks
                blank_short_line = pos == len(cont) and pos < expected

                cont = cont[pos:]

                # NEW LOGIC:
                # In case it is different, update it. A short line
                # made only of blanks says nothing about the
                # indentation of the text, so it is not used for that.
                if not blank_short_line and expected != pos:
                    self.entry.leading_space = pos

            self.entry.contents += cont + "\n"
        return

    # Unknown line, ignore
    self.emit_msg(ln, f"bad line: {line}")
def tracepoint_munge(self, ln, proto):
    """
    Handle tracepoint definitions

    Converts a TRACE_EVENT/DEFINE_EVENT/DEFINE_SINGLE_EVENT prototype
    into a "static inline void trace_<name>(<TP_PROTO args>)" one and
    prefixes the entry identifier with "trace_". If either the name or
    the arguments can't be found, a message is emitted and the
    prototype is returned untouched.
    """

    # Expression and the group which holds the tracepoint name. All of
    # them are tried in order, so the last one that matches wins.
    name_patterns = (
        (r'TRACE_EVENT\((.*?),', 1),
        (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
        (r'DEFINE_EVENT\((.*?),(.*?),', 2),
    )

    event = None
    for pattern, idx in name_patterns:
        matcher = KernRe(pattern)
        if matcher.search(proto):
            event = matcher.group(idx)

    if event:
        event = event.lstrip()

    # Arguments come from the TP_PROTO() part
    event_args = None
    matcher = KernRe(r'TP_PROTO\((.*?)\)')
    if matcher.search(proto):
        event_args = matcher.group(1)

    if event and event_args:
        self.entry.identifier = f"trace_{self.entry.identifier}"
        return f"static inline void trace_{event}({event_args})"

    self.emit_msg(ln,
                  f"Unrecognized tracepoint format:\n{proto}\n")
    return proto
def process_proto_type(self, ln, line):
    """
    Ancillary routine to process a type

    The line is cleaned up and split at each '{', '}' or ';'. The pieces
    are accumulated at self.entry.prototype while the brace depth is
    tracked at self.entry.brcount. Once a ';' is found outside any
    brace, the prototype is handed to dump_declaration() and the state
    is reset.
    """

    # Clean-ups, applied in this order: newlines/cr's become a space,
    # then leading spaces, trailing spaces and C99-style comments to
    # the end of the line are dropped
    cleanups = (
        (r'[\r\n]+', ' '),
        (r'^\s+', ''),
        (r'\s+$', ''),
        (r"\/\/.*$", ''),
    )
    for pattern, replacement in cleanups:
        line = KernRe(pattern, re.S).sub(replacement, line)

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line = line + ";"

    splitter = KernRe(r'([^\{\};]*)([\{\};])(.*)')

    while splitter.search(line):
        text = splitter.group(1)
        delim = splitter.group(2)

        if self.entry.prototype:
            self.entry.prototype += " "
        self.entry.prototype += text + delim

        # Track brace depth, never letting it become negative
        depth = self.entry.brcount + delim.count('{') - delim.count('}')
        self.entry.brcount = max(depth, 0)

        if delim == ';' and self.entry.brcount == 0:
            self.dump_declaration(ln, self.entry.prototype)
            self.reset_state(ln)
            return

        # Keep going with what is left after the delimiter
        line = splitter.group(3)

    # No more delimiters: the remaining text waits for the next line
    self.entry.prototype += line
def parse_kdoc(self):
    """
    Open and process each line of a C source file.

    Each line is handed to the process function which corresponds to
    the current state of the state machine. Such function may change
    the state as needed. While in PROTO state, lines ending with a
    backslash are joined with the next ones before being processed.

    Besides parsing kernel-doc tags, it also parses export symbols.

    Returns a tuple with the set of exported symbols and self.entries.
    If the file can't be read, an error is logged and whatever was
    collected so far is returned.
    """

    # One handler per parser state. A state missing here has no handler
    # and the line is just skipped.
    handlers = {
        state.NORMAL: self.process_normal,
        state.NAME: self.process_name,
        state.BODY: self.process_body,
        state.BODY_MAYBE: self.process_body,
        state.BODY_WITH_BLANK_LINE: self.process_body,
        state.INLINE: self.process_inline,      # scanning for inline parameters
        state.PROTO: self.process_proto,
        state.DOCBLOCK: self.process_docblock,
    }

    export_table = set()

    # Continuation line handling
    joining = False
    pending = ""
    pending_ln = None

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:
            for ln, line in enumerate(fp):

                line = line.expandtabs().strip("\n")

                # Group continuation lines on prototypes
                if self.state == state.PROTO:
                    if line.endswith("\\"):
                        pending += line.rstrip("\\")
                        joining = True

                        # Remember where the joined line has started
                        if not pending_ln:
                            pending_ln = ln

                        continue

                    if joining:
                        ln, line = pending_ln, pending + line

                        pending = ""
                        joining = False
                        pending_ln = None

                self.config.log.debug("%d %s%s: %s",
                                      ln, state.name[self.state],
                                      state.inline_name[self.inline_doc_state],
                                      line)

                # This is an optimization over the original script.
                # There, when export_file was used for the same file,
                # it was read twice. Here, we use the already-existing
                # loop to parse exported symbols as well.
                #
                # TODO: It should be noticed that not all states are
                # needed here. On a future cleanup, process export only
                # at the states that aren't handling comment markups.
                self.process_export(export_table, line)

                # Hand this line to the appropriate state handler
                handler = handlers.get(self.state)
                if handler is not None:
                    handler(ln, line)
    except OSError:
        self.config.log.error(f"Error: Cannot open file {self.fname}")

    return export_table, self.entries