1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import re 16from pprint import pformat 17 18from kdoc_re import NestedMatch, KernRe 19 20 21# 22# Regular expressions used to parse kernel-doc markups at KernelDoc class. 23# 24# Let's declare them in lowercase outside any class to make easier to 25# convert from the python script. 26# 27# As those are evaluated at the beginning, no need to cache them 28# 29 30# Allow whitespace at end of comment start. 31doc_start = KernRe(r'^/\*\*\s*$', cache=False) 32 33doc_end = KernRe(r'\*/', cache=False) 34doc_com = KernRe(r'\s*\*\s*', cache=False) 35doc_com_body = KernRe(r'\s*\* ?', cache=False) 36doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 37 38# @params and a strictly limited set of supported section names 39# Specifically: 40# Match @word: 41# @...: 42# @{section-name}: 43# while trying to not match literal block starts like "example::" 44# 45doc_sect = doc_com + \ 46 KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 51doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 52doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 53doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) 54attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", 55 flags=re.I | re.S, cache=False) 56 57export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 58export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 59 60type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 61 62# 63# Tests for the beginning of a kerneldoc block in its various forms. 64# 65doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 66doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) 67doc_begin_func = KernRe(str(doc_com) + # initial " * ' 68 r"(?:\w+\s*\*\s*)?" + # type (not captured) 69 r'(?:define\s+)?' + # possible "define" (not captured) 70 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 71 r'(?:[-:].*)?$', # description (not captured) 72 cache = False) 73 74# 75# A little helper to get rid of excess white space 76# 77multi_space = KernRe(r'\s\s+') 78def trim_whitespace(s): 79 return multi_space.sub(' ', s.strip()) 80 81class state: 82 """ 83 State machine enums 84 """ 85 86 # Parser states 87 NORMAL = 0 # normal code 88 NAME = 1 # looking for function name 89 BODY_MAYBE = 2 # body - or maybe more description 90 BODY = 3 # the body of the comment 91 BODY_WITH_BLANK_LINE = 4 # the body which has a blank line 92 PROTO = 5 # scanning prototype 93 DOCBLOCK = 6 # documentation block 94 INLINE = 7 # gathering doc outside main block 95 96 name = [ 97 "NORMAL", 98 "NAME", 99 "BODY_MAYBE", 100 "BODY", 101 "BODY_WITH_BLANK_LINE", 102 "PROTO", 103 "DOCBLOCK", 104 "INLINE", 105 ] 106 107 # Inline documentation state 108 INLINE_NA = 0 # not applicable ($state != INLINE) 109 INLINE_NAME = 1 # looking for member name (@foo:) 110 INLINE_TEXT = 2 # looking for member documentation 111 INLINE_END = 3 # done 112 INLINE_ERROR = 4 # error - Comment without header was found. 113 # Spit a warning as it's not 114 # proper kernel-doc and ignore the rest. 115 116 inline_name = [ 117 "", 118 "_NAME", 119 "_TEXT", 120 "_END", 121 "_ERROR", 122 ] 123 124SECTION_DEFAULT = "Description" # default section 125 126class KernelEntry: 127 128 def __init__(self, config, ln): 129 self.config = config 130 131 self.contents = "" 132 self.function = "" 133 self.sectcheck = "" 134 self.struct_actual = "" 135 self.prototype = "" 136 137 self.warnings = [] 138 139 self.parameterlist = [] 140 self.parameterdescs = {} 141 self.parametertypes = {} 142 self.parameterdesc_start_lines = {} 143 144 self.section_start_lines = {} 145 self.sectionlist = [] 146 self.sections = {} 147 148 self.anon_struct_union = False 149 150 self.leading_space = None 151 152 # State flags 153 self.brcount = 0 154 155 self.in_doc_sect = False 156 self.declaration_start_line = ln + 1 157 158 # TODO: rename to emit_message after removal of kernel-doc.pl 159 def emit_msg(self, log_msg, warning=True): 160 """Emit a message""" 161 162 if not warning: 163 self.config.log.info(log_msg) 164 return 165 166 # Delegate warning output to output logic, as this way it 167 # will report warnings/info only for symbols that are output 168 169 self.warnings.append(log_msg) 170 return 171 172 def dump_section(self, start_new=True): 173 """ 174 Dumps section contents to arrays/hashes intended for that purpose. 175 """ 176 177 name = self.section 178 contents = self.contents 179 180 if type_param.match(name): 181 name = type_param.group(1) 182 183 self.parameterdescs[name] = contents 184 self.parameterdesc_start_lines[name] = self.new_start_line 185 186 self.sectcheck += name + " " 187 self.new_start_line = 0 188 189 elif name == "@...": 190 name = "..." 191 self.parameterdescs[name] = contents 192 self.sectcheck += name + " " 193 self.parameterdesc_start_lines[name] = self.new_start_line 194 self.new_start_line = 0 195 196 else: 197 if name in self.sections and self.sections[name] != "": 198 # Only warn on user-specified duplicate section names 199 if name != SECTION_DEFAULT: 200 self.emit_msg(self.new_start_line, 201 f"duplicate section name '{name}'\n") 202 self.sections[name] += contents 203 else: 204 self.sections[name] = contents 205 self.sectionlist.append(name) 206 self.section_start_lines[name] = self.new_start_line 207 self.new_start_line = 0 208 209# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 210 211 if start_new: 212 self.section = SECTION_DEFAULT 213 self.contents = "" 214 215 216class KernelDoc: 217 """ 218 Read a C language source or header FILE and extract embedded 219 documentation comments. 220 """ 221 222 # Section names 223 224 section_context = "Context" 225 section_return = "Return" 226 227 undescribed = "-- undescribed --" 228 229 def __init__(self, config, fname): 230 """Initialize internal variables""" 231 232 self.fname = fname 233 self.config = config 234 235 # Initial state for the state machines 236 self.state = state.NORMAL 237 self.inline_doc_state = state.INLINE_NA 238 239 # Store entry currently being processed 240 self.entry = None 241 242 # Place all potential outputs into an array 243 self.entries = [] 244 245 def emit_msg(self, ln, msg, warning=True): 246 """Emit a message""" 247 248 log_msg = f"{self.fname}:{ln} {msg}" 249 250 if self.entry: 251 self.entry.emit_msg(log_msg, warning) 252 return 253 254 if warning: 255 self.config.log.warning(log_msg) 256 else: 257 self.config.log.info(log_msg) 258 259 def dump_section(self, start_new=True): 260 """ 261 Dumps section contents to arrays/hashes intended for that purpose. 262 """ 263 264 if self.entry: 265 self.entry.dump_section(start_new) 266 267 # TODO: rename it to store_declaration after removal of kernel-doc.pl 268 def output_declaration(self, dtype, name, **args): 269 """ 270 Stores the entry into an entry array. 271 272 The actual output and output filters will be handled elsewhere 273 """ 274 275 # The implementation here is different than the original kernel-doc: 276 # instead of checking for output filters or actually output anything, 277 # it just stores the declaration content at self.entries, as the 278 # output will happen on a separate class. 279 # 280 # For now, we're keeping the same name of the function just to make 281 # easier to compare the source code of both scripts 282 283 args["declaration_start_line"] = self.entry.declaration_start_line 284 args["type"] = dtype 285 args["warnings"] = self.entry.warnings 286 287 # TODO: use colletions.OrderedDict to remove sectionlist 288 289 sections = args.get('sections', {}) 290 sectionlist = args.get('sectionlist', []) 291 292 # Drop empty sections 293 # TODO: improve empty sections logic to emit warnings 294 for section in ["Description", "Return"]: 295 if section in sectionlist: 296 if not sections[section].rstrip(): 297 del sections[section] 298 sectionlist.remove(section) 299 300 self.entries.append((name, args)) 301 302 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 303 304 def reset_state(self, ln): 305 """ 306 Ancillary routine to create a new entry. It initializes all 307 variables used by the state machine. 308 """ 309 310 self.entry = KernelEntry(self.config, ln) 311 312 # State flags 313 self.state = state.NORMAL 314 self.inline_doc_state = state.INLINE_NA 315 316 def push_parameter(self, ln, decl_type, param, dtype, 317 org_arg, declaration_name): 318 """ 319 Store parameters and their descriptions at self.entry. 320 """ 321 322 if self.entry.anon_struct_union and dtype == "" and param == "}": 323 return # Ignore the ending }; from anonymous struct/union 324 325 self.entry.anon_struct_union = False 326 327 param = KernRe(r'[\[\)].*').sub('', param, count=1) 328 329 if dtype == "" and param.endswith("..."): 330 if KernRe(r'\w\.\.\.$').search(param): 331 # For named variable parameters of the form `x...`, 332 # remove the dots 333 param = param[:-3] 334 else: 335 # Handles unnamed variable parameters 336 param = "..." 337 338 if param not in self.entry.parameterdescs or \ 339 not self.entry.parameterdescs[param]: 340 341 self.entry.parameterdescs[param] = "variable arguments" 342 343 elif dtype == "" and (not param or param == "void"): 344 param = "void" 345 self.entry.parameterdescs[param] = "no arguments" 346 347 elif dtype == "" and param in ["struct", "union"]: 348 # Handle unnamed (anonymous) union or struct 349 dtype = param 350 param = "{unnamed_" + param + "}" 351 self.entry.parameterdescs[param] = "anonymous\n" 352 self.entry.anon_struct_union = True 353 354 # Handle cache group enforcing variables: they do not need 355 # to be described in header files 356 elif "__cacheline_group" in param: 357 # Ignore __cacheline_group_begin and __cacheline_group_end 358 return 359 360 # Warn if parameter has no description 361 # (but ignore ones starting with # as these are not parameters 362 # but inline preprocessor statements) 363 if param not in self.entry.parameterdescs and not param.startswith("#"): 364 self.entry.parameterdescs[param] = self.undescribed 365 366 if "." not in param: 367 if decl_type == 'function': 368 dname = f"{decl_type} parameter" 369 else: 370 dname = f"{decl_type} member" 371 372 self.emit_msg(ln, 373 f"{dname} '{param}' not described in '{declaration_name}'") 374 375 # Strip spaces from param so that it is one continuous string on 376 # parameterlist. This fixes a problem where check_sections() 377 # cannot find a parameter like "addr[6 + 2]" because it actually 378 # appears as "addr[6", "+", "2]" on the parameter list. 379 # However, it's better to maintain the param string unchanged for 380 # output, so just weaken the string compare in check_sections() 381 # to ignore "[blah" in a parameter string. 382 383 self.entry.parameterlist.append(param) 384 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 385 self.entry.parametertypes[param] = org_arg 386 387 def save_struct_actual(self, actual): 388 """ 389 Strip all spaces from the actual param so that it looks like 390 one string item. 391 """ 392 393 actual = KernRe(r'\s*').sub("", actual, count=1) 394 395 self.entry.struct_actual += actual + " " 396 397 def create_parameter_list(self, ln, decl_type, args, 398 splitter, declaration_name): 399 """ 400 Creates a list of parameters, storing them at self.entry. 401 """ 402 403 # temporarily replace all commas inside function pointer definition 404 arg_expr = KernRe(r'(\([^\),]+),') 405 while arg_expr.search(args): 406 args = arg_expr.sub(r"\1#", args) 407 408 for arg in args.split(splitter): 409 # Strip comments 410 arg = KernRe(r'\/\*.*\*\/').sub('', arg) 411 412 # Ignore argument attributes 413 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 414 415 # Strip leading/trailing spaces 416 arg = arg.strip() 417 arg = KernRe(r'\s+').sub(' ', arg, count=1) 418 419 if arg.startswith('#'): 420 # Treat preprocessor directive as a typeless variable just to fill 421 # corresponding data structures "correctly". Catch it later in 422 # output_* subs. 423 424 # Treat preprocessor directive as a typeless variable 425 self.push_parameter(ln, decl_type, arg, "", 426 "", declaration_name) 427 428 elif KernRe(r'\(.+\)\s*\(').search(arg): 429 # Pointer-to-function 430 431 arg = arg.replace('#', ',') 432 433 r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') 434 if r.match(arg): 435 param = r.group(1) 436 else: 437 self.emit_msg(ln, f"Invalid param: {arg}") 438 param = arg 439 440 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 441 self.save_struct_actual(param) 442 self.push_parameter(ln, decl_type, param, dtype, 443 arg, declaration_name) 444 445 elif KernRe(r'\(.+\)\s*\[').search(arg): 446 # Array-of-pointers 447 448 arg = arg.replace('#', ',') 449 r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') 450 if r.match(arg): 451 param = r.group(1) 452 else: 453 self.emit_msg(ln, f"Invalid param: {arg}") 454 param = arg 455 456 dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) 457 458 self.save_struct_actual(param) 459 self.push_parameter(ln, decl_type, param, dtype, 460 arg, declaration_name) 461 462 elif arg: 463 arg = KernRe(r'\s*:\s*').sub(":", arg) 464 arg = KernRe(r'\s*\[').sub('[', arg) 465 466 args = KernRe(r'\s*,\s*').split(arg) 467 if args[0] and '*' in args[0]: 468 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 469 470 first_arg = [] 471 r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') 472 if args[0] and r.match(args[0]): 473 args.pop(0) 474 first_arg.extend(r.group(1)) 475 first_arg.append(r.group(2)) 476 else: 477 first_arg = KernRe(r'\s+').split(args.pop(0)) 478 479 args.insert(0, first_arg.pop()) 480 dtype = ' '.join(first_arg) 481 482 for param in args: 483 if KernRe(r'^(\*+)\s*(.*)').match(param): 484 r = KernRe(r'^(\*+)\s*(.*)') 485 if not r.match(param): 486 self.emit_msg(ln, f"Invalid param: {param}") 487 continue 488 489 param = r.group(1) 490 491 self.save_struct_actual(r.group(2)) 492 self.push_parameter(ln, decl_type, r.group(2), 493 f"{dtype} {r.group(1)}", 494 arg, declaration_name) 495 496 elif KernRe(r'(.*?):(\w+)').search(param): 497 r = KernRe(r'(.*?):(\w+)') 498 if not r.match(param): 499 self.emit_msg(ln, f"Invalid param: {param}") 500 continue 501 502 if dtype != "": # Skip unnamed bit-fields 503 self.save_struct_actual(r.group(1)) 504 self.push_parameter(ln, decl_type, r.group(1), 505 f"{dtype}:{r.group(2)}", 506 arg, declaration_name) 507 else: 508 self.save_struct_actual(param) 509 self.push_parameter(ln, decl_type, param, dtype, 510 arg, declaration_name) 511 512 def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): 513 """ 514 Check for errors inside sections, emitting warnings if not found 515 parameters are described. 516 """ 517 518 sects = sectcheck.split() 519 prms = prmscheck.split() 520 err = False 521 522 for sx in range(len(sects)): # pylint: disable=C0200 523 err = True 524 for px in range(len(prms)): # pylint: disable=C0200 525 prm_clean = prms[px] 526 prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) 527 prm_clean = attribute.sub('', prm_clean) 528 529 # ignore array size in a parameter string; 530 # however, the original param string may contain 531 # spaces, e.g.: addr[6 + 2] 532 # and this appears in @prms as "addr[6" since the 533 # parameter list is split at spaces; 534 # hence just ignore "[..." for the sections check; 535 prm_clean = KernRe(r'\[.*').sub('', prm_clean) 536 537 if prm_clean == sects[sx]: 538 err = False 539 break 540 541 if err: 542 if decl_type == 'function': 543 dname = f"{decl_type} parameter" 544 else: 545 dname = f"{decl_type} member" 546 547 self.emit_msg(ln, 548 f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") 549 550 def check_return_section(self, ln, declaration_name, return_type): 551 """ 552 If the function doesn't return void, warns about the lack of a 553 return description. 554 """ 555 556 if not self.config.wreturn: 557 return 558 559 # Ignore an empty return type (It's a macro) 560 # Ignore functions with a "void" return type (but not "void *") 561 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 562 return 563 564 if not self.entry.sections.get("Return", None): 565 self.emit_msg(ln, 566 f"No description found for return value of '{declaration_name}'") 567 568 def dump_struct(self, ln, proto): 569 """ 570 Store an entry for an struct or union 571 """ 572 573 type_pattern = r'(struct|union)' 574 575 qualifiers = [ 576 "__attribute__", 577 "__packed", 578 "__aligned", 579 "____cacheline_aligned_in_smp", 580 "____cacheline_aligned", 581 ] 582 583 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 584 struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') 585 586 # Extract struct/union definition 587 members = None 588 declaration_name = None 589 decl_type = None 590 591 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 592 if r.search(proto): 593 decl_type = r.group(1) 594 declaration_name = r.group(2) 595 members = r.group(3) 596 else: 597 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 598 599 if r.search(proto): 600 decl_type = r.group(1) 601 declaration_name = r.group(3) 602 members = r.group(2) 603 604 if not members: 605 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 606 return 607 608 if self.entry.identifier != declaration_name: 609 self.emit_msg(ln, 610 f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") 611 return 612 613 args_pattern = r'([^,)]+)' 614 615 sub_prefixes = [ 616 (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), 617 (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), 618 619 # Strip comments 620 (KernRe(r'\/\*.*?\*\/', re.S), ''), 621 622 # Strip attributes 623 (attribute, ' '), 624 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 625 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 626 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 627 (KernRe(r'\s*__packed\s*', re.S), ' '), 628 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 629 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 630 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 631 632 # Unwrap struct_group macros based on this definition: 633 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 634 # which has variants like: struct_group(NAME, MEMBERS...) 635 # Only MEMBERS arguments require documentation. 636 # 637 # Parsing them happens on two steps: 638 # 639 # 1. drop struct group arguments that aren't at MEMBERS, 640 # storing them as STRUCT_GROUP(MEMBERS) 641 # 642 # 2. remove STRUCT_GROUP() ancillary macro. 643 # 644 # The original logic used to remove STRUCT_GROUP() using an 645 # advanced regex: 646 # 647 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 648 # 649 # with two patterns that are incompatible with 650 # Python re module, as it has: 651 # 652 # - a recursive pattern: (?1) 653 # - an atomic grouping: (?>...) 654 # 655 # I tried a simpler version: but it didn't work either: 656 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 657 # 658 # As it doesn't properly match the end parenthesis on some cases. 659 # 660 # So, a better solution was crafted: there's now a NestedMatch 661 # class that ensures that delimiters after a search are properly 662 # matched. So, the implementation to drop STRUCT_GROUP() will be 663 # handled in separate. 664 665 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 666 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 667 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 668 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 669 670 # Replace macros 671 # 672 # TODO: use NestedMatch for FOO($1, $2, ...) matches 673 # 674 # it is better to also move those to the NestedMatch logic, 675 # to ensure that parenthesis will be properly matched. 676 677 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 678 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 679 (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 680 (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 681 (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 682 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), 683 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), 684 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), 685 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), 686 ] 687 688 # Regexes here are guaranteed to have the end limiter matching 689 # the start delimiter. Yet, right now, only one replace group 690 # is allowed. 691 692 sub_nested_prefixes = [ 693 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 694 ] 695 696 for search, sub in sub_prefixes: 697 members = search.sub(sub, members) 698 699 nested = NestedMatch() 700 701 for search, sub in sub_nested_prefixes: 702 members = nested.sub(search, sub, members) 703 704 # Keeps the original declaration as-is 705 declaration = members 706 707 # Split nested struct/union elements 708 # 709 # This loop was simpler at the original kernel-doc perl version, as 710 # while ($members =~ m/$struct_members/) { ... } 711 # reads 'members' string on each interaction. 712 # 713 # Python behavior is different: it parses 'members' only once, 714 # creating a list of tuples from the first interaction. 715 # 716 # On other words, this won't get nested structs. 717 # 718 # So, we need to have an extra loop on Python to override such 719 # re limitation. 720 721 while True: 722 tuples = struct_members.findall(members) 723 if not tuples: 724 break 725 726 for t in tuples: 727 newmember = "" 728 maintype = t[0] 729 s_ids = t[5] 730 content = t[3] 731 732 oldmember = "".join(t) 733 734 for s_id in s_ids.split(','): 735 s_id = s_id.strip() 736 737 newmember += f"{maintype} {s_id}; " 738 s_id = KernRe(r'[:\[].*').sub('', s_id) 739 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 740 741 for arg in content.split(';'): 742 arg = arg.strip() 743 744 if not arg: 745 continue 746 747 r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') 748 if r.match(arg): 749 # Pointer-to-function 750 dtype = r.group(1) 751 name = r.group(2) 752 extra = r.group(3) 753 754 if not name: 755 continue 756 757 if not s_id: 758 # Anonymous struct/union 759 newmember += f"{dtype}{name}{extra}; " 760 else: 761 newmember += f"{dtype}{s_id}.{name}{extra}; " 762 763 else: 764 arg = arg.strip() 765 # Handle bitmaps 766 arg = KernRe(r':\s*\d+\s*').sub('', arg) 767 768 # Handle arrays 769 arg = KernRe(r'\[.*\]').sub('', arg) 770 771 # Handle multiple IDs 772 arg = KernRe(r'\s*,\s*').sub(',', arg) 773 774 r = KernRe(r'(.*)\s+([\S+,]+)') 775 776 if r.search(arg): 777 dtype = r.group(1) 778 names = r.group(2) 779 else: 780 newmember += f"{arg}; " 781 continue 782 783 for name in names.split(','): 784 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() 785 786 if not name: 787 continue 788 789 if not s_id: 790 # Anonymous struct/union 791 newmember += f"{dtype} {name}; " 792 else: 793 newmember += f"{dtype} {s_id}.{name}; " 794 795 members = members.replace(oldmember, newmember) 796 797 # Ignore other nested elements, like enums 798 members = re.sub(r'(\{[^\{\}]*\})', '', members) 799 800 self.create_parameter_list(ln, decl_type, members, ';', 801 declaration_name) 802 self.check_sections(ln, declaration_name, decl_type, 803 self.entry.sectcheck, self.entry.struct_actual) 804 805 # Adjust declaration for better display 806 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 807 declaration = KernRe(r'\}\s+;').sub('};', declaration) 808 809 # Better handle inlined enums 810 while True: 811 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 812 if not r.search(declaration): 813 break 814 815 declaration = r.sub(r'\1,\n\2', declaration) 816 817 def_args = declaration.split('\n') 818 level = 1 819 declaration = "" 820 for clause in def_args: 821 822 clause = clause.strip() 823 clause = KernRe(r'\s+').sub(' ', clause, count=1) 824 825 if not clause: 826 continue 827 828 if '}' in clause and level > 1: 829 level -= 1 830 831 if not KernRe(r'^\s*#').match(clause): 832 declaration += "\t" * level 833 834 declaration += "\t" + clause + "\n" 835 if "{" in clause and "}" not in clause: 836 level += 1 837 838 self.output_declaration(decl_type, declaration_name, 839 struct=declaration_name, 840 definition=declaration, 841 parameterlist=self.entry.parameterlist, 842 parameterdescs=self.entry.parameterdescs, 843 parametertypes=self.entry.parametertypes, 844 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 845 sectionlist=self.entry.sectionlist, 846 sections=self.entry.sections, 847 section_start_lines=self.entry.section_start_lines, 848 purpose=self.entry.declaration_purpose) 849 850 def dump_enum(self, ln, proto): 851 """ 852 Stores an enum inside self.entries array. 853 """ 854 855 # Ignore members marked private 856 proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) 857 proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) 858 859 # Strip comments 860 proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) 861 862 # Strip #define macros inside enums 863 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 864 865 members = None 866 declaration_name = None 867 868 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 869 if r.search(proto): 870 declaration_name = r.group(2) 871 members = r.group(1).rstrip() 872 else: 873 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 874 if r.match(proto): 875 declaration_name = r.group(1) 876 members = r.group(2).rstrip() 877 878 if not members: 879 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 880 return 881 882 if self.entry.identifier != declaration_name: 883 if self.entry.identifier == "": 884 self.emit_msg(ln, 885 f"{proto}: wrong kernel-doc identifier on prototype") 886 else: 887 self.emit_msg(ln, 888 f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") 889 return 890 891 if not declaration_name: 892 declaration_name = "(anonymous)" 893 894 member_set = set() 895 896 members = KernRe(r'\([^;]*?[\)]').sub('', members) 897 898 for arg in members.split(','): 899 if not arg: 900 continue 901 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 902 self.entry.parameterlist.append(arg) 903 if arg not in self.entry.parameterdescs: 904 self.entry.parameterdescs[arg] = self.undescribed 905 self.emit_msg(ln, 906 f"Enum value '{arg}' not described in enum '{declaration_name}'") 907 member_set.add(arg) 908 909 for k in self.entry.parameterdescs: 910 if k not in member_set: 911 self.emit_msg(ln, 912 f"Excess enum value '%{k}' description in '{declaration_name}'") 913 914 self.output_declaration('enum', declaration_name, 915 enum=declaration_name, 916 parameterlist=self.entry.parameterlist, 917 parameterdescs=self.entry.parameterdescs, 918 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 919 sectionlist=self.entry.sectionlist, 920 sections=self.entry.sections, 921 section_start_lines=self.entry.section_start_lines, 922 purpose=self.entry.declaration_purpose) 923 924 def dump_declaration(self, ln, prototype): 925 """ 926 Stores a data declaration inside self.entries array. 927 """ 928 929 if self.entry.decl_type == "enum": 930 self.dump_enum(ln, prototype) 931 return 932 933 if self.entry.decl_type == "typedef": 934 self.dump_typedef(ln, prototype) 935 return 936 937 if self.entry.decl_type in ["union", "struct"]: 938 self.dump_struct(ln, prototype) 939 return 940 941 self.output_declaration(self.entry.decl_type, prototype, 942 entry=self.entry) 943 944 def dump_function(self, ln, prototype): 945 """ 946 Stores a function of function macro inside self.entries array. 947 """ 948 949 func_macro = False 950 return_type = '' 951 decl_type = 'function' 952 953 # Prefixes that would be removed 954 sub_prefixes = [ 955 (r"^static +", "", 0), 956 (r"^extern +", "", 0), 957 (r"^asmlinkage +", "", 0), 958 (r"^inline +", "", 0), 959 (r"^__inline__ +", "", 0), 960 (r"^__inline +", "", 0), 961 (r"^__always_inline +", "", 0), 962 (r"^noinline +", "", 0), 963 (r"^__FORTIFY_INLINE +", "", 0), 964 (r"__init +", "", 0), 965 (r"__init_or_module +", "", 0), 966 (r"__deprecated +", "", 0), 967 (r"__flatten +", "", 0), 968 (r"__meminit +", "", 0), 969 (r"__must_check +", "", 0), 970 (r"__weak +", "", 0), 971 (r"__sched +", "", 0), 972 (r"_noprof", "", 0), 973 (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), 974 (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), 975 (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), 976 (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), 977 (r"__attribute_const__ +", "", 0), 978 979 # It seems that Python support for re.X is broken: 980 # At least for me (Python 3.13), this didn't work 981# (r""" 982# __attribute__\s*\(\( 983# (?: 984# [\w\s]+ # attribute name 985# (?:\([^)]*\))? # attribute arguments 986# \s*,? # optional comma at the end 987# )+ 988# \)\)\s+ 989# """, "", re.X), 990 991 # So, remove whitespaces and comments from it 992 (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), 993 ] 994 995 for search, sub, flags in sub_prefixes: 996 prototype = KernRe(search, flags).sub(sub, prototype) 997 998 # Macros are a special case, as they change the prototype format 999 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1000 if new_proto != prototype: 1001 is_define_proto = True 1002 prototype = new_proto 1003 else: 1004 is_define_proto = False 1005 1006 # Yes, this truly is vile. We are looking for: 1007 # 1. Return type (may be nothing if we're looking at a macro) 1008 # 2. Function name 1009 # 3. Function parameters. 1010 # 1011 # All the while we have to watch out for function pointer parameters 1012 # (which IIRC is what the two sections are for), C types (these 1013 # regexps don't even start to express all the possibilities), and 1014 # so on. 1015 # 1016 # If you mess with these regexps, it's a good idea to check that 1017 # the following functions' documentation still comes out right: 1018 # - parport_register_device (function pointer parameters) 1019 # - atomic_set (macro) 1020 # - pci_match_device, __copy_to_user (long return type) 1021 1022 name = r'[a-zA-Z0-9_~:]+' 1023 prototype_end1 = r'[^\(]*' 1024 prototype_end2 = r'[^\{]*' 1025 prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' 1026 1027 # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. 1028 # So, this needs to be mapped in Python with (?:...)? or (?:...)+ 1029 1030 type1 = r'(?:[\w\s]+)?' 1031 type2 = r'(?:[\w\s]+\*+)+' 1032 1033 found = False 1034 1035 if is_define_proto: 1036 r = KernRe(r'^()(' + name + r')\s+') 1037 1038 if r.search(prototype): 1039 return_type = '' 1040 declaration_name = r.group(2) 1041 func_macro = True 1042 1043 found = True 1044 1045 if not found: 1046 patterns = [ 1047 rf'^()({name})\s*{prototype_end}', 1048 rf'^({type1})\s+({name})\s*{prototype_end}', 1049 rf'^({type2})\s*({name})\s*{prototype_end}', 1050 ] 1051 1052 for p in patterns: 1053 r = KernRe(p) 1054 1055 if r.match(prototype): 1056 1057 return_type = r.group(1) 1058 declaration_name = r.group(2) 1059 args = r.group(3) 1060 1061 self.create_parameter_list(ln, decl_type, args, ',', 1062 declaration_name) 1063 1064 found = True 1065 break 1066 if not found: 1067 self.emit_msg(ln, 1068 f"cannot understand function prototype: '{prototype}'") 1069 return 1070 1071 if self.entry.identifier != declaration_name: 1072 self.emit_msg(ln, 1073 f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") 1074 return 1075 1076 prms = " ".join(self.entry.parameterlist) 1077 self.check_sections(ln, declaration_name, "function", 1078 self.entry.sectcheck, prms) 1079 1080 self.check_return_section(ln, declaration_name, return_type) 1081 1082 if 'typedef' in return_type: 1083 self.output_declaration(decl_type, declaration_name, 1084 function=declaration_name, 1085 typedef=True, 1086 functiontype=return_type, 1087 parameterlist=self.entry.parameterlist, 1088 parameterdescs=self.entry.parameterdescs, 1089 parametertypes=self.entry.parametertypes, 1090 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1091 sectionlist=self.entry.sectionlist, 1092 sections=self.entry.sections, 1093 section_start_lines=self.entry.section_start_lines, 1094 purpose=self.entry.declaration_purpose, 1095 func_macro=func_macro) 1096 else: 1097 self.output_declaration(decl_type, declaration_name, 1098 function=declaration_name, 1099 typedef=False, 1100 functiontype=return_type, 1101 parameterlist=self.entry.parameterlist, 1102 parameterdescs=self.entry.parameterdescs, 1103 parametertypes=self.entry.parametertypes, 1104 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1105 sectionlist=self.entry.sectionlist, 1106 sections=self.entry.sections, 1107 section_start_lines=self.entry.section_start_lines, 1108 purpose=self.entry.declaration_purpose, 1109 func_macro=func_macro) 1110 1111 def dump_typedef(self, ln, proto): 1112 """ 1113 Stores a typedef inside self.entries array. 1114 """ 1115 1116 typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1117 typedef_ident = r'\*?\s*(\w\S+)\s*' 1118 typedef_args = r'\s*\((.*)\);' 1119 1120 typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1121 typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) 1122 1123 # Strip comments 1124 proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) 1125 1126 # Parse function typedef prototypes 1127 for r in [typedef1, typedef2]: 1128 if not r.match(proto): 1129 continue 1130 1131 return_type = r.group(1).strip() 1132 declaration_name = r.group(2) 1133 args = r.group(3) 1134 1135 if self.entry.identifier != declaration_name: 1136 self.emit_msg(ln, 1137 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1138 return 1139 1140 decl_type = 'function' 1141 self.create_parameter_list(ln, decl_type, args, ',', declaration_name) 1142 1143 self.output_declaration(decl_type, declaration_name, 1144 function=declaration_name, 1145 typedef=True, 1146 functiontype=return_type, 1147 parameterlist=self.entry.parameterlist, 1148 parameterdescs=self.entry.parameterdescs, 1149 parametertypes=self.entry.parametertypes, 1150 parameterdesc_start_lines=self.entry.parameterdesc_start_lines, 1151 sectionlist=self.entry.sectionlist, 1152 sections=self.entry.sections, 1153 section_start_lines=self.entry.section_start_lines, 1154 purpose=self.entry.declaration_purpose) 1155 return 1156 1157 # Handle nested parentheses or brackets 1158 r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') 1159 while r.search(proto): 1160 proto = r.sub('', proto) 1161 1162 # Parse simple typedefs 1163 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1164 if r.match(proto): 1165 declaration_name = r.group(1) 1166 1167 if self.entry.identifier != declaration_name: 1168 self.emit_msg(ln, 1169 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1170 return 1171 1172 self.output_declaration('typedef', declaration_name, 1173 typedef=declaration_name, 1174 sectionlist=self.entry.sectionlist, 1175 sections=self.entry.sections, 1176 section_start_lines=self.entry.section_start_lines, 1177 purpose=self.entry.declaration_purpose) 1178 return 1179 1180 self.emit_msg(ln, "error: Cannot parse typedef!") 1181 1182 @staticmethod 1183 def process_export(function_set, line): 1184 """ 1185 process EXPORT_SYMBOL* tags 1186 1187 This method doesn't use any variable from the class, so declare it 1188 with a staticmethod decorator. 1189 """ 1190 1191 # We support documenting some exported symbols with different 1192 # names. A horrible hack. 1193 suffixes = [ '_noprof' ] 1194 1195 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1196 # multiple export lines would violate Kernel coding style. 1197 1198 if export_symbol.search(line): 1199 symbol = export_symbol.group(2) 1200 for suffix in suffixes: 1201 symbol = symbol.removesuffix(suffix) 1202 function_set.add(symbol) 1203 return 1204 1205 if export_symbol_ns.search(line): 1206 symbol = export_symbol_ns.group(2) 1207 for suffix in suffixes: 1208 symbol = symbol.removesuffix(suffix) 1209 function_set.add(symbol) 1210 1211 def process_normal(self, ln, line): 1212 """ 1213 STATE_NORMAL: looking for the /** to begin everything. 1214 """ 1215 1216 if not doc_start.match(line): 1217 return 1218 1219 # start a new entry 1220 self.reset_state(ln) 1221 self.entry.in_doc_sect = False 1222 1223 # next line is always the function name 1224 self.state = state.NAME 1225 1226 def process_name(self, ln, line): 1227 """ 1228 STATE_NAME: Looking for the "name - description" line 1229 """ 1230 # 1231 # Check for a DOC: block and handle them specially. 1232 # 1233 if doc_block.search(line): 1234 self.entry.new_start_line = ln 1235 1236 if not doc_block.group(1): 1237 self.entry.section = "Introduction" 1238 else: 1239 self.entry.section = doc_block.group(1) 1240 1241 self.entry.identifier = self.entry.section 1242 self.state = state.DOCBLOCK 1243 # 1244 # Otherwise we're looking for a normal kerneldoc declaration line. 1245 # 1246 elif doc_decl.search(line): 1247 self.entry.identifier = doc_decl.group(1) 1248 1249 # Test for data declaration 1250 if doc_begin_data.search(line): 1251 self.entry.decl_type = doc_begin_data.group(1) 1252 self.entry.identifier = doc_begin_data.group(2) 1253 # 1254 # Look for a function description 1255 # 1256 elif doc_begin_func.search(line): 1257 self.entry.identifier = doc_begin_func.group(1) 1258 self.entry.decl_type = "function" 1259 # 1260 # We struck out. 1261 # 1262 else: 1263 self.emit_msg(ln, 1264 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1265 self.state = state.NORMAL 1266 return 1267 # 1268 # OK, set up for a new kerneldoc entry. 1269 # 1270 self.state = state.BODY 1271 self.entry.identifier = self.entry.identifier.strip(" ") 1272 # if there's no @param blocks need to set up default section here 1273 self.entry.section = SECTION_DEFAULT 1274 self.entry.new_start_line = ln + 1 1275 # 1276 # Find the description portion, which *should* be there but 1277 # isn't always. 1278 # (We should be able to capture this from the previous parsing - someday) 1279 # 1280 r = KernRe("[-:](.*)") 1281 if r.search(line): 1282 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1283 self.state = state.BODY_MAYBE 1284 else: 1285 self.entry.declaration_purpose = "" 1286 1287 if not self.entry.declaration_purpose and self.config.wshort_desc: 1288 self.emit_msg(ln, 1289 f"missing initial short description on line:\n{line}") 1290 1291 if not self.entry.identifier and self.entry.decl_type != "enum": 1292 self.emit_msg(ln, 1293 f"wrong kernel-doc identifier on line:\n{line}") 1294 self.state = state.NORMAL 1295 1296 if self.config.verbose: 1297 self.emit_msg(ln, 1298 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1299 warning=False) 1300 # 1301 # Failed to find an identifier. Emit a warning 1302 # 1303 else: 1304 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1305 1306 def process_body(self, ln, line): 1307 """ 1308 STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. 1309 """ 1310 1311 if self.state == state.BODY_WITH_BLANK_LINE: 1312 r = KernRe(r"\s*\*\s*\S") 1313 if r.match(line): 1314 self.dump_section() 1315 self.entry.section = SECTION_DEFAULT 1316 self.entry.new_start_line = ln 1317 self.entry.contents = "" 1318 1319 if doc_sect.search(line): 1320 self.entry.in_doc_sect = True 1321 newsection = doc_sect.group(1) 1322 1323 if newsection.lower() in ["description", "context"]: 1324 newsection = newsection.title() 1325 1326 # Special case: @return is a section, not a param description 1327 if newsection.lower() in ["@return", "@returns", 1328 "return", "returns"]: 1329 newsection = "Return" 1330 1331 # Perl kernel-doc has a check here for contents before sections. 1332 # the logic there is always false, as in_doc_sect variable is 1333 # always true. So, just don't implement Wcontents_before_sections 1334 1335 # .title() 1336 newcontents = doc_sect.group(2) 1337 if not newcontents: 1338 newcontents = "" 1339 1340 if self.entry.contents.strip("\n"): 1341 self.dump_section() 1342 1343 self.entry.new_start_line = ln 1344 self.entry.section = newsection 1345 self.entry.leading_space = None 1346 1347 self.entry.contents = newcontents.lstrip() 1348 if self.entry.contents: 1349 self.entry.contents += "\n" 1350 1351 self.state = state.BODY 1352 return 1353 1354 if doc_end.search(line): 1355 self.dump_section() 1356 1357 # Look for doc_com + <text> + doc_end: 1358 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') 1359 if r.match(line): 1360 self.emit_msg(ln, f"suspicious ending line: {line}") 1361 1362 self.entry.prototype = "" 1363 self.entry.new_start_line = ln + 1 1364 1365 self.state = state.PROTO 1366 return 1367 1368 if doc_content.search(line): 1369 cont = doc_content.group(1) 1370 1371 if cont == "": 1372 if self.entry.section == self.section_context: 1373 self.dump_section() 1374 1375 self.entry.new_start_line = ln 1376 self.state = state.BODY 1377 else: 1378 if self.entry.section != SECTION_DEFAULT: 1379 self.state = state.BODY_WITH_BLANK_LINE 1380 else: 1381 self.state = state.BODY 1382 1383 self.entry.contents += "\n" 1384 1385 elif self.state == state.BODY_MAYBE: 1386 1387 # Continued declaration purpose 1388 self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() 1389 self.entry.declaration_purpose += " " + cont 1390 1391 r = KernRe(r"\s+") 1392 self.entry.declaration_purpose = r.sub(' ', 1393 self.entry.declaration_purpose) 1394 1395 else: 1396 if self.entry.section.startswith('@') or \ 1397 self.entry.section == self.section_context: 1398 if self.entry.leading_space is None: 1399 r = KernRe(r'^(\s+)') 1400 if r.match(cont): 1401 self.entry.leading_space = len(r.group(1)) 1402 else: 1403 self.entry.leading_space = 0 1404 1405 # Double-check if leading space are realy spaces 1406 pos = 0 1407 for i in range(0, self.entry.leading_space): 1408 if cont[i] != " ": 1409 break 1410 pos += 1 1411 1412 cont = cont[pos:] 1413 1414 # NEW LOGIC: 1415 # In case it is different, update it 1416 if self.entry.leading_space != pos: 1417 self.entry.leading_space = pos 1418 1419 self.entry.contents += cont + "\n" 1420 return 1421 1422 # Unknown line, ignore 1423 self.emit_msg(ln, f"bad line: {line}") 1424 1425 def process_inline(self, ln, line): 1426 """STATE_INLINE: docbook comments within a prototype.""" 1427 1428 if self.inline_doc_state == state.INLINE_NAME and \ 1429 doc_inline_sect.search(line): 1430 self.entry.section = doc_inline_sect.group(1) 1431 self.entry.new_start_line = ln 1432 1433 self.entry.contents = doc_inline_sect.group(2).lstrip() 1434 if self.entry.contents != "": 1435 self.entry.contents += "\n" 1436 1437 self.inline_doc_state = state.INLINE_TEXT 1438 # Documentation block end */ 1439 return 1440 1441 if doc_inline_end.search(line): 1442 if self.entry.contents not in ["", "\n"]: 1443 self.dump_section() 1444 1445 self.state = state.PROTO 1446 self.inline_doc_state = state.INLINE_NA 1447 return 1448 1449 if doc_content.search(line): 1450 if self.inline_doc_state == state.INLINE_TEXT: 1451 self.entry.contents += doc_content.group(1) + "\n" 1452 if not self.entry.contents.strip(" ").rstrip("\n"): 1453 self.entry.contents = "" 1454 1455 elif self.inline_doc_state == state.INLINE_NAME: 1456 self.emit_msg(ln, 1457 f"Incorrect use of kernel-doc format: {line}") 1458 1459 self.inline_doc_state = state.INLINE_ERROR 1460 1461 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1462 """ 1463 Handle syscall definitions 1464 """ 1465 1466 is_void = False 1467 1468 # Strip newlines/CR's 1469 proto = re.sub(r'[\r\n]+', ' ', proto) 1470 1471 # Check if it's a SYSCALL_DEFINE0 1472 if 'SYSCALL_DEFINE0' in proto: 1473 is_void = True 1474 1475 # Replace SYSCALL_DEFINE with correct return type & function name 1476 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1477 1478 r = KernRe(r'long\s+(sys_.*?),') 1479 if r.search(proto): 1480 proto = KernRe(',').sub('(', proto, count=1) 1481 elif is_void: 1482 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1483 1484 # Now delete all of the odd-numbered commas in the proto 1485 # so that argument types & names don't have a comma between them 1486 count = 0 1487 length = len(proto) 1488 1489 if is_void: 1490 length = 0 # skip the loop if is_void 1491 1492 for ix in range(length): 1493 if proto[ix] == ',': 1494 count += 1 1495 if count % 2 == 1: 1496 proto = proto[:ix] + ' ' + proto[ix + 1:] 1497 1498 return proto 1499 1500 def tracepoint_munge(self, ln, proto): 1501 """ 1502 Handle tracepoint definitions 1503 """ 1504 1505 tracepointname = None 1506 tracepointargs = None 1507 1508 # Match tracepoint name based on different patterns 1509 r = KernRe(r'TRACE_EVENT\((.*?),') 1510 if r.search(proto): 1511 tracepointname = r.group(1) 1512 1513 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1514 if r.search(proto): 1515 tracepointname = r.group(1) 1516 1517 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1518 if r.search(proto): 1519 tracepointname = r.group(2) 1520 1521 if tracepointname: 1522 tracepointname = tracepointname.lstrip() 1523 1524 r = KernRe(r'TP_PROTO\((.*?)\)') 1525 if r.search(proto): 1526 tracepointargs = r.group(1) 1527 1528 if not tracepointname or not tracepointargs: 1529 self.emit_msg(ln, 1530 f"Unrecognized tracepoint format:\n{proto}\n") 1531 else: 1532 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1533 self.entry.identifier = f"trace_{self.entry.identifier}" 1534 1535 return proto 1536 1537 def process_proto_function(self, ln, line): 1538 """Ancillary routine to process a function prototype""" 1539 1540 # strip C99-style comments to end of line 1541 r = KernRe(r"\/\/.*$", re.S) 1542 line = r.sub('', line) 1543 1544 if KernRe(r'\s*#\s*define').match(line): 1545 self.entry.prototype = line 1546 elif line.startswith('#'): 1547 # Strip other macros like #ifdef/#ifndef/#endif/... 1548 pass 1549 else: 1550 r = KernRe(r'([^\{]*)') 1551 if r.match(line): 1552 self.entry.prototype += r.group(1) + " " 1553 1554 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1555 # strip comments 1556 r = KernRe(r'/\*.*?\*/') 1557 self.entry.prototype = r.sub('', self.entry.prototype) 1558 1559 # strip newlines/cr's 1560 r = KernRe(r'[\r\n]+') 1561 self.entry.prototype = r.sub(' ', self.entry.prototype) 1562 1563 # strip leading spaces 1564 r = KernRe(r'^\s+') 1565 self.entry.prototype = r.sub('', self.entry.prototype) 1566 1567 # Handle self.entry.prototypes for function pointers like: 1568 # int (*pcs_config)(struct foo) 1569 1570 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1571 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1572 1573 if 'SYSCALL_DEFINE' in self.entry.prototype: 1574 self.entry.prototype = self.syscall_munge(ln, 1575 self.entry.prototype) 1576 1577 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1578 if r.search(self.entry.prototype): 1579 self.entry.prototype = self.tracepoint_munge(ln, 1580 self.entry.prototype) 1581 1582 self.dump_function(ln, self.entry.prototype) 1583 self.reset_state(ln) 1584 1585 def process_proto_type(self, ln, line): 1586 """Ancillary routine to process a type""" 1587 1588 # Strip newlines/cr's. 1589 line = KernRe(r'[\r\n]+', re.S).sub(' ', line) 1590 1591 # Strip leading spaces 1592 line = KernRe(r'^\s+', re.S).sub('', line) 1593 1594 # Strip trailing spaces 1595 line = KernRe(r'\s+$', re.S).sub('', line) 1596 1597 # Strip C99-style comments to the end of the line 1598 line = KernRe(r"\/\/.*$", re.S).sub('', line) 1599 1600 # To distinguish preprocessor directive from regular declaration later. 1601 if line.startswith('#'): 1602 line += ";" 1603 1604 r = KernRe(r'([^\{\};]*)([\{\};])(.*)') 1605 while True: 1606 if r.search(line): 1607 if self.entry.prototype: 1608 self.entry.prototype += " " 1609 self.entry.prototype += r.group(1) + r.group(2) 1610 1611 self.entry.brcount += r.group(2).count('{') 1612 self.entry.brcount -= r.group(2).count('}') 1613 1614 self.entry.brcount = max(self.entry.brcount, 0) 1615 1616 if r.group(2) == ';' and self.entry.brcount == 0: 1617 self.dump_declaration(ln, self.entry.prototype) 1618 self.reset_state(ln) 1619 break 1620 1621 line = r.group(3) 1622 else: 1623 self.entry.prototype += line 1624 break 1625 1626 def process_proto(self, ln, line): 1627 """STATE_PROTO: reading a function/whatever prototype.""" 1628 1629 if doc_inline_oneline.search(line): 1630 self.entry.section = doc_inline_oneline.group(1) 1631 self.entry.contents = doc_inline_oneline.group(2) 1632 1633 if self.entry.contents != "": 1634 self.entry.contents += "\n" 1635 self.dump_section(start_new=False) 1636 1637 elif doc_inline_start.search(line): 1638 self.state = state.INLINE 1639 self.inline_doc_state = state.INLINE_NAME 1640 1641 elif self.entry.decl_type == 'function': 1642 self.process_proto_function(ln, line) 1643 1644 else: 1645 self.process_proto_type(ln, line) 1646 1647 def process_docblock(self, ln, line): 1648 """STATE_DOCBLOCK: within a DOC: block.""" 1649 1650 if doc_end.search(line): 1651 self.dump_section() 1652 self.output_declaration("doc", self.entry.identifier, 1653 sectionlist=self.entry.sectionlist, 1654 sections=self.entry.sections, 1655 section_start_lines=self.entry.section_start_lines) 1656 self.reset_state(ln) 1657 1658 elif doc_content.search(line): 1659 self.entry.contents += doc_content.group(1) + "\n" 1660 1661 def parse_export(self): 1662 """ 1663 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1664 """ 1665 1666 export_table = set() 1667 1668 try: 1669 with open(self.fname, "r", encoding="utf8", 1670 errors="backslashreplace") as fp: 1671 1672 for line in fp: 1673 self.process_export(export_table, line) 1674 1675 except IOError: 1676 return None 1677 1678 return export_table 1679 1680 # 1681 # The state/action table telling us which function to invoke in 1682 # each state. 1683 # 1684 state_actions = { 1685 state.NORMAL: process_normal, 1686 state.NAME: process_name, 1687 state.BODY: process_body, 1688 state.BODY_MAYBE: process_body, 1689 state.BODY_WITH_BLANK_LINE: process_body, 1690 state.INLINE: process_inline, 1691 state.PROTO: process_proto, 1692 state.DOCBLOCK: process_docblock, 1693 } 1694 1695 def parse_kdoc(self): 1696 """ 1697 Open and process each line of a C source file. 1698 The parsing is controlled via a state machine, and the line is passed 1699 to a different process function depending on the state. The process 1700 function may update the state as needed. 1701 1702 Besides parsing kernel-doc tags, it also parses export symbols. 1703 """ 1704 1705 prev = "" 1706 prev_ln = None 1707 export_table = set() 1708 1709 try: 1710 with open(self.fname, "r", encoding="utf8", 1711 errors="backslashreplace") as fp: 1712 for ln, line in enumerate(fp): 1713 1714 line = line.expandtabs().strip("\n") 1715 1716 # Group continuation lines on prototypes 1717 if self.state == state.PROTO: 1718 if line.endswith("\\"): 1719 prev += line.rstrip("\\") 1720 if not prev_ln: 1721 prev_ln = ln 1722 continue 1723 1724 if prev: 1725 ln = prev_ln 1726 line = prev + line 1727 prev = "" 1728 prev_ln = None 1729 1730 self.config.log.debug("%d %s%s: %s", 1731 ln, state.name[self.state], 1732 state.inline_name[self.inline_doc_state], 1733 line) 1734 1735 # This is an optimization over the original script. 1736 # There, when export_file was used for the same file, 1737 # it was read twice. Here, we use the already-existing 1738 # loop to parse exported symbols as well. 1739 # 1740 # TODO: It should be noticed that not all states are 1741 # needed here. On a future cleanup, process export only 1742 # at the states that aren't handling comment markups. 1743 self.process_export(export_table, line) 1744 1745 # Hand this line to the appropriate state handler 1746 self.state_actions[self.state](self, ln, line) 1747 1748 except OSError: 1749 self.config.log.error(f"Error: Cannot open file {self.fname}") 1750 1751 return export_table, self.entries 1752