#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702

"""
kdoc_parser
===========

Read a C language source or header FILE and extract embedded
documentation comments
"""

import sys
import re
from pprint import pformat

from kdoc_re import NestedMatch, KernRe
from kdoc_item import KdocItem

#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# Let's declare them in lowercase outside any class to make easier to
# convert from the python script.
#
# As those are evaluated at the beginning, no need to cache them
#

# Start of a kernel-doc comment: "/**" alone on the line.
# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# End of comment, the leading " * " of a comment line (with and without
# swallowing the text indentation), and the first word after that prefix.
doc_end = KernRe(r'\*/', cache=False)
doc_com = KernRe(r'\s*\*\s*', cache=False)
doc_com_body = KernRe(r'\s*\* ?', cache=False)
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
known_sections = KernRe(known_section_names, flags = re.I)
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Generic comment body line, plus the markers used by inline
# ("/** @member: text */") documentation comments.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL*() lines; group 2 is the exported symbol name in both forms.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name", "@a.b", "@a->b" or "@name..." style parameter reference;
# group 1 is the name without the leading "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                      # initial " * "
                        r"(?:\w+\s*\*\s*)?" +               # type (not captured)
                        r'(?:define\s+)?' +                 # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +        # name and optional "(...)"
                        r'(?:[-:].*)?$',                    # description (not captured)
                        cache = False)

#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
#
# A single macro argument: anything up to the next comma or closing parenthesis.
struct_args_pattern = r'([^,)]+)'

#
# List of (pattern, replacement) pairs applied, in order, to the body of a
# struct/union before its members are parsed.
#
# NOTE: re.S is always passed with the explicit "flags=" keyword, like the
# other KernRe() calls in this file that set flags, so that it can never be
# taken for a different positional parameter (presumably "cache" - confirm
# against KernRe's signature). None of the patterns that carry re.S uses ".",
# so the flag does not change what they match.
#
struct_xforms = [
    # Strip attributes
    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
    (KernRe(r'\s*__aligned\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__counted_by\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__packed\s*', flags=re.S), ' '),
    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', flags=re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned_in_smp', flags=re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned', flags=re.S), ' '),
    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
    #
    # Unwrap struct_group macros based on this definition:
    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
    # which has variants like: struct_group(NAME, MEMBERS...)
    # Only MEMBERS arguments require documentation.
    #
    # Parsing them happens on two steps:
    #
    # 1. drop struct group arguments that aren't at MEMBERS,
    #    storing them as STRUCT_GROUP(MEMBERS)
    #
    # 2. remove STRUCT_GROUP() ancillary macro.
    #
    # The original logic used to remove STRUCT_GROUP() using an
    # advanced regex:
    #
    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
    #
    # with two patterns that are incompatible with
    # Python re module, as it has:
    #
    #   - a recursive pattern: (?1)
    #   - an atomic grouping: (?>...)
    #
    # I tried a simpler version: but it didn't work either:
    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
    #
    # As it doesn't properly match the end parenthesis on some cases.
    #
    # So, a better solution was crafted: there's now a NestedMatch
    # class that ensures that delimiters after a search are properly
    # matched. So, the implementation to drop STRUCT_GROUP() will be
    # handled in separate.
    #
    (KernRe(r'\bstruct_group\s*\(([^,]*,)', flags=re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', flags=re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', flags=re.S), r'struct \1 \2; STRUCT_GROUP('),
    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', flags=re.S), r'STRUCT_GROUP('),
    #
    # Replace macros
    #
    # TODO: use NestedMatch for FOO($1, $2, ...) matches
    #
    # it is better to also move those to the NestedMatch logic,
    # to ensure that parenthesis will be properly matched.
    #
    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', flags=re.S),
     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', flags=re.S),
     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            flags=re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            flags=re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
            r',\s*' + struct_args_pattern + r'\)', flags=re.S), r'\2 *\1'),
    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', flags=re.S), r'\2 *\1'),
    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', flags=re.S), r'\1 \2[]'),
    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', flags=re.S), r'dma_addr_t \1'),
    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', flags=re.S), r'__u32 \1'),
]
#
# Regexes here are guaranteed to have the end limiter matching
# the start delimiter. Yet, right now, only one replace group
# is allowed.
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Transforms for function prototypes
#
function_xforms = [
    (KernRe(r"^static +"), ""),
    (KernRe(r"^extern +"), ""),
    (KernRe(r"^asmlinkage +"), ""),
    (KernRe(r"^inline +"), ""),
    (KernRe(r"^__inline__ +"), ""),
    (KernRe(r"^__inline +"), ""),
    (KernRe(r"^__always_inline +"), ""),
    (KernRe(r"^noinline +"), ""),
    (KernRe(r"^__FORTIFY_INLINE +"), ""),
    (KernRe(r"__init +"), ""),
    (KernRe(r"__init_or_module +"), ""),
    (KernRe(r"__deprecated +"), ""),
    (KernRe(r"__flatten +"), ""),
    (KernRe(r"__meminit +"), ""),
    (KernRe(r"__must_check +"), ""),
    (KernRe(r"__weak +"), ""),
    (KernRe(r"__sched +"), ""),
    (KernRe(r"_noprof"), ""),
    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
    (KernRe(r"__attribute_const__ +"), ""),
    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]

#
# Apply a set of transforms to a block of text.
#
def apply_transforms(xforms, text):
    """
    Run every (pattern, replacement) pair of ``xforms`` over ``text``,
    in order, and return the resulting string.
    """
    for search, subst in xforms:
        text = search.sub(subst, text)
    return text

#
# A little helper to get rid of excess white space
#
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    Strip ``s`` and collapse each run of two or more whitespace
    characters into a single space.
    """
    return multi_space.sub(' ', s.strip())

#
# Remove struct/enum members that have been marked "private".
#
def trim_private_members(text):
    """
    Return ``text`` without the members placed between a
    ``/* private: */`` marker and the next ``/* public: */`` marker (or
    the end of the text), and without any remaining C comments.
    """
    #
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    State machine enums
    """

    # Parser states
    NORMAL        = 0        # normal code
    NAME          = 1        # looking for function name
    DECLARATION   = 2        # We have seen a declaration which might not be done
    BODY          = 3        # the body of the comment
    SPECIAL_SECTION = 4      # doc section ending with a blank line
    PROTO         = 5        # scanning prototype
    DOCBLOCK      = 6        # documentation block
    INLINE_NAME   = 7        # gathering doc outside main block
    INLINE_TEXT   = 8        # reading the body of inline docs

    # Printable names, indexed by the state values above
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  # default section

class KernelEntry:
    """
    Data gathered for one kernel-doc comment while it is being parsed:
    the text of the section being read, the finished sections, the
    parameter descriptions/types and the warnings raised so far.
    """

    def __init__(self, config, ln, fname=None):
        """
        Create an empty entry.

        ``config`` provides the logger used by emit_msg(), ``ln`` is the
        line where the comment starts and the optional ``fname`` is only
        used to prefix warnings generated by the entry itself.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # dump_section() reads both fields below, so give them the same
        # values begin_section() would use by default instead of relying
        # on begin_section() always being called first.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Append one line of text to the section being gathered."""
        self._contents.append(text)

    def contents(self):
        """Return the gathered lines joined by (and ending with) a newline."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, log_msg, warning=True):
        """
        Emit a message.

        Informational messages are logged right away; warnings are only
        stored at self.warnings.
        """

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    #
    # Begin a new section.
    #
    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Start gathering section ``title`` at line ``line_no``, optionally
        storing the section gathered so far first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections named after a parameter ("@name") are stored at
        parameterdescs; the other ones go to sections. With ``start_new``
        the gathered text is discarded afterwards and the current section
        goes back to the default one.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    #
                    # emit_msg() here takes an already formatted message,
                    # so the location has to be added by hand.
                    #
                    location = f"{self.new_start_line}"
                    if self.fname:
                        location = f"{self.fname}:{location}"
                    self.emit_msg(f"{location} duplicate section name '{name}'\n")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []


class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    # Section names

    section_context = "Context"
    section_return = "Return"

    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        if sys.version_info.major == 3 and sys.version_info.minor < 7:
            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')

    def emit_msg(self, ln, msg, warning=True):
        """
        Emit a message, prefixed with "file:line".

        While an entry is being processed the message is handed to it;
        otherwise it goes straight to the logger.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if self.entry:
            self.entry.emit_msg(log_msg, warning)
            return

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Stores the entry into an entry array.

        The actual output and output filters will be handled elsewhere
        """

        item = KdocItem(name, dtype, self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        self.entry = KernelEntry(self.config, ln, self.fname)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        A warning is emitted when ``param`` has no description yet,
        unless it is a preprocessor line or a nested ("a.b") member.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                if len(param) > 3: # there is a name provided, use that
                    param = param[:-3]
                if not self.entry.parameterdescs.get(param):
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg


    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is the text holding the arguments (or struct members),
        separated by ``splitter``.
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.

                # Treat preprocessor directive as a typeless variable
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                # Use a separate name so the "args" parameter is not rebound
                names = KernRe(r'\s*,\s*').split(arg)
                names[0] = re.sub(r'(\*+)\s*', r' \1', names[0])
                #
                # names[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(names[0]):
                    names[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in names[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in names:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check for errors inside sections, emitting warnings if not found
        parameters are described.

        A section is reported as excess when its name is neither at the
        parameter list nor one of the known section names.
        """
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.

        Nothing is checked unless config.wreturn is set.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    #
    # Split apart a structure prototype; returns (struct|union, name, members) or None
    #
    def split_struct_proto(self, proto):
        """
        Split a struct/union prototype, in plain or typedef form, into
        a (struct|union, name, members) tuple. Returns None when
        ``proto`` matches neither form.
        """
        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))

        # In the typedef form the name comes after the body
        r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
        if r.search(proto):
            return (r.group(1), r.group(3), r.group(2))
        return None

    #
    # Rewrite the members of a structure or union for easier formatting later on.
    # Among other things, this function will turn a member like:
    #
    #  struct { inner_members; } foo;
    #
    # into:
    #
    #  struct foo; inner_members;
    #
    def rewrite_struct_members(self, members):
        """
        Flatten nested struct/union members of ``members``, prefixing the
        inner member names with the name of the nested struct/union, and
        return the rewritten text.
        """
        #
        # Process struct/union members from the most deeply nested outward.  The
        # trick is in the ^{ below - it prevents a match of an outer struct/union
        # until the inner one has been munged (removing the "{" in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t) # Reconstruct the original formatting
                dtype, _name, _lbr, content, _rbr, rest, _semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Use names of their own here: reusing "dtype"
                            # would clobber the struct/union keyword needed
                            # by the next s_id of this declaration.
                            #
                            fn_type, fn_name, extra = r.group(1), r.group(2), r.group(3)
                            # Pointer-to-function
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{fn_type}{fn_name}{extra}; "
                            else:
                                newmember += f"{fn_type}{s_id}.{fn_name}{extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                member_type = r.group(1)
                                for member in r.group(2).split(','):
                                    member = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', member)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{member_type} {member}; "
                                    else:
                                        newmember += f"{member_type} {s_id}.{member}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    #
    # Format the struct declaration into a standard form for inclusion in the
    # resulting docs.
    #
    def format_struct_decl(self, declaration):
        """
        Return ``declaration`` with one clause per line, each of them
        indented with tabs according to its nesting level.
        """
        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if clause:
                if '}' in clause and level > 1:
                    level -= 1
                # Preprocessor lines get a single tab only
                if not clause.startswith('#'):
                    declaration += "\t" * level
                declaration += "\t" + clause + "\n"
                if "{" in clause and "}" not in clause:
                    level += 1
        return declaration


    def dump_struct(self, ln, proto):
        """
        Store an entry for an struct or union
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        members = apply_transforms(struct_xforms, members)

        nested = NestedMatch()
        for search, sub in struct_nested_prefixes:
            members = nested.sub(search, sub, members)
        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '%{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Stores a data declaration inside self.entries array.

        The work is handed to dump_enum(), dump_typedef() or
        dump_struct(), depending on self.entry.decl_type.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype):
        """
        Stores a function of function macro inside self.entries array.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'
        #
        # Apply the initial transformations.
        #
        prototype = apply_transforms(function_xforms, prototype)
        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.
        #
        self.output_declaration(decl_type, declaration_name,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)


    def dump_typedef(self, ln, proto):
        """
        Stores a typedef inside self.entries array.

        Function typedefs are stored as functions, with their parameter
        list; any other typedef is stored by name only.
        """
        #
        # We start by looking for function typedefs.
        #
        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.create_parameter_list(ln, 'function', args, ',', declaration_name)

            self.output_declaration('function', declaration_name,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return
        #
        # Not a function, try to parse a simple typedef.
        #
        r = KernRe(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_msg(ln, "error: Cannot parse typedef!")

    @staticmethod
    def process_export(function_set, line):
        """
        process EXPORT_SYMBOL* tags

        This method doesn't use any variable from the class, so declare it
        with a staticmethod decorator.

        The exported symbol, if any, is added to ``function_set``.
        Returns True when ``line`` has an export and False otherwise.
        """

        # We support documenting some exported symbols with different
        # names.  A horrible hack.
        suffixes = [ '_noprof' ]

        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
        # multiple export lines would violate Kernel coding style.

        if export_symbol.search(line):
            symbol = export_symbol.group(2)
        elif export_symbol_ns.search(line):
            symbol = export_symbol_ns.group(2)
        else:
            return False
        #
        # Found an export, trim out any special suffixes
        #
        for suffix in suffixes:
            # Be backward compatible with Python < 3.9
            if symbol.endswith(suffix):
                symbol = symbol[:-len(suffix)]
        function_set.add(symbol)
        return True

    def process_normal(self, ln, line):
        """
        STATE_NORMAL: looking for the /** to begin everything.
        """

        if not doc_start.match(line):
            return

        # start a new entry
        self.reset_state(ln)

        # next line is always the function name
        self.state = state.NAME

    def process_name(self, ln, line):
        """
        STATE_NAME: Looking for the "name - description" line
        """
        #
        # Check for a DOC: block and handle them specially.
1130 # 1131 if doc_block.search(line): 1132 1133 if not doc_block.group(1): 1134 self.entry.begin_section(ln, "Introduction") 1135 else: 1136 self.entry.begin_section(ln, doc_block.group(1)) 1137 1138 self.entry.identifier = self.entry.section 1139 self.state = state.DOCBLOCK 1140 # 1141 # Otherwise we're looking for a normal kerneldoc declaration line. 1142 # 1143 elif doc_decl.search(line): 1144 self.entry.identifier = doc_decl.group(1) 1145 1146 # Test for data declaration 1147 if doc_begin_data.search(line): 1148 self.entry.decl_type = doc_begin_data.group(1) 1149 self.entry.identifier = doc_begin_data.group(2) 1150 # 1151 # Look for a function description 1152 # 1153 elif doc_begin_func.search(line): 1154 self.entry.identifier = doc_begin_func.group(1) 1155 self.entry.decl_type = "function" 1156 # 1157 # We struck out. 1158 # 1159 else: 1160 self.emit_msg(ln, 1161 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1162 self.state = state.NORMAL 1163 return 1164 # 1165 # OK, set up for a new kerneldoc entry. 1166 # 1167 self.state = state.BODY 1168 self.entry.identifier = self.entry.identifier.strip(" ") 1169 # if there's no @param blocks need to set up default section here 1170 self.entry.begin_section(ln + 1) 1171 # 1172 # Find the description portion, which *should* be there but 1173 # isn't always. 
1174 # (We should be able to capture this from the previous parsing - someday) 1175 # 1176 r = KernRe("[-:](.*)") 1177 if r.search(line): 1178 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1179 self.state = state.DECLARATION 1180 else: 1181 self.entry.declaration_purpose = "" 1182 1183 if not self.entry.declaration_purpose and self.config.wshort_desc: 1184 self.emit_msg(ln, 1185 f"missing initial short description on line:\n{line}") 1186 1187 if not self.entry.identifier and self.entry.decl_type != "enum": 1188 self.emit_msg(ln, 1189 f"wrong kernel-doc identifier on line:\n{line}") 1190 self.state = state.NORMAL 1191 1192 if self.config.verbose: 1193 self.emit_msg(ln, 1194 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1195 warning=False) 1196 # 1197 # Failed to find an identifier. Emit a warning 1198 # 1199 else: 1200 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1201 1202 # 1203 # Helper function to determine if a new section is being started. 1204 # 1205 def is_new_section(self, ln, line): 1206 if doc_sect.search(line): 1207 self.state = state.BODY 1208 # 1209 # Pick out the name of our new section, tweaking it if need be. 1210 # 1211 newsection = doc_sect.group(1) 1212 if newsection.lower() == 'description': 1213 newsection = 'Description' 1214 elif newsection.lower() == 'context': 1215 newsection = 'Context' 1216 self.state = state.SPECIAL_SECTION 1217 elif newsection.lower() in ["@return", "@returns", 1218 "return", "returns"]: 1219 newsection = "Return" 1220 self.state = state.SPECIAL_SECTION 1221 elif newsection[0] == '@': 1222 self.state = state.SPECIAL_SECTION 1223 # 1224 # Initialize the contents, and get the new section going. 
1225 # 1226 newcontents = doc_sect.group(2) 1227 if not newcontents: 1228 newcontents = "" 1229 self.dump_section() 1230 self.entry.begin_section(ln, newsection) 1231 self.entry.leading_space = None 1232 1233 self.entry.add_text(newcontents.lstrip()) 1234 return True 1235 return False 1236 1237 # 1238 # Helper function to detect (and effect) the end of a kerneldoc comment. 1239 # 1240 def is_comment_end(self, ln, line): 1241 if doc_end.search(line): 1242 self.dump_section() 1243 1244 # Look for doc_com + <text> + doc_end: 1245 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1246 if r.match(line): 1247 self.emit_msg(ln, f"suspicious ending line: {line}") 1248 1249 self.entry.prototype = "" 1250 self.entry.new_start_line = ln + 1 1251 1252 self.state = state.PROTO 1253 return True 1254 return False 1255 1256 1257 def process_decl(self, ln, line): 1258 """ 1259 STATE_DECLARATION: We've seen the beginning of a declaration 1260 """ 1261 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1262 return 1263 # 1264 # Look for anything with the " * " line beginning. 1265 # 1266 if doc_content.search(line): 1267 cont = doc_content.group(1) 1268 # 1269 # A blank line means that we have moved out of the declaration 1270 # part of the comment (without any "special section" parameter 1271 # descriptions). 1272 # 1273 if cont == "": 1274 self.state = state.BODY 1275 # 1276 # Otherwise we have more of the declaration section to soak up. 1277 # 1278 else: 1279 self.entry.declaration_purpose = \ 1280 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1281 else: 1282 # Unknown line, ignore 1283 self.emit_msg(ln, f"bad line: {line}") 1284 1285 1286 def process_special(self, ln, line): 1287 """ 1288 STATE_SPECIAL_SECTION: a section ending with a blank line 1289 """ 1290 # 1291 # If we have hit a blank line (only the " * " marker), then this 1292 # section is done. 
1293 # 1294 if KernRe(r"\s*\*\s*$").match(line): 1295 self.entry.begin_section(ln, dump = True) 1296 self.state = state.BODY 1297 return 1298 # 1299 # Not a blank line, look for the other ways to end the section. 1300 # 1301 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1302 return 1303 # 1304 # OK, we should have a continuation of the text for this section. 1305 # 1306 if doc_content.search(line): 1307 cont = doc_content.group(1) 1308 # 1309 # If the lines of text after the first in a special section have 1310 # leading white space, we need to trim it out or Sphinx will get 1311 # confused. For the second line (the None case), see what we 1312 # find there and remember it. 1313 # 1314 if self.entry.leading_space is None: 1315 r = KernRe(r'^(\s+)') 1316 if r.match(cont): 1317 self.entry.leading_space = len(r.group(1)) 1318 else: 1319 self.entry.leading_space = 0 1320 # 1321 # Otherwise, before trimming any leading chars, be *sure* 1322 # that they are white space. We should maybe warn if this 1323 # isn't the case. 1324 # 1325 for i in range(0, self.entry.leading_space): 1326 if cont[i] != " ": 1327 self.entry.leading_space = i 1328 break 1329 # 1330 # Add the trimmed result to the section and we're done. 1331 # 1332 self.entry.add_text(cont[self.entry.leading_space:]) 1333 else: 1334 # Unknown line, ignore 1335 self.emit_msg(ln, f"bad line: {line}") 1336 1337 def process_body(self, ln, line): 1338 """ 1339 STATE_BODY: the bulk of a kerneldoc comment. 
1340 """ 1341 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1342 return 1343 1344 if doc_content.search(line): 1345 cont = doc_content.group(1) 1346 self.entry.add_text(cont) 1347 else: 1348 # Unknown line, ignore 1349 self.emit_msg(ln, f"bad line: {line}") 1350 1351 def process_inline_name(self, ln, line): 1352 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1353 1354 if doc_inline_sect.search(line): 1355 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1356 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1357 self.state = state.INLINE_TEXT 1358 elif doc_inline_end.search(line): 1359 self.dump_section() 1360 self.state = state.PROTO 1361 elif doc_content.search(line): 1362 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1363 self.state = state.PROTO 1364 # else ... ?? 1365 1366 def process_inline_text(self, ln, line): 1367 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1368 1369 if doc_inline_end.search(line): 1370 self.dump_section() 1371 self.state = state.PROTO 1372 elif doc_content.search(line): 1373 self.entry.add_text(doc_content.group(1)) 1374 # else ... ?? 
1375 1376 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1377 """ 1378 Handle syscall definitions 1379 """ 1380 1381 is_void = False 1382 1383 # Strip newlines/CR's 1384 proto = re.sub(r'[\r\n]+', ' ', proto) 1385 1386 # Check if it's a SYSCALL_DEFINE0 1387 if 'SYSCALL_DEFINE0' in proto: 1388 is_void = True 1389 1390 # Replace SYSCALL_DEFINE with correct return type & function name 1391 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1392 1393 r = KernRe(r'long\s+(sys_.*?),') 1394 if r.search(proto): 1395 proto = KernRe(',').sub('(', proto, count=1) 1396 elif is_void: 1397 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1398 1399 # Now delete all of the odd-numbered commas in the proto 1400 # so that argument types & names don't have a comma between them 1401 count = 0 1402 length = len(proto) 1403 1404 if is_void: 1405 length = 0 # skip the loop if is_void 1406 1407 for ix in range(length): 1408 if proto[ix] == ',': 1409 count += 1 1410 if count % 2 == 1: 1411 proto = proto[:ix] + ' ' + proto[ix + 1:] 1412 1413 return proto 1414 1415 def tracepoint_munge(self, ln, proto): 1416 """ 1417 Handle tracepoint definitions 1418 """ 1419 1420 tracepointname = None 1421 tracepointargs = None 1422 1423 # Match tracepoint name based on different patterns 1424 r = KernRe(r'TRACE_EVENT\((.*?),') 1425 if r.search(proto): 1426 tracepointname = r.group(1) 1427 1428 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1429 if r.search(proto): 1430 tracepointname = r.group(1) 1431 1432 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1433 if r.search(proto): 1434 tracepointname = r.group(2) 1435 1436 if tracepointname: 1437 tracepointname = tracepointname.lstrip() 1438 1439 r = KernRe(r'TP_PROTO\((.*?)\)') 1440 if r.search(proto): 1441 tracepointargs = r.group(1) 1442 1443 if not tracepointname or not tracepointargs: 1444 self.emit_msg(ln, 1445 f"Unrecognized tracepoint format:\n{proto}\n") 1446 else: 1447 proto = f"static inline void 
trace_{tracepointname}({tracepointargs})" 1448 self.entry.identifier = f"trace_{self.entry.identifier}" 1449 1450 return proto 1451 1452 def process_proto_function(self, ln, line): 1453 """Ancillary routine to process a function prototype""" 1454 1455 # strip C99-style comments to end of line 1456 line = KernRe(r"//.*$", re.S).sub('', line) 1457 # 1458 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1459 # 1460 if KernRe(r'\s*#\s*define').match(line): 1461 self.entry.prototype = line 1462 elif not line.startswith('#'): # skip other preprocessor stuff 1463 r = KernRe(r'([^\{]*)') 1464 if r.match(line): 1465 self.entry.prototype += r.group(1) + " " 1466 # 1467 # If we now have the whole prototype, clean it up and declare victory. 1468 # 1469 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1470 # strip comments and surrounding spaces 1471 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1472 # 1473 # Handle self.entry.prototypes for function pointers like: 1474 # int (*pcs_config)(struct foo) 1475 # by turning it into 1476 # int pcs_config(struct foo) 1477 # 1478 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1479 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1480 # 1481 # Handle special declaration syntaxes 1482 # 1483 if 'SYSCALL_DEFINE' in self.entry.prototype: 1484 self.entry.prototype = self.syscall_munge(ln, 1485 self.entry.prototype) 1486 else: 1487 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1488 if r.search(self.entry.prototype): 1489 self.entry.prototype = self.tracepoint_munge(ln, 1490 self.entry.prototype) 1491 # 1492 # ... 
and we're done 1493 # 1494 self.dump_function(ln, self.entry.prototype) 1495 self.reset_state(ln) 1496 1497 def process_proto_type(self, ln, line): 1498 """Ancillary routine to process a type""" 1499 1500 # Strip C99-style comments and surrounding whitespace 1501 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1502 if not line: 1503 return # nothing to see here 1504 1505 # To distinguish preprocessor directive from regular declaration later. 1506 if line.startswith('#'): 1507 line += ";" 1508 # 1509 # Split the declaration on any of { } or ;, and accumulate pieces 1510 # until we hit a semicolon while not inside {brackets} 1511 # 1512 r = KernRe(r'(.*?)([{};])') 1513 for chunk in r.split(line): 1514 if chunk: # Ignore empty matches 1515 self.entry.prototype += chunk 1516 # 1517 # This cries out for a match statement ... someday after we can 1518 # drop Python 3.9 ... 1519 # 1520 if chunk == '{': 1521 self.entry.brcount += 1 1522 elif chunk == '}': 1523 self.entry.brcount -= 1 1524 elif chunk == ';' and self.entry.brcount <= 0: 1525 self.dump_declaration(ln, self.entry.prototype) 1526 self.reset_state(ln) 1527 return 1528 # 1529 # We hit the end of the line while still in the declaration; put 1530 # in a space to represent the newline. 
1531 # 1532 self.entry.prototype += ' ' 1533 1534 def process_proto(self, ln, line): 1535 """STATE_PROTO: reading a function/whatever prototype.""" 1536 1537 if doc_inline_oneline.search(line): 1538 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1539 self.entry.add_text(doc_inline_oneline.group(2)) 1540 self.dump_section() 1541 1542 elif doc_inline_start.search(line): 1543 self.state = state.INLINE_NAME 1544 1545 elif self.entry.decl_type == 'function': 1546 self.process_proto_function(ln, line) 1547 1548 else: 1549 self.process_proto_type(ln, line) 1550 1551 def process_docblock(self, ln, line): 1552 """STATE_DOCBLOCK: within a DOC: block.""" 1553 1554 if doc_end.search(line): 1555 self.dump_section() 1556 self.output_declaration("doc", self.entry.identifier) 1557 self.reset_state(ln) 1558 1559 elif doc_content.search(line): 1560 self.entry.add_text(doc_content.group(1)) 1561 1562 def parse_export(self): 1563 """ 1564 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1565 """ 1566 1567 export_table = set() 1568 1569 try: 1570 with open(self.fname, "r", encoding="utf8", 1571 errors="backslashreplace") as fp: 1572 1573 for line in fp: 1574 self.process_export(export_table, line) 1575 1576 except IOError: 1577 return None 1578 1579 return export_table 1580 1581 # 1582 # The state/action table telling us which function to invoke in 1583 # each state. 1584 # 1585 state_actions = { 1586 state.NORMAL: process_normal, 1587 state.NAME: process_name, 1588 state.BODY: process_body, 1589 state.DECLARATION: process_decl, 1590 state.SPECIAL_SECTION: process_special, 1591 state.INLINE_NAME: process_inline_name, 1592 state.INLINE_TEXT: process_inline_text, 1593 state.PROTO: process_proto, 1594 state.DOCBLOCK: process_docblock, 1595 } 1596 1597 def parse_kdoc(self): 1598 """ 1599 Open and process each line of a C source file. 
1600 The parsing is controlled via a state machine, and the line is passed 1601 to a different process function depending on the state. The process 1602 function may update the state as needed. 1603 1604 Besides parsing kernel-doc tags, it also parses export symbols. 1605 """ 1606 1607 prev = "" 1608 prev_ln = None 1609 export_table = set() 1610 1611 try: 1612 with open(self.fname, "r", encoding="utf8", 1613 errors="backslashreplace") as fp: 1614 for ln, line in enumerate(fp): 1615 1616 line = line.expandtabs().strip("\n") 1617 1618 # Group continuation lines on prototypes 1619 if self.state == state.PROTO: 1620 if line.endswith("\\"): 1621 prev += line.rstrip("\\") 1622 if not prev_ln: 1623 prev_ln = ln 1624 continue 1625 1626 if prev: 1627 ln = prev_ln 1628 line = prev + line 1629 prev = "" 1630 prev_ln = None 1631 1632 self.config.log.debug("%d %s: %s", 1633 ln, state.name[self.state], 1634 line) 1635 1636 # This is an optimization over the original script. 1637 # There, when export_file was used for the same file, 1638 # it was read twice. Here, we use the already-existing 1639 # loop to parse exported symbols as well. 1640 # 1641 if (self.state != state.NORMAL) or \ 1642 not self.process_export(export_table, line): 1643 # Hand this line to the appropriate state handler 1644 self.state_actions[self.state](self, ln, line) 1645 1646 except OSError: 1647 self.config.log.error(f"Error: Cannot open file {self.fname}") 1648 1649 return export_table, self.entries 1650