1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8kdoc_parser 9=========== 10 11Read a C language source or header FILE and extract embedded 12documentation comments 13""" 14 15import sys 16import re 17from pprint import pformat 18 19from kdoc.kdoc_re import NestedMatch, KernRe 20from kdoc.kdoc_item import KdocItem 21 22# 23# Regular expressions used to parse kernel-doc markups at KernelDoc class. 24# 25# Let's declare them in lowercase outside any class to make it easier to 26# convert from the Perl script. 27# 28# As those are evaluated at the beginning, no need to cache them 29# 30 31# Allow whitespace at end of comment start. 32doc_start = KernRe(r'^/\*\*\s*$', cache=False) 33 34doc_end = KernRe(r'\*/', cache=False) 35doc_com = KernRe(r'\s*\*\s*', cache=False) 36doc_com_body = KernRe(r'\s*\* ?', cache=False) 37doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 38 39# @params and a strictly limited set of supported section names 40# Specifically: 41# Match @word: 42# @...: 43# @{section-name}: 44# while trying to not match literal block starts like "example::" 45# 46known_section_names = 'description|context|returns?|notes?|examples?' 47known_sections = KernRe(known_section_names, flags = re.I) 48doc_sect = doc_com + \ 49 KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', 50 flags=re.I, cache=False) 51 52doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 53doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 54doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 55doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 56doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False) 57 58export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 59export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 60 61type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 62 63# 64# Tests for the beginning of a kerneldoc block in its various forms. 65# 66doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 67doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False) 68doc_begin_func = KernRe(str(doc_com) + # initial " * ' 69 r"(?:\w+\s*\*\s*)?" + # type (not captured) 70 r'(?:define\s+)?' + # possible "define" (not captured) 71 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 72 r'(?:[-:].*)?$', # description (not captured) 73 cache = False) 74 75# 76# Here begins a long set of transformations to turn structure member prefixes 77# and macro invocations into something we can parse and generate kdoc for. 78# 79struct_args_pattern = r'([^,)]+)' 80 81struct_xforms = [ 82 # Strip attributes 83 (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), 84 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 85 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 86 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 87 (KernRe(r'\s*__packed\s*', re.S), ' '), 88 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 89 (KernRe(r'\s*__private', re.S), ' '), 90 (KernRe(r'\s*__rcu', re.S), ' '), 91 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 92 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 93 (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), 94 # 95 # Unwrap struct_group macros based on this definition: 96 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 97 # which has variants like: struct_group(NAME, MEMBERS...) 98 # Only MEMBERS arguments require documentation. 99 # 100 # Parsing them happens on two steps: 101 # 102 # 1. drop struct group arguments that aren't at MEMBERS, 103 # storing them as STRUCT_GROUP(MEMBERS) 104 # 105 # 2. remove STRUCT_GROUP() ancillary macro. 106 # 107 # The original logic used to remove STRUCT_GROUP() using an 108 # advanced regex: 109 # 110 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 111 # 112 # with two patterns that are incompatible with 113 # Python re module, as it has: 114 # 115 # - a recursive pattern: (?1) 116 # - an atomic grouping: (?>...) 117 # 118 # I tried a simpler version: but it didn't work either: 119 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 120 # 121 # As it doesn't properly match the end parenthesis on some cases. 122 # 123 # So, a better solution was crafted: there's now a NestedMatch 124 # class that ensures that delimiters after a search are properly 125 # matched. So, the implementation to drop STRUCT_GROUP() will be 126 # handled in separate. 127 # 128 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 129 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 130 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 131 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 132 # 133 # Replace macros 134 # 135 # TODO: use NestedMatch for FOO($1, $2, ...) matches 136 # 137 # it is better to also move those to the NestedMatch logic, 138 # to ensure that parentheses will be properly matched. 139 # 140 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), 141 r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 142 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), 143 r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 144 (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 145 re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 146 (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 147 re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 148 (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + 149 r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), 150 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + 151 struct_args_pattern + r'\)', re.S), r'\2 *\1'), 152 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + 153 struct_args_pattern + r'\)', re.S), r'\1 \2[]'), 154 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), 155 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), 156] 157# 158# Regexes here are guaranteed to have the end delimiter matching 159# the start delimiter. Yet, right now, only one replace group 160# is allowed. 161# 162struct_nested_prefixes = [ 163 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 164] 165 166# 167# Transforms for function prototypes 168# 169function_xforms = [ 170 (KernRe(r"^static +"), ""), 171 (KernRe(r"^extern +"), ""), 172 (KernRe(r"^asmlinkage +"), ""), 173 (KernRe(r"^inline +"), ""), 174 (KernRe(r"^__inline__ +"), ""), 175 (KernRe(r"^__inline +"), ""), 176 (KernRe(r"^__always_inline +"), ""), 177 (KernRe(r"^noinline +"), ""), 178 (KernRe(r"^__FORTIFY_INLINE +"), ""), 179 (KernRe(r"__init +"), ""), 180 (KernRe(r"__init_or_module +"), ""), 181 (KernRe(r"__deprecated +"), ""), 182 (KernRe(r"__flatten +"), ""), 183 (KernRe(r"__meminit +"), ""), 184 (KernRe(r"__must_check +"), ""), 185 (KernRe(r"__weak +"), ""), 186 (KernRe(r"__sched +"), ""), 187 (KernRe(r"_noprof"), ""), 188 (KernRe(r"__always_unused *"), ""), 189 (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), 190 (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), 191 (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), 192 (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), 193 (KernRe(r"__attribute_const__ +"), ""), 194 (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), 195] 196 197# 198# Apply a set of transforms to a block of text. 199# 200def apply_transforms(xforms, text): 201 for search, subst in xforms: 202 text = search.sub(subst, text) 203 return text 204 205# 206# A little helper to get rid of excess white space 207# 208multi_space = KernRe(r'\s\s+') 209def trim_whitespace(s): 210 return multi_space.sub(' ', s.strip()) 211 212# 213# Remove struct/enum members that have been marked "private". 214# 215def trim_private_members(text): 216 # 217 # First look for a "public:" block that ends a private region, then 218 # handle the "private until the end" case. 219 # 220 text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) 221 text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) 222 # 223 # We needed the comments to do the above, but now we can take them out. 224 # 225 return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() 226 227class state: 228 """ 229 State machine enums 230 """ 231 232 # Parser states 233 NORMAL = 0 # normal code 234 NAME = 1 # looking for function name 235 DECLARATION = 2 # We have seen a declaration which might not be done 236 BODY = 3 # the body of the comment 237 SPECIAL_SECTION = 4 # doc section ending with a blank line 238 PROTO = 5 # scanning prototype 239 DOCBLOCK = 6 # documentation block 240 INLINE_NAME = 7 # gathering doc outside main block 241 INLINE_TEXT = 8 # reading the body of inline docs 242 243 name = [ 244 "NORMAL", 245 "NAME", 246 "DECLARATION", 247 "BODY", 248 "SPECIAL_SECTION", 249 "PROTO", 250 "DOCBLOCK", 251 "INLINE_NAME", 252 "INLINE_TEXT", 253 ] 254 255 256SECTION_DEFAULT = "Description" # default section 257 258class KernelEntry: 259 260 def __init__(self, config, fname, ln): 261 self.config = config 262 self.fname = fname 263 264 self._contents = [] 265 self.prototype = "" 266 267 self.warnings = [] 268 269 self.parameterlist = [] 270 self.parameterdescs = {} 271 self.parametertypes = {} 272 self.parameterdesc_start_lines = {} 273 274 self.section_start_lines = {} 275 self.sections = {} 276 277 self.anon_struct_union = False 278 279 self.leading_space = None 280 281 self.fname = fname 282 283 # State flags 284 self.brcount = 0 285 self.declaration_start_line = ln + 1 286 287 # 288 # Management of section contents 289 # 290 def add_text(self, text): 291 self._contents.append(text) 292 293 def contents(self): 294 return '\n'.join(self._contents) + '\n' 295 296 # TODO: rename to emit_message after removal of kernel-doc.pl 297 def emit_msg(self, ln, msg, *, warning=True): 298 """Emit a message.""" 299 300 log_msg = f"{self.fname}:{ln} {msg}" 301 302 if not warning: 303 self.config.log.info(log_msg) 304 return 305 306 # Delegate warning output to output logic, as this way it 307 # will report warnings/info only for symbols that are output 308 309 self.warnings.append(log_msg) 310 return 311 312 # 313 # Begin a new section. 314 # 315 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 316 if dump: 317 self.dump_section(start_new = True) 318 self.section = title 319 self.new_start_line = line_no 320 321 def dump_section(self, start_new=True): 322 """ 323 Dumps section contents to arrays/hashes intended for that purpose. 324 """ 325 # 326 # If we have accumulated no contents in the default ("description") 327 # section, don't bother. 328 # 329 if self.section == SECTION_DEFAULT and not self._contents: 330 return 331 name = self.section 332 contents = self.contents() 333 334 if type_param.match(name): 335 name = type_param.group(1) 336 337 self.parameterdescs[name] = contents 338 self.parameterdesc_start_lines[name] = self.new_start_line 339 340 self.new_start_line = 0 341 342 else: 343 if name in self.sections and self.sections[name] != "": 344 # Only warn on user-specified duplicate section names 345 if name != SECTION_DEFAULT: 346 self.emit_msg(self.new_start_line, 347 f"duplicate section name '{name}'") 348 # Treat as a new paragraph - add a blank line 349 self.sections[name] += '\n' + contents 350 else: 351 self.sections[name] = contents 352 self.section_start_lines[name] = self.new_start_line 353 self.new_start_line = 0 354 355# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 356 357 if start_new: 358 self.section = SECTION_DEFAULT 359 self._contents = [] 360 361python_warning = False 362 363class KernelDoc: 364 """ 365 Read a C language source or header FILE and extract embedded 366 documentation comments. 367 """ 368 369 # Section names 370 371 section_context = "Context" 372 section_return = "Return" 373 374 undescribed = "-- undescribed --" 375 376 def __init__(self, config, fname): 377 """Initialize internal variables""" 378 379 self.fname = fname 380 self.config = config 381 382 # Initial state for the state machines 383 self.state = state.NORMAL 384 385 # Store entry currently being processed 386 self.entry = None 387 388 # Place all potential outputs into an array 389 self.entries = [] 390 391 # 392 # We need Python 3.7 for its "dicts remember the insertion 393 # order" guarantee 394 # 395 global python_warning 396 if (not python_warning and 397 sys.version_info.major == 3 and sys.version_info.minor < 7): 398 399 self.emit_msg(0, 400 'Python 3.7 or later is required for correct results') 401 python_warning = True 402 403 def emit_msg(self, ln, msg, *, warning=True): 404 """Emit a message""" 405 406 if self.entry: 407 self.entry.emit_msg(ln, msg, warning=warning) 408 return 409 410 log_msg = f"{self.fname}:{ln} {msg}" 411 412 if warning: 413 self.config.log.warning(log_msg) 414 else: 415 self.config.log.info(log_msg) 416 417 def dump_section(self, start_new=True): 418 """ 419 Dumps section contents to arrays/hashes intended for that purpose. 420 """ 421 422 if self.entry: 423 self.entry.dump_section(start_new) 424 425 # TODO: rename it to store_declaration after removal of kernel-doc.pl 426 def output_declaration(self, dtype, name, **args): 427 """ 428 Stores the entry into an entry array. 429 430 The actual output and output filters will be handled elsewhere 431 """ 432 433 item = KdocItem(name, self.fname, dtype, 434 self.entry.declaration_start_line, **args) 435 item.warnings = self.entry.warnings 436 437 # Drop empty sections 438 # TODO: improve empty sections logic to emit warnings 439 sections = self.entry.sections 440 for section in ["Description", "Return"]: 441 if section in sections and not sections[section].rstrip(): 442 del sections[section] 443 item.set_sections(sections, self.entry.section_start_lines) 444 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 445 self.entry.parametertypes, 446 self.entry.parameterdesc_start_lines) 447 self.entries.append(item) 448 449 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 450 451 def emit_unused_warnings(self): 452 """ 453 When the parser fails to produce a valid entry, it places some 454 warnings under `entry.warnings` that will be discarded when resetting 455 the state. 456 457 Ensure that those warnings are not lost. 458 459 .. note:: 460 461 Because we are calling `config.warning()` here, those 462 warnings are not filtered by the `-W` parameters: they will all 463 be produced even when `-Wreturn`, `-Wshort-desc`, and/or 464 `-Wcontents-before-sections` are used. 465 466 Allowing those warnings to be filtered is complex, because it 467 would require storing them in a buffer and then filtering them 468 during the output step of the code, depending on the 469 selected symbols. 470 """ 471 if self.entry and self.entry not in self.entries: 472 for log_msg in self.entry.warnings: 473 self.config.warning(log_msg) 474 475 def reset_state(self, ln): 476 """ 477 Ancillary routine to create a new entry. It initializes all 478 variables used by the state machine. 479 """ 480 481 self.emit_unused_warnings() 482 483 self.entry = KernelEntry(self.config, self.fname, ln) 484 485 # State flags 486 self.state = state.NORMAL 487 488 def push_parameter(self, ln, decl_type, param, dtype, 489 org_arg, declaration_name): 490 """ 491 Store parameters and their descriptions at self.entry. 492 """ 493 494 if self.entry.anon_struct_union and dtype == "" and param == "}": 495 return # Ignore the ending }; from anonymous struct/union 496 497 self.entry.anon_struct_union = False 498 499 param = KernRe(r'[\[\)].*').sub('', param, count=1) 500 501 # 502 # Look at various "anonymous type" cases. 503 # 504 if dtype == '': 505 if param.endswith("..."): 506 if len(param) > 3: # there is a name provided, use that 507 param = param[:-3] 508 if not self.entry.parameterdescs.get(param): 509 self.entry.parameterdescs[param] = "variable arguments" 510 511 elif (not param) or param == "void": 512 param = "void" 513 self.entry.parameterdescs[param] = "no arguments" 514 515 elif param in ["struct", "union"]: 516 # Handle unnamed (anonymous) union or struct 517 dtype = param 518 param = "{unnamed_" + param + "}" 519 self.entry.parameterdescs[param] = "anonymous\n" 520 self.entry.anon_struct_union = True 521 522 # Warn if parameter has no description 523 # (but ignore ones starting with # as these are not parameters 524 # but inline preprocessor statements) 525 if param not in self.entry.parameterdescs and not param.startswith("#"): 526 self.entry.parameterdescs[param] = self.undescribed 527 528 if "." not in param: 529 if decl_type == 'function': 530 dname = f"{decl_type} parameter" 531 else: 532 dname = f"{decl_type} member" 533 534 self.emit_msg(ln, 535 f"{dname} '{param}' not described in '{declaration_name}'") 536 537 # Strip spaces from param so that it is one continuous string on 538 # parameterlist. This fixes a problem where check_sections() 539 # cannot find a parameter like "addr[6 + 2]" because it actually 540 # appears as "addr[6", "+", "2]" on the parameter list. 541 # However, it's better to maintain the param string unchanged for 542 # output, so just weaken the string compare in check_sections() 543 # to ignore "[blah" in a parameter string. 544 545 self.entry.parameterlist.append(param) 546 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 547 self.entry.parametertypes[param] = org_arg 548 549 550 def create_parameter_list(self, ln, decl_type, args, 551 splitter, declaration_name): 552 """ 553 Creates a list of parameters, storing them at self.entry. 554 """ 555 556 # temporarily replace all commas inside function pointer definition 557 arg_expr = KernRe(r'(\([^\),]+),') 558 while arg_expr.search(args): 559 args = arg_expr.sub(r"\1#", args) 560 561 for arg in args.split(splitter): 562 # Ignore argument attributes 563 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 564 565 # Strip leading/trailing spaces 566 arg = arg.strip() 567 arg = KernRe(r'\s+').sub(' ', arg, count=1) 568 569 if arg.startswith('#'): 570 # Treat preprocessor directive as a typeless variable just to fill 571 # corresponding data structures "correctly". Catch it later in 572 # output_* subs. 573 574 # Treat preprocessor directive as a typeless variable 575 self.push_parameter(ln, decl_type, arg, "", 576 "", declaration_name) 577 # 578 # The pointer-to-function case. 579 # 580 elif KernRe(r'\(.+\)\s*\(').search(arg): 581 arg = arg.replace('#', ',') 582 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" 583 r'([\w\[\].]*)' # Capture the name and possible [array] 584 r'\s*\)') # Make sure the trailing ")" is there 585 if r.match(arg): 586 param = r.group(1) 587 else: 588 self.emit_msg(ln, f"Invalid param: {arg}") 589 param = arg 590 dtype = arg.replace(param, '') 591 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 592 # 593 # The array-of-pointers case. Dig the parameter name out from the middle 594 # of the declaration. 595 # 596 elif KernRe(r'\(.+\)\s*\[').search(arg): 597 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" 598 r'([\w.]*?)' # The actual pointer name 599 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] 600 if r.match(arg): 601 param = r.group(1) 602 else: 603 self.emit_msg(ln, f"Invalid param: {arg}") 604 param = arg 605 dtype = arg.replace(param, '') 606 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 607 elif arg: 608 # 609 # Clean up extraneous spaces and split the string at commas; the first 610 # element of the resulting list will also include the type information. 611 # 612 arg = KernRe(r'\s*:\s*').sub(":", arg) 613 arg = KernRe(r'\s*\[').sub('[', arg) 614 args = KernRe(r'\s*,\s*').split(arg) 615 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 616 # 617 # args[0] has a string of "type a". If "a" includes an [array] 618 # declaration, we want to not be fooled by any white space inside 619 # the brackets, so detect and handle that case specially. 620 # 621 r = KernRe(r'^([^[\]]*\s+)(.*)$') 622 if r.match(args[0]): 623 args[0] = r.group(2) 624 dtype = r.group(1) 625 else: 626 # No space in args[0]; this seems wrong but preserves previous behavior 627 dtype = '' 628 629 bitfield_re = KernRe(r'(.*?):(\w+)') 630 for param in args: 631 # 632 # For pointers, shift the star(s) from the variable name to the 633 # type declaration. 634 # 635 r = KernRe(r'^(\*+)\s*(.*)') 636 if r.match(param): 637 self.push_parameter(ln, decl_type, r.group(2), 638 f"{dtype} {r.group(1)}", 639 arg, declaration_name) 640 # 641 # Perform a similar shift for bitfields. 642 # 643 elif bitfield_re.search(param): 644 if dtype != "": # Skip unnamed bit-fields 645 self.push_parameter(ln, decl_type, bitfield_re.group(1), 646 f"{dtype}:{bitfield_re.group(2)}", 647 arg, declaration_name) 648 else: 649 self.push_parameter(ln, decl_type, param, dtype, 650 arg, declaration_name) 651 652 def check_sections(self, ln, decl_name, decl_type): 653 """ 654 Check for errors inside sections, emitting warnings if not found 655 parameters are described. 656 """ 657 for section in self.entry.sections: 658 if section not in self.entry.parameterlist and \ 659 not known_sections.search(section): 660 if decl_type == 'function': 661 dname = f"{decl_type} parameter" 662 else: 663 dname = f"{decl_type} member" 664 self.emit_msg(ln, 665 f"Excess {dname} '{section}' description in '{decl_name}'") 666 667 def check_return_section(self, ln, declaration_name, return_type): 668 """ 669 If the function doesn't return void, warns about the lack of a 670 return description. 671 """ 672 673 if not self.config.wreturn: 674 return 675 676 # Ignore an empty return type (It's a macro) 677 # Ignore functions with a "void" return type (but not "void *") 678 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 679 return 680 681 if not self.entry.sections.get("Return", None): 682 self.emit_msg(ln, 683 f"No description found for return value of '{declaration_name}'") 684 685 # 686 # Split apart a structure prototype; returns (struct|union, name, members) or None 687 # 688 def split_struct_proto(self, proto): 689 type_pattern = r'(struct|union)' 690 qualifiers = [ 691 "__attribute__", 692 "__packed", 693 "__aligned", 694 "____cacheline_aligned_in_smp", 695 "____cacheline_aligned", 696 ] 697 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 698 699 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 700 if r.search(proto): 701 return (r.group(1), r.group(2), r.group(3)) 702 else: 703 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 704 if r.search(proto): 705 return (r.group(1), r.group(3), r.group(2)) 706 return None 707 # 708 # Rewrite the members of a structure or union for easier formatting later on. 709 # Among other things, this function will turn a member like: 710 # 711 # struct { inner_members; } foo; 712 # 713 # into: 714 # 715 # struct foo; inner_members; 716 # 717 def rewrite_struct_members(self, members): 718 # 719 # Process struct/union members from the most deeply nested outward. The 720 # trick is in the ^{ below - it prevents a match of an outer struct/union 721 # until the inner one has been munged (removing the "{" in the process). 722 # 723 struct_members = KernRe(r'(struct|union)' # 0: declaration type 724 r'([^\{\};]+)' # 1: possible name 725 r'(\{)' 726 r'([^\{\}]*)' # 3: Contents of declaration 727 r'(\})' 728 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration 729 tuples = struct_members.findall(members) 730 while tuples: 731 for t in tuples: 732 newmember = "" 733 oldmember = "".join(t) # Reconstruct the original formatting 734 dtype, name, lbr, content, rbr, rest, semi = t 735 # 736 # Pass through each field name, normalizing the form and formatting. 737 # 738 for s_id in rest.split(','): 739 s_id = s_id.strip() 740 newmember += f"{dtype} {s_id}; " 741 # 742 # Remove bitfield/array/pointer info, getting the bare name. 743 # 744 s_id = KernRe(r'[:\[].*').sub('', s_id) 745 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 746 # 747 # Pass through the members of this inner structure/union. 748 # 749 for arg in content.split(';'): 750 arg = arg.strip() 751 # 752 # Look for (type)(*name)(args) - pointer to function 753 # 754 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') 755 if r.match(arg): 756 dtype, name, extra = r.group(1), r.group(2), r.group(3) 757 # Pointer-to-function 758 if not s_id: 759 # Anonymous struct/union 760 newmember += f"{dtype}{name}{extra}; " 761 else: 762 newmember += f"{dtype}{s_id}.{name}{extra}; " 763 # 764 # Otherwise a non-function member. 765 # 766 else: 767 # 768 # Remove bitmap and array portions and spaces around commas 769 # 770 arg = KernRe(r':\s*\d+\s*').sub('', arg) 771 arg = KernRe(r'\[.*\]').sub('', arg) 772 arg = KernRe(r'\s*,\s*').sub(',', arg) 773 # 774 # Look for a normal decl - "type name[,name...]" 775 # 776 r = KernRe(r'(.*)\s+([\S+,]+)') 777 if r.search(arg): 778 for name in r.group(2).split(','): 779 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) 780 if not s_id: 781 # Anonymous struct/union 782 newmember += f"{r.group(1)} {name}; " 783 else: 784 newmember += f"{r.group(1)} {s_id}.{name}; " 785 else: 786 newmember += f"{arg}; " 787 # 788 # At the end of the s_id loop, replace the original declaration with 789 # the munged version. 790 # 791 members = members.replace(oldmember, newmember) 792 # 793 # End of the tuple loop - search again and see if there are outer members 794 # that now turn up. 795 # 796 tuples = struct_members.findall(members) 797 return members 798 799 # 800 # Format the struct declaration into a standard form for inclusion in the 801 # resulting docs. 802 # 803 def format_struct_decl(self, declaration): 804 # 805 # Insert newlines, get rid of extra spaces. 806 # 807 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 808 declaration = KernRe(r'\}\s+;').sub('};', declaration) 809 # 810 # Format inline enums with each member on its own line. 811 # 812 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 813 while r.search(declaration): 814 declaration = r.sub(r'\1,\n\2', declaration) 815 # 816 # Now go through and supply the right number of tabs 817 # for each line. 818 # 819 def_args = declaration.split('\n') 820 level = 1 821 declaration = "" 822 for clause in def_args: 823 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) 824 if clause: 825 if '}' in clause and level > 1: 826 level -= 1 827 if not clause.startswith('#'): 828 declaration += "\t" * level 829 declaration += "\t" + clause + "\n" 830 if "{" in clause and "}" not in clause: 831 level += 1 832 return declaration 833 834 835 def dump_struct(self, ln, proto): 836 """ 837 Store an entry for a struct or union 838 """ 839 # 840 # Do the basic parse to get the pieces of the declaration. 841 # 842 struct_parts = self.split_struct_proto(proto) 843 if not struct_parts: 844 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 845 return 846 decl_type, declaration_name, members = struct_parts 847 848 if self.entry.identifier != declaration_name: 849 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " 850 f"Prototype was for {decl_type} {declaration_name} instead\n") 851 return 852 # 853 # Go through the list of members applying all of our transformations. 854 # 855 members = trim_private_members(members) 856 members = apply_transforms(struct_xforms, members) 857 858 nested = NestedMatch() 859 for search, sub in struct_nested_prefixes: 860 members = nested.sub(search, sub, members) 861 # 862 # Deal with embedded struct and union members, and drop enums entirely. 863 # 864 declaration = members 865 members = self.rewrite_struct_members(members) 866 members = re.sub(r'(\{[^\{\}]*\})', '', members) 867 # 868 # Output the result and we are done. 869 # 870 self.create_parameter_list(ln, decl_type, members, ';', 871 declaration_name) 872 self.check_sections(ln, declaration_name, decl_type) 873 self.output_declaration(decl_type, declaration_name, 874 definition=self.format_struct_decl(declaration), 875 purpose=self.entry.declaration_purpose) 876 877 def dump_enum(self, ln, proto): 878 """ 879 Stores an enum inside self.entries array. 880 """ 881 # 882 # Strip preprocessor directives. Note that this depends on the 883 # trailing semicolon we added in process_proto_type(). 884 # 885 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 886 # 887 # Parse out the name and members of the enum. Typedef form first. 888 # 889 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 890 if r.search(proto): 891 declaration_name = r.group(2) 892 members = trim_private_members(r.group(1)) 893 # 894 # Failing that, look for a straight enum 895 # 896 else: 897 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 898 if r.match(proto): 899 declaration_name = r.group(1) 900 members = trim_private_members(r.group(2)) 901 # 902 # OK, this isn't going to work. 903 # 904 else: 905 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 906 return 907 # 908 # Make sure we found what we were expecting. 909 # 910 if self.entry.identifier != declaration_name: 911 if self.entry.identifier == "": 912 self.emit_msg(ln, 913 f"{proto}: wrong kernel-doc identifier on prototype") 914 else: 915 self.emit_msg(ln, 916 f"expecting prototype for enum {self.entry.identifier}. " 917 f"Prototype was for enum {declaration_name} instead") 918 return 919 920 if not declaration_name: 921 declaration_name = "(anonymous)" 922 # 923 # Parse out the name of each enum member, and verify that we 924 # have a description for it. 925 # 926 member_set = set() 927 members = KernRe(r'\([^;)]*\)').sub('', members) 928 for arg in members.split(','): 929 if not arg: 930 continue 931 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 932 self.entry.parameterlist.append(arg) 933 if arg not in self.entry.parameterdescs: 934 self.entry.parameterdescs[arg] = self.undescribed 935 self.emit_msg(ln, 936 f"Enum value '{arg}' not described in enum '{declaration_name}'") 937 member_set.add(arg) 938 # 939 # Ensure that every described member actually exists in the enum. 940 # 941 for k in self.entry.parameterdescs: 942 if k not in member_set: 943 self.emit_msg(ln, 944 f"Excess enum value '@{k}' description in '{declaration_name}'") 945 946 self.output_declaration('enum', declaration_name, 947 purpose=self.entry.declaration_purpose) 948 949 def dump_var(self, ln, proto): 950 """ 951 Store variables that are part of kAPI. 952 """ 953 VAR_ATTRIBS = [ 954 "extern", 955 ] 956 OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" 957 958 sub_prefixes = [ 959 (KernRe(r"__read_mostly"), ""), 960 (KernRe(r"__ro_after_init"), ""), 961 (KernRe(r"(?://.*)$"), ""), 962 (KernRe(r"(?:/\*.*\*/)"), ""), 963 (KernRe(r";$"), ""), 964 (KernRe(r"=.*"), ""), 965 ] 966 967 # 968 # Store the full prototype before modifying it 969 # 970 full_proto = proto 971 declaration_name = None 972 973 # 974 # Handle macro definitions 975 # 976 macro_prefixes = [ 977 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"), 978 ] 979 980 for r in macro_prefixes: 981 match = r.search(proto) 982 if match: 983 declaration_name = match.group(1) 984 break 985 986 # 987 # Drop comments and macros to have a pure C prototype 988 # 989 if not declaration_name: 990 for r, sub in sub_prefixes: 991 proto = r.sub(sub, proto) 992 993 proto = proto.rstrip() 994 995 # 996 # Variable name is at the end of the declaration 997 # 998 999 default_val = None 1000 1001 r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1002 if r.match(proto): 1003 if not declaration_name: 1004 declaration_name = r.group(1) 1005 1006 default_val = r.group(2) 1007 else: 1008 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1009 if r.match(proto): 1010 default_val = r.group(1) 1011 1012 if not declaration_name: 1013 self.emit_msg(ln,f"{proto}: can't parse variable") 1014 return 1015 1016 if default_val: 1017 default_val = default_val.lstrip("=").strip() 1018 1019 self.output_declaration("var", declaration_name, 1020 full_proto=full_proto, 1021 default_val=default_val, 1022 purpose=self.entry.declaration_purpose) 1023 1024 def dump_declaration(self, ln, prototype): 1025 """ 1026 Stores a data declaration inside self.entries array. 1027 """ 1028 1029 if self.entry.decl_type == "enum": 1030 self.dump_enum(ln, prototype) 1031 elif self.entry.decl_type == "typedef": 1032 self.dump_typedef(ln, prototype) 1033 elif self.entry.decl_type in ["union", "struct"]: 1034 self.dump_struct(ln, prototype) 1035 elif self.entry.decl_type == "var": 1036 self.dump_var(ln, prototype) 1037 else: 1038 # This would be a bug 1039 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 1040 1041 def dump_function(self, ln, prototype): 1042 """ 1043 Stores a function or function macro inside self.entries array. 1044 """ 1045 1046 found = func_macro = False 1047 return_type = '' 1048 decl_type = 'function' 1049 # 1050 # Apply the initial transformations. 1051 # 1052 prototype = apply_transforms(function_xforms, prototype) 1053 # 1054 # If we have a macro, remove the "#define" at the front. 1055 # 1056 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1057 if new_proto != prototype: 1058 prototype = new_proto 1059 # 1060 # Dispense with the simple "#define A B" case here; the key 1061 # is the space after the name of the symbol being defined. 1062 # NOTE that the seemingly misnamed "func_macro" indicates a 1063 # macro *without* arguments. 1064 # 1065 r = KernRe(r'^(\w+)\s+') 1066 if r.search(prototype): 1067 return_type = '' 1068 declaration_name = r.group(1) 1069 func_macro = True 1070 found = True 1071 1072 # Yes, this truly is vile. We are looking for: 1073 # 1. Return type (may be nothing if we're looking at a macro) 1074 # 2. Function name 1075 # 3. Function parameters. 1076 # 1077 # All the while we have to watch out for function pointer parameters 1078 # (which IIRC is what the two sections are for), C types (these 1079 # regexps don't even start to express all the possibilities), and 1080 # so on. 1081 # 1082 # If you mess with these regexps, it's a good idea to check that 1083 # the following functions' documentation still comes out right: 1084 # - parport_register_device (function pointer parameters) 1085 # - atomic_set (macro) 1086 # - pci_match_device, __copy_to_user (long return type) 1087 1088 name = r'\w+' 1089 type1 = r'(?:[\w\s]+)?' 1090 type2 = r'(?:[\w\s]+\*+)+' 1091 # 1092 # Attempt to match first on (args) with no internal parentheses; this 1093 # lets us easily filter out __acquires() and other post-args stuff. If 1094 # that fails, just grab the rest of the line to the last closing 1095 # parenthesis. 1096 # 1097 proto_args = r'\(([^\(]*|.*)\)' 1098 # 1099 # (Except for the simple macro case) attempt to split up the prototype 1100 # in the various ways we understand. 1101 # 1102 if not found: 1103 patterns = [ 1104 rf'^()({name})\s*{proto_args}', 1105 rf'^({type1})\s+({name})\s*{proto_args}', 1106 rf'^({type2})\s*({name})\s*{proto_args}', 1107 ] 1108 1109 for p in patterns: 1110 r = KernRe(p) 1111 if r.match(prototype): 1112 return_type = r.group(1) 1113 declaration_name = r.group(2) 1114 args = r.group(3) 1115 self.create_parameter_list(ln, decl_type, args, ',', 1116 declaration_name) 1117 found = True 1118 break 1119 # 1120 # Parsing done; make sure that things are as we expect. 1121 # 1122 if not found: 1123 self.emit_msg(ln, 1124 f"cannot understand function prototype: '{prototype}'") 1125 return 1126 if self.entry.identifier != declaration_name: 1127 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " 1128 f"Prototype was for {declaration_name}() instead") 1129 return 1130 self.check_sections(ln, declaration_name, "function") 1131 self.check_return_section(ln, declaration_name, return_type) 1132 # 1133 # Store the result. 1134 # 1135 self.output_declaration(decl_type, declaration_name, 1136 typedef=('typedef' in return_type), 1137 functiontype=return_type, 1138 purpose=self.entry.declaration_purpose, 1139 func_macro=func_macro) 1140 1141 1142 def dump_typedef(self, ln, proto): 1143 """ 1144 Stores a typedef inside self.entries array. 1145 """ 1146 # 1147 # We start by looking for function typedefs. 1148 # 1149 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1150 typedef_ident = r'\*?\s*(\w\S+)\s*' 1151 typedef_args = r'\s*\((.*)\);' 1152 1153 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1154 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1155 1156 # Parse function typedef prototypes 1157 for r in [typedef1, typedef2]: 1158 if not r.match(proto): 1159 continue 1160 1161 return_type = r.group(1).strip() 1162 declaration_name = r.group(2) 1163 args = r.group(3) 1164 1165 if self.entry.identifier != declaration_name: 1166 self.emit_msg(ln, 1167 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1168 return 1169 1170 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1171 1172 self.output_declaration('function', declaration_name, 1173 typedef=True, 1174 functiontype=return_type, 1175 purpose=self.entry.declaration_purpose) 1176 return 1177 # 1178 # Not a function, try to parse a simple typedef. 1179 # 1180 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1181 if r.match(proto): 1182 declaration_name = r.group(1) 1183 1184 if self.entry.identifier != declaration_name: 1185 self.emit_msg(ln, 1186 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1187 return 1188 1189 self.output_declaration('typedef', declaration_name, 1190 purpose=self.entry.declaration_purpose) 1191 return 1192 1193 self.emit_msg(ln, "error: Cannot parse typedef!") 1194 1195 @staticmethod 1196 def process_export(function_set, line): 1197 """ 1198 process EXPORT_SYMBOL* tags 1199 1200 This method doesn't use any variable from the class, so declare it 1201 with a staticmethod decorator. 1202 """ 1203 1204 # We support documenting some exported symbols with different 1205 # names. A horrible hack. 1206 suffixes = [ '_noprof' ] 1207 1208 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1209 # multiple export lines would violate Kernel coding style. 1210 1211 if export_symbol.search(line): 1212 symbol = export_symbol.group(2) 1213 elif export_symbol_ns.search(line): 1214 symbol = export_symbol_ns.group(2) 1215 else: 1216 return False 1217 # 1218 # Found an export, trim out any special suffixes 1219 # 1220 for suffix in suffixes: 1221 # Be backward compatible with Python < 3.9 1222 if symbol.endswith(suffix): 1223 symbol = symbol[:-len(suffix)] 1224 function_set.add(symbol) 1225 return True 1226 1227 def process_normal(self, ln, line): 1228 """ 1229 STATE_NORMAL: looking for the /** to begin everything. 1230 """ 1231 1232 if not doc_start.match(line): 1233 return 1234 1235 # start a new entry 1236 self.reset_state(ln) 1237 1238 # next line is always the function name 1239 self.state = state.NAME 1240 1241 def process_name(self, ln, line): 1242 """ 1243 STATE_NAME: Looking for the "name - description" line 1244 """ 1245 # 1246 # Check for a DOC: block and handle them specially. 1247 # 1248 if doc_block.search(line): 1249 1250 if not doc_block.group(1): 1251 self.entry.begin_section(ln, "Introduction") 1252 else: 1253 self.entry.begin_section(ln, doc_block.group(1)) 1254 1255 self.entry.identifier = self.entry.section 1256 self.state = state.DOCBLOCK 1257 # 1258 # Otherwise we're looking for a normal kerneldoc declaration line. 1259 # 1260 elif doc_decl.search(line): 1261 self.entry.identifier = doc_decl.group(1) 1262 1263 # Test for data declaration 1264 if doc_begin_data.search(line): 1265 self.entry.decl_type = doc_begin_data.group(1) 1266 self.entry.identifier = doc_begin_data.group(2) 1267 # 1268 # Look for a function description 1269 # 1270 elif doc_begin_func.search(line): 1271 self.entry.identifier = doc_begin_func.group(1) 1272 self.entry.decl_type = "function" 1273 # 1274 # We struck out. 1275 # 1276 else: 1277 self.emit_msg(ln, 1278 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1279 self.state = state.NORMAL 1280 return 1281 # 1282 # OK, set up for a new kerneldoc entry. 1283 # 1284 self.state = state.BODY 1285 self.entry.identifier = self.entry.identifier.strip(" ") 1286 # if there's no @param blocks need to set up default section here 1287 self.entry.begin_section(ln + 1) 1288 # 1289 # Find the description portion, which *should* be there but 1290 # isn't always. 1291 # (We should be able to capture this from the previous parsing - someday) 1292 # 1293 r = KernRe("[-:](.*)") 1294 if r.search(line): 1295 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1296 self.state = state.DECLARATION 1297 else: 1298 self.entry.declaration_purpose = "" 1299 1300 if not self.entry.declaration_purpose and self.config.wshort_desc: 1301 self.emit_msg(ln, 1302 f"missing initial short description on line:\n{line}") 1303 1304 if not self.entry.identifier and self.entry.decl_type != "enum": 1305 self.emit_msg(ln, 1306 f"wrong kernel-doc identifier on line:\n{line}") 1307 self.state = state.NORMAL 1308 1309 if self.config.verbose: 1310 self.emit_msg(ln, 1311 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1312 warning=False) 1313 # 1314 # Failed to find an identifier. Emit a warning 1315 # 1316 else: 1317 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1318 1319 # 1320 # Helper function to determine if a new section is being started. 1321 # 1322 def is_new_section(self, ln, line): 1323 if doc_sect.search(line): 1324 self.state = state.BODY 1325 # 1326 # Pick out the name of our new section, tweaking it if need be. 1327 # 1328 newsection = doc_sect.group(1) 1329 if newsection.lower() == 'description': 1330 newsection = 'Description' 1331 elif newsection.lower() == 'context': 1332 newsection = 'Context' 1333 self.state = state.SPECIAL_SECTION 1334 elif newsection.lower() in ["@return", "@returns", 1335 "return", "returns"]: 1336 newsection = "Return" 1337 self.state = state.SPECIAL_SECTION 1338 elif newsection[0] == '@': 1339 self.state = state.SPECIAL_SECTION 1340 # 1341 # Initialize the contents, and get the new section going. 1342 # 1343 newcontents = doc_sect.group(2) 1344 if not newcontents: 1345 newcontents = "" 1346 self.dump_section() 1347 self.entry.begin_section(ln, newsection) 1348 self.entry.leading_space = None 1349 1350 self.entry.add_text(newcontents.lstrip()) 1351 return True 1352 return False 1353 1354 # 1355 # Helper function to detect (and effect) the end of a kerneldoc comment. 1356 # 1357 def is_comment_end(self, ln, line): 1358 if doc_end.search(line): 1359 self.dump_section() 1360 1361 # Look for doc_com + <text> + doc_end: 1362 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1363 if r.match(line): 1364 self.emit_msg(ln, f"suspicious ending line: {line}") 1365 1366 self.entry.prototype = "" 1367 self.entry.new_start_line = ln + 1 1368 1369 self.state = state.PROTO 1370 return True 1371 return False 1372 1373 1374 def process_decl(self, ln, line): 1375 """ 1376 STATE_DECLARATION: We've seen the beginning of a declaration 1377 """ 1378 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1379 return 1380 # 1381 # Look for anything with the " * " line beginning. 1382 # 1383 if doc_content.search(line): 1384 cont = doc_content.group(1) 1385 # 1386 # A blank line means that we have moved out of the declaration 1387 # part of the comment (without any "special section" parameter 1388 # descriptions). 1389 # 1390 if cont == "": 1391 self.state = state.BODY 1392 # 1393 # Otherwise we have more of the declaration section to soak up. 1394 # 1395 else: 1396 self.entry.declaration_purpose = \ 1397 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1398 else: 1399 # Unknown line, ignore 1400 self.emit_msg(ln, f"bad line: {line}") 1401 1402 1403 def process_special(self, ln, line): 1404 """ 1405 STATE_SPECIAL_SECTION: a section ending with a blank line 1406 """ 1407 # 1408 # If we have hit a blank line (only the " * " marker), then this 1409 # section is done. 1410 # 1411 if KernRe(r"\s*\*\s*$").match(line): 1412 self.entry.begin_section(ln, dump = True) 1413 self.state = state.BODY 1414 return 1415 # 1416 # Not a blank line, look for the other ways to end the section. 1417 # 1418 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1419 return 1420 # 1421 # OK, we should have a continuation of the text for this section. 1422 # 1423 if doc_content.search(line): 1424 cont = doc_content.group(1) 1425 # 1426 # If the lines of text after the first in a special section have 1427 # leading white space, we need to trim it out or Sphinx will get 1428 # confused. For the second line (the None case), see what we 1429 # find there and remember it. 1430 # 1431 if self.entry.leading_space is None: 1432 r = KernRe(r'^(\s+)') 1433 if r.match(cont): 1434 self.entry.leading_space = len(r.group(1)) 1435 else: 1436 self.entry.leading_space = 0 1437 # 1438 # Otherwise, before trimming any leading chars, be *sure* 1439 # that they are white space. We should maybe warn if this 1440 # isn't the case. 1441 # 1442 for i in range(0, self.entry.leading_space): 1443 if cont[i] != " ": 1444 self.entry.leading_space = i 1445 break 1446 # 1447 # Add the trimmed result to the section and we're done. 1448 # 1449 self.entry.add_text(cont[self.entry.leading_space:]) 1450 else: 1451 # Unknown line, ignore 1452 self.emit_msg(ln, f"bad line: {line}") 1453 1454 def process_body(self, ln, line): 1455 """ 1456 STATE_BODY: the bulk of a kerneldoc comment. 1457 """ 1458 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1459 return 1460 1461 if doc_content.search(line): 1462 cont = doc_content.group(1) 1463 self.entry.add_text(cont) 1464 else: 1465 # Unknown line, ignore 1466 self.emit_msg(ln, f"bad line: {line}") 1467 1468 def process_inline_name(self, ln, line): 1469 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1470 1471 if doc_inline_sect.search(line): 1472 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1473 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1474 self.state = state.INLINE_TEXT 1475 elif doc_inline_end.search(line): 1476 self.dump_section() 1477 self.state = state.PROTO 1478 elif doc_content.search(line): 1479 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1480 self.state = state.PROTO 1481 # else ... ?? 1482 1483 def process_inline_text(self, ln, line): 1484 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1485 1486 if doc_inline_end.search(line): 1487 self.dump_section() 1488 self.state = state.PROTO 1489 elif doc_content.search(line): 1490 self.entry.add_text(doc_content.group(1)) 1491 # else ... ?? 1492 1493 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1494 """ 1495 Handle syscall definitions 1496 """ 1497 1498 is_void = False 1499 1500 # Strip newlines/CR's 1501 proto = re.sub(r'[\r\n]+', ' ', proto) 1502 1503 # Check if it's a SYSCALL_DEFINE0 1504 if 'SYSCALL_DEFINE0' in proto: 1505 is_void = True 1506 1507 # Replace SYSCALL_DEFINE with correct return type & function name 1508 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1509 1510 r = KernRe(r'long\s+(sys_.*?),') 1511 if r.search(proto): 1512 proto = KernRe(',').sub('(', proto, count=1) 1513 elif is_void: 1514 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1515 1516 # Now delete all of the odd-numbered commas in the proto 1517 # so that argument types & names don't have a comma between them 1518 count = 0 1519 length = len(proto) 1520 1521 if is_void: 1522 length = 0 # skip the loop if is_void 1523 1524 for ix in range(length): 1525 if proto[ix] == ',': 1526 count += 1 1527 if count % 2 == 1: 1528 proto = proto[:ix] + ' ' + proto[ix + 1:] 1529 1530 return proto 1531 1532 def tracepoint_munge(self, ln, proto): 1533 """ 1534 Handle tracepoint definitions 1535 """ 1536 1537 tracepointname = None 1538 tracepointargs = None 1539 1540 # Match tracepoint name based on different patterns 1541 r = KernRe(r'TRACE_EVENT\((.*?),') 1542 if r.search(proto): 1543 tracepointname = r.group(1) 1544 1545 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1546 if r.search(proto): 1547 tracepointname = r.group(1) 1548 1549 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1550 if r.search(proto): 1551 tracepointname = r.group(2) 1552 1553 if tracepointname: 1554 tracepointname = tracepointname.lstrip() 1555 1556 r = KernRe(r'TP_PROTO\((.*?)\)') 1557 if r.search(proto): 1558 tracepointargs = r.group(1) 1559 1560 if not tracepointname or not tracepointargs: 1561 self.emit_msg(ln, 1562 f"Unrecognized tracepoint format:\n{proto}\n") 1563 else: 1564 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1565 self.entry.identifier = f"trace_{self.entry.identifier}" 1566 1567 return proto 1568 1569 def process_proto_function(self, ln, line): 1570 """Ancillary routine to process a function prototype""" 1571 1572 # strip C99-style comments to end of line 1573 line = KernRe(r"//.*$", re.S).sub('', line) 1574 # 1575 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1576 # 1577 if KernRe(r'\s*#\s*define').match(line): 1578 self.entry.prototype = line 1579 elif not line.startswith('#'): # skip other preprocessor stuff 1580 r = KernRe(r'([^\{]*)') 1581 if r.match(line): 1582 self.entry.prototype += r.group(1) + " " 1583 # 1584 # If we now have the whole prototype, clean it up and declare victory. 1585 # 1586 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1587 # strip comments and surrounding spaces 1588 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1589 # 1590 # Handle self.entry.prototypes for function pointers like: 1591 # int (*pcs_config)(struct foo) 1592 # by turning it into 1593 # int pcs_config(struct foo) 1594 # 1595 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1596 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1597 # 1598 # Handle special declaration syntaxes 1599 # 1600 if 'SYSCALL_DEFINE' in self.entry.prototype: 1601 self.entry.prototype = self.syscall_munge(ln, 1602 self.entry.prototype) 1603 else: 1604 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1605 if r.search(self.entry.prototype): 1606 self.entry.prototype = self.tracepoint_munge(ln, 1607 self.entry.prototype) 1608 # 1609 # ... and we're done 1610 # 1611 self.dump_function(ln, self.entry.prototype) 1612 self.reset_state(ln) 1613 1614 def process_proto_type(self, ln, line): 1615 """Ancillary routine to process a type""" 1616 1617 # Strip C99-style comments and surrounding whitespace 1618 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1619 if not line: 1620 return # nothing to see here 1621 1622 # To distinguish preprocessor directive from regular declaration later. 1623 if line.startswith('#'): 1624 line += ";" 1625 # 1626 # Split the declaration on any of { } or ;, and accumulate pieces 1627 # until we hit a semicolon while not inside {brackets} 1628 # 1629 r = KernRe(r'(.*?)([{};])') 1630 for chunk in r.split(line): 1631 if chunk: # Ignore empty matches 1632 self.entry.prototype += chunk 1633 # 1634 # This cries out for a match statement ... someday after we can 1635 # drop Python 3.9 ... 1636 # 1637 if chunk == '{': 1638 self.entry.brcount += 1 1639 elif chunk == '}': 1640 self.entry.brcount -= 1 1641 elif chunk == ';' and self.entry.brcount <= 0: 1642 self.dump_declaration(ln, self.entry.prototype) 1643 self.reset_state(ln) 1644 return 1645 # 1646 # We hit the end of the line while still in the declaration; put 1647 # in a space to represent the newline. 1648 # 1649 self.entry.prototype += ' ' 1650 1651 def process_proto(self, ln, line): 1652 """STATE_PROTO: reading a function/whatever prototype.""" 1653 1654 if doc_inline_oneline.search(line): 1655 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1656 self.entry.add_text(doc_inline_oneline.group(2)) 1657 self.dump_section() 1658 1659 elif doc_inline_start.search(line): 1660 self.state = state.INLINE_NAME 1661 1662 elif self.entry.decl_type == 'function': 1663 self.process_proto_function(ln, line) 1664 1665 else: 1666 self.process_proto_type(ln, line) 1667 1668 def process_docblock(self, ln, line): 1669 """STATE_DOCBLOCK: within a DOC: block.""" 1670 1671 if doc_end.search(line): 1672 self.dump_section() 1673 self.output_declaration("doc", self.entry.identifier) 1674 self.reset_state(ln) 1675 1676 elif doc_content.search(line): 1677 self.entry.add_text(doc_content.group(1)) 1678 1679 def parse_export(self): 1680 """ 1681 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1682 """ 1683 1684 export_table = set() 1685 1686 try: 1687 with open(self.fname, "r", encoding="utf8", 1688 errors="backslashreplace") as fp: 1689 1690 for line in fp: 1691 self.process_export(export_table, line) 1692 1693 except IOError: 1694 return None 1695 1696 return export_table 1697 1698 # 1699 # The state/action table telling us which function to invoke in 1700 # each state. 1701 # 1702 state_actions = { 1703 state.NORMAL: process_normal, 1704 state.NAME: process_name, 1705 state.BODY: process_body, 1706 state.DECLARATION: process_decl, 1707 state.SPECIAL_SECTION: process_special, 1708 state.INLINE_NAME: process_inline_name, 1709 state.INLINE_TEXT: process_inline_text, 1710 state.PROTO: process_proto, 1711 state.DOCBLOCK: process_docblock, 1712 } 1713 1714 def parse_kdoc(self): 1715 """ 1716 Open and process each line of a C source file. 1717 The parsing is controlled via a state machine, and the line is passed 1718 to a different process function depending on the state. The process 1719 function may update the state as needed. 1720 1721 Besides parsing kernel-doc tags, it also parses export symbols. 1722 """ 1723 1724 prev = "" 1725 prev_ln = None 1726 export_table = set() 1727 1728 try: 1729 with open(self.fname, "r", encoding="utf8", 1730 errors="backslashreplace") as fp: 1731 for ln, line in enumerate(fp): 1732 1733 line = line.expandtabs().strip("\n") 1734 1735 # Group continuation lines on prototypes 1736 if self.state == state.PROTO: 1737 if line.endswith("\\"): 1738 prev += line.rstrip("\\") 1739 if not prev_ln: 1740 prev_ln = ln 1741 continue 1742 1743 if prev: 1744 ln = prev_ln 1745 line = prev + line 1746 prev = "" 1747 prev_ln = None 1748 1749 self.config.log.debug("%d %s: %s", 1750 ln, state.name[self.state], 1751 line) 1752 1753 # This is an optimization over the original script. 1754 # There, when export_file was used for the same file, 1755 # it was read twice. Here, we use the already-existing 1756 # loop to parse exported symbols as well. 1757 # 1758 if (self.state != state.NORMAL) or \ 1759 not self.process_export(export_table, line): 1760 # Hand this line to the appropriate state handler 1761 self.state_actions[self.state](self, ln, line) 1762 1763 self.emit_unused_warnings() 1764 1765 except OSError: 1766 self.config.log.error(f"Error: Cannot open file {self.fname}") 1767 1768 return export_table, self.entries 1769