1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.kdoc_re import NestedMatch, KernRe 17from kdoc.kdoc_item import KdocItem 18 19# 20# Regular expressions used to parse kernel-doc markups at KernelDoc class. 21# 22# Let's declare them in lowercase outside any class to make it easier to 23# convert from the Perl script. 24# 25# As those are evaluated at the beginning, no need to cache them 26# 27 28# Allow whitespace at end of comment start. 29doc_start = KernRe(r'^/\*\*\s*$', cache=False) 30 31doc_end = KernRe(r'\*/', cache=False) 32doc_com = KernRe(r'\s*\*\s*', cache=False) 33doc_com_body = KernRe(r'\s*\* ?', cache=False) 34doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 35 36# @params and a strictly limited set of supported section names 37# Specifically: 38# Match @word: 39# @...: 40# @{section-name}: 41# while trying to not match literal block starts like "example::" 42# 43known_section_names = 'description|context|returns?|notes?|examples?' 
# Case-insensitive matcher for the supported section names.
known_sections = KernRe(known_section_names, flags = re.I)
# A "@param:" or "Section:" header line; group 1 is the name, group 2 the
# text that follows the colon (which must not start with another colon).
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Any comment body line; group 1 is the text after the " * " prefix.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline (in-declaration) documentation comments.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL*() invocations; group 2 is the exported symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name" reference, optionally with ".member" / "->member" parts and a
# trailing "..."; group 1 is everything after the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
75# 76struct_args_pattern = r'([^,)]+)' 77 78struct_xforms = [ 79 # Strip attributes 80 (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), 81 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 82 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 83 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 84 (KernRe(r'\s*__packed\s*', re.S), ' '), 85 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 86 (KernRe(r'\s*__private', re.S), ' '), 87 (KernRe(r'\s*__rcu', re.S), ' '), 88 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 89 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 90 (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), 91 # 92 # Unwrap struct_group macros based on this definition: 93 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 94 # which has variants like: struct_group(NAME, MEMBERS...) 95 # Only MEMBERS arguments require documentation. 96 # 97 # Parsing them happens on two steps: 98 # 99 # 1. drop struct group arguments that aren't at MEMBERS, 100 # storing them as STRUCT_GROUP(MEMBERS) 101 # 102 # 2. remove STRUCT_GROUP() ancillary macro. 103 # 104 # The original logic used to remove STRUCT_GROUP() using an 105 # advanced regex: 106 # 107 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 108 # 109 # with two patterns that are incompatible with 110 # Python re module, as it has: 111 # 112 # - a recursive pattern: (?1) 113 # - an atomic grouping: (?>...) 114 # 115 # I tried a simpler version: but it didn't work either: 116 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 117 # 118 # As it doesn't properly match the end parenthesis on some cases. 119 # 120 # So, a better solution was crafted: there's now a NestedMatch 121 # class that ensures that delimiters after a search are properly 122 # matched. So, the implementation to drop STRUCT_GROUP() will be 123 # handled in separate. 
124 # 125 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 126 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 127 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 128 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 129 # 130 # Replace macros 131 # 132 # TODO: use NestedMatch for FOO($1, $2, ...) matches 133 # 134 # it is better to also move those to the NestedMatch logic, 135 # to ensure that parentheses will be properly matched. 136 # 137 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), 138 r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 139 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), 140 r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 141 (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 142 re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 143 (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 144 re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 145 (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + 146 r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), 147 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + 148 struct_args_pattern + r'\)', re.S), r'\2 *\1'), 149 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + 150 struct_args_pattern + r'\)', re.S), r'\1 \2[]'), 151 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), 152 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), 153] 154# 155# Regexes here are guaranteed to have the end delimiter matching 156# the start delimiter. Yet, right now, only one replace group 157# is allowed. 
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Transforms for function prototypes
#
function_xforms = [
    (KernRe(r"^static +"), ""),
    (KernRe(r"^extern +"), ""),
    (KernRe(r"^asmlinkage +"), ""),
    (KernRe(r"^inline +"), ""),
    (KernRe(r"^__inline__ +"), ""),
    (KernRe(r"^__inline +"), ""),
    (KernRe(r"^__always_inline +"), ""),
    (KernRe(r"^noinline +"), ""),
    (KernRe(r"^__FORTIFY_INLINE +"), ""),
    (KernRe(r"__init +"), ""),
    (KernRe(r"__init_or_module +"), ""),
    (KernRe(r"__deprecated +"), ""),
    (KernRe(r"__flatten +"), ""),
    (KernRe(r"__meminit +"), ""),
    (KernRe(r"__must_check +"), ""),
    (KernRe(r"__weak +"), ""),
    (KernRe(r"__sched +"), ""),
    (KernRe(r"_noprof"), ""),
    (KernRe(r"__always_unused *"), ""),
    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
    (KernRe(r"__attribute_const__ +"), ""),
    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]

#
# Ancillary functions
#

def apply_transforms(xforms, text):
    """
    Apply a set of transforms to a block of text.

    ``xforms`` is an iterable of ``(pattern, replacement)`` pairs; each
    pattern's ``sub()`` is applied in order, feeding the result of one
    substitution into the next. Returns the transformed text.
    """
    for search, subst in xforms:
        text = search.sub(subst, text)
    return text

multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    A little helper to get rid of excess white space: strip both ends of
    ``s`` and collapse every run of two or more whitespace characters into
    a single space.
    """
    return multi_space.sub(' ', s.strip())

def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    Returns ``text`` without the private regions, without any remaining
    C comments and with leading/trailing whitespace stripped.
    """
    #
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL          = 0        #: Normal code.
    NAME            = 1        #: Looking for function name.
    DECLARATION     = 2        #: We have seen a declaration which might not be done.
    BODY            = 3        #: The body of the comment.
    SPECIAL_SECTION = 4        #: Doc section ending with a blank line.
    PROTO           = 5        #: Scanning prototype.
    DOCBLOCK        = 6        #: Documentation block.
    INLINE_NAME     = 7        #: Gathering doc outside main block.
    INLINE_TEXT     = 8        #: Reading the body of inline docs.

    #: Names for each parser state, indexed by the state value.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    It accumulates the text of the section currently being read, the
    finished sections, the parameter descriptions and the warnings
    produced while parsing one kernel-doc comment.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry for file ``fname``; ``ln`` is the line number
        used to derive ``declaration_start_line`` (``ln + 1``).
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # Section bookkeeping. Both attributes are read by dump_section();
        # give them defaults so that calling it before begin_section() is
        # a harmless no-op instead of an AttributeError.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message for line ``ln``.

        Informational messages (``warning=False``) are logged right away;
        warnings are stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section called ``title`` starting at ``line_no``.

        When ``dump`` is true, the section gathered so far is stored first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name looks like ``@param`` are stored as parameter
        descriptions; everything else goes to ``self.sections``. When
        ``start_new`` is true, the accumulator is reset to an empty default
        section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

# Set once the "Python too old" warning has been emitted, so that it is
# reported a single time.
python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
382 undescribed = "-- undescribed --" 383 384 def __init__(self, config, fname): 385 """Initialize internal variables""" 386 387 self.fname = fname 388 self.config = config 389 390 # Initial state for the state machines 391 self.state = state.NORMAL 392 393 # Store entry currently being processed 394 self.entry = None 395 396 # Place all potential outputs into an array 397 self.entries = [] 398 399 # 400 # We need Python 3.7 for its "dicts remember the insertion 401 # order" guarantee 402 # 403 global python_warning 404 if (not python_warning and 405 sys.version_info.major == 3 and sys.version_info.minor < 7): 406 407 self.emit_msg(0, 408 'Python 3.7 or later is required for correct results') 409 python_warning = True 410 411 def emit_msg(self, ln, msg, *, warning=True): 412 """Emit a message""" 413 414 if self.entry: 415 self.entry.emit_msg(ln, msg, warning=warning) 416 return 417 418 log_msg = f"{self.fname}:{ln} {msg}" 419 420 if warning: 421 self.config.log.warning(log_msg) 422 else: 423 self.config.log.info(log_msg) 424 425 def dump_section(self, start_new=True): 426 """ 427 Dump section contents to arrays/hashes intended for that purpose. 428 """ 429 430 if self.entry: 431 self.entry.dump_section(start_new) 432 433 # TODO: rename it to store_declaration after removal of kernel-doc.pl 434 def output_declaration(self, dtype, name, **args): 435 """ 436 Store the entry into an entry array. 437 438 The actual output and output filters will be handled elsewhere. 
439 """ 440 441 item = KdocItem(name, self.fname, dtype, 442 self.entry.declaration_start_line, **args) 443 item.warnings = self.entry.warnings 444 445 # Drop empty sections 446 # TODO: improve empty sections logic to emit warnings 447 sections = self.entry.sections 448 for section in ["Description", "Return"]: 449 if section in sections and not sections[section].rstrip(): 450 del sections[section] 451 item.set_sections(sections, self.entry.section_start_lines) 452 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 453 self.entry.parametertypes, 454 self.entry.parameterdesc_start_lines) 455 self.entries.append(item) 456 457 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 458 459 def emit_unused_warnings(self): 460 """ 461 When the parser fails to produce a valid entry, it places some 462 warnings under `entry.warnings` that will be discarded when resetting 463 the state. 464 465 Ensure that those warnings are not lost. 466 467 .. note:: 468 469 Because we are calling `config.warning()` here, those 470 warnings are not filtered by the `-W` parameters: they will all 471 be produced even when `-Wreturn`, `-Wshort-desc`, and/or 472 `-Wcontents-before-sections` are used. 473 474 Allowing those warnings to be filtered is complex, because it 475 would require storing them in a buffer and then filtering them 476 during the output step of the code, depending on the 477 selected symbols. 478 """ 479 if self.entry and self.entry not in self.entries: 480 for log_msg in self.entry.warnings: 481 self.config.warning(log_msg) 482 483 def reset_state(self, ln): 484 """ 485 Ancillary routine to create a new entry. It initializes all 486 variables used by the state machine. 
487 """ 488 489 self.emit_unused_warnings() 490 491 self.entry = KernelEntry(self.config, self.fname, ln) 492 493 # State flags 494 self.state = state.NORMAL 495 496 def push_parameter(self, ln, decl_type, param, dtype, 497 org_arg, declaration_name): 498 """ 499 Store parameters and their descriptions at self.entry. 500 """ 501 502 if self.entry.anon_struct_union and dtype == "" and param == "}": 503 return # Ignore the ending }; from anonymous struct/union 504 505 self.entry.anon_struct_union = False 506 507 param = KernRe(r'[\[\)].*').sub('', param, count=1) 508 509 # 510 # Look at various "anonymous type" cases. 511 # 512 if dtype == '': 513 if param.endswith("..."): 514 if len(param) > 3: # there is a name provided, use that 515 param = param[:-3] 516 if not self.entry.parameterdescs.get(param): 517 self.entry.parameterdescs[param] = "variable arguments" 518 519 elif (not param) or param == "void": 520 param = "void" 521 self.entry.parameterdescs[param] = "no arguments" 522 523 elif param in ["struct", "union"]: 524 # Handle unnamed (anonymous) union or struct 525 dtype = param 526 param = "{unnamed_" + param + "}" 527 self.entry.parameterdescs[param] = "anonymous\n" 528 self.entry.anon_struct_union = True 529 530 # Warn if parameter has no description 531 # (but ignore ones starting with # as these are not parameters 532 # but inline preprocessor statements) 533 if param not in self.entry.parameterdescs and not param.startswith("#"): 534 self.entry.parameterdescs[param] = self.undescribed 535 536 if "." not in param: 537 if decl_type == 'function': 538 dname = f"{decl_type} parameter" 539 else: 540 dname = f"{decl_type} member" 541 542 self.emit_msg(ln, 543 f"{dname} '{param}' not described in '{declaration_name}'") 544 545 # Strip spaces from param so that it is one continuous string on 546 # parameterlist. 
This fixes a problem where check_sections() 547 # cannot find a parameter like "addr[6 + 2]" because it actually 548 # appears as "addr[6", "+", "2]" on the parameter list. 549 # However, it's better to maintain the param string unchanged for 550 # output, so just weaken the string compare in check_sections() 551 # to ignore "[blah" in a parameter string. 552 553 self.entry.parameterlist.append(param) 554 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 555 self.entry.parametertypes[param] = org_arg 556 557 558 def create_parameter_list(self, ln, decl_type, args, 559 splitter, declaration_name): 560 """ 561 Creates a list of parameters, storing them at self.entry. 562 """ 563 564 # temporarily replace all commas inside function pointer definition 565 arg_expr = KernRe(r'(\([^\),]+),') 566 while arg_expr.search(args): 567 args = arg_expr.sub(r"\1#", args) 568 569 for arg in args.split(splitter): 570 # Ignore argument attributes 571 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 572 573 # Strip leading/trailing spaces 574 arg = arg.strip() 575 arg = KernRe(r'\s+').sub(' ', arg, count=1) 576 577 if arg.startswith('#'): 578 # Treat preprocessor directive as a typeless variable just to fill 579 # corresponding data structures "correctly". Catch it later in 580 # output_* subs. 581 582 # Treat preprocessor directive as a typeless variable 583 self.push_parameter(ln, decl_type, arg, "", 584 "", declaration_name) 585 # 586 # The pointer-to-function case. 587 # 588 elif KernRe(r'\(.+\)\s*\(').search(arg): 589 arg = arg.replace('#', ',') 590 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" 591 r'([\w\[\].]*)' # Capture the name and possible [array] 592 r'\s*\)') # Make sure the trailing ")" is there 593 if r.match(arg): 594 param = r.group(1) 595 else: 596 self.emit_msg(ln, f"Invalid param: {arg}") 597 param = arg 598 dtype = arg.replace(param, '') 599 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 600 # 601 # The array-of-pointers case. 
Dig the parameter name out from the middle 602 # of the declaration. 603 # 604 elif KernRe(r'\(.+\)\s*\[').search(arg): 605 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" 606 r'([\w.]*?)' # The actual pointer name 607 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] 608 if r.match(arg): 609 param = r.group(1) 610 else: 611 self.emit_msg(ln, f"Invalid param: {arg}") 612 param = arg 613 dtype = arg.replace(param, '') 614 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 615 elif arg: 616 # 617 # Clean up extraneous spaces and split the string at commas; the first 618 # element of the resulting list will also include the type information. 619 # 620 arg = KernRe(r'\s*:\s*').sub(":", arg) 621 arg = KernRe(r'\s*\[').sub('[', arg) 622 args = KernRe(r'\s*,\s*').split(arg) 623 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 624 # 625 # args[0] has a string of "type a". If "a" includes an [array] 626 # declaration, we want to not be fooled by any white space inside 627 # the brackets, so detect and handle that case specially. 628 # 629 r = KernRe(r'^([^[\]]*\s+)(.*)$') 630 if r.match(args[0]): 631 args[0] = r.group(2) 632 dtype = r.group(1) 633 else: 634 # No space in args[0]; this seems wrong but preserves previous behavior 635 dtype = '' 636 637 bitfield_re = KernRe(r'(.*?):(\w+)') 638 for param in args: 639 # 640 # For pointers, shift the star(s) from the variable name to the 641 # type declaration. 642 # 643 r = KernRe(r'^(\*+)\s*(.*)') 644 if r.match(param): 645 self.push_parameter(ln, decl_type, r.group(2), 646 f"{dtype} {r.group(1)}", 647 arg, declaration_name) 648 # 649 # Perform a similar shift for bitfields. 
650 # 651 elif bitfield_re.search(param): 652 if dtype != "": # Skip unnamed bit-fields 653 self.push_parameter(ln, decl_type, bitfield_re.group(1), 654 f"{dtype}:{bitfield_re.group(2)}", 655 arg, declaration_name) 656 else: 657 self.push_parameter(ln, decl_type, param, dtype, 658 arg, declaration_name) 659 660 def check_sections(self, ln, decl_name, decl_type): 661 """ 662 Check for errors inside sections, emitting warnings if not found 663 parameters are described. 664 """ 665 for section in self.entry.sections: 666 if section not in self.entry.parameterlist and \ 667 not known_sections.search(section): 668 if decl_type == 'function': 669 dname = f"{decl_type} parameter" 670 else: 671 dname = f"{decl_type} member" 672 self.emit_msg(ln, 673 f"Excess {dname} '{section}' description in '{decl_name}'") 674 675 def check_return_section(self, ln, declaration_name, return_type): 676 """ 677 If the function doesn't return void, warns about the lack of a 678 return description. 679 """ 680 681 if not self.config.wreturn: 682 return 683 684 # Ignore an empty return type (It's a macro) 685 # Ignore functions with a "void" return type (but not "void *") 686 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 687 return 688 689 if not self.entry.sections.get("Return", None): 690 self.emit_msg(ln, 691 f"No description found for return value of '{declaration_name}'") 692 693 def split_struct_proto(self, proto): 694 """ 695 Split apart a structure prototype; returns (struct|union, name, 696 members) or ``None``. 697 """ 698 699 type_pattern = r'(struct|union)' 700 qualifiers = [ 701 "__attribute__", 702 "__packed", 703 "__aligned", 704 "____cacheline_aligned_in_smp", 705 "____cacheline_aligned", 706 ] 707 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 
708 709 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 710 if r.search(proto): 711 return (r.group(1), r.group(2), r.group(3)) 712 else: 713 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 714 if r.search(proto): 715 return (r.group(1), r.group(3), r.group(2)) 716 return None 717 718 def rewrite_struct_members(self, members): 719 """ 720 Process ``struct``/``union`` members from the most deeply nested 721 outward. 722 723 Rewrite the members of a ``struct`` or ``union`` for easier formatting 724 later on. Among other things, this function will turn a member like:: 725 726 struct { inner_members; } foo; 727 728 into:: 729 730 struct foo; inner_members; 731 """ 732 733 # 734 # The trick is in the ``^{`` below - it prevents a match of an outer 735 # ``struct``/``union`` until the inner one has been munged 736 # (removing the ``{`` in the process). 737 # 738 struct_members = KernRe(r'(struct|union)' # 0: declaration type 739 r'([^\{\};]+)' # 1: possible name 740 r'(\{)' 741 r'([^\{\}]*)' # 3: Contents of declaration 742 r'(\})' 743 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration 744 tuples = struct_members.findall(members) 745 while tuples: 746 for t in tuples: 747 newmember = "" 748 oldmember = "".join(t) # Reconstruct the original formatting 749 dtype, name, lbr, content, rbr, rest, semi = t 750 # 751 # Pass through each field name, normalizing the form and formatting. 752 # 753 for s_id in rest.split(','): 754 s_id = s_id.strip() 755 newmember += f"{dtype} {s_id}; " 756 # 757 # Remove bitfield/array/pointer info, getting the bare name. 758 # 759 s_id = KernRe(r'[:\[].*').sub('', s_id) 760 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 761 # 762 # Pass through the members of this inner structure/union. 
763 # 764 for arg in content.split(';'): 765 arg = arg.strip() 766 # 767 # Look for (type)(*name)(args) - pointer to function 768 # 769 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') 770 if r.match(arg): 771 dtype, name, extra = r.group(1), r.group(2), r.group(3) 772 # Pointer-to-function 773 if not s_id: 774 # Anonymous struct/union 775 newmember += f"{dtype}{name}{extra}; " 776 else: 777 newmember += f"{dtype}{s_id}.{name}{extra}; " 778 # 779 # Otherwise a non-function member. 780 # 781 else: 782 # 783 # Remove bitmap and array portions and spaces around commas 784 # 785 arg = KernRe(r':\s*\d+\s*').sub('', arg) 786 arg = KernRe(r'\[.*\]').sub('', arg) 787 arg = KernRe(r'\s*,\s*').sub(',', arg) 788 # 789 # Look for a normal decl - "type name[,name...]" 790 # 791 r = KernRe(r'(.*)\s+([\S+,]+)') 792 if r.search(arg): 793 for name in r.group(2).split(','): 794 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) 795 if not s_id: 796 # Anonymous struct/union 797 newmember += f"{r.group(1)} {name}; " 798 else: 799 newmember += f"{r.group(1)} {s_id}.{name}; " 800 else: 801 newmember += f"{arg}; " 802 # 803 # At the end of the s_id loop, replace the original declaration with 804 # the munged version. 805 # 806 members = members.replace(oldmember, newmember) 807 # 808 # End of the tuple loop - search again and see if there are outer members 809 # that now turn up. 810 # 811 tuples = struct_members.findall(members) 812 return members 813 814 def format_struct_decl(self, declaration): 815 """ 816 Format the ``struct`` declaration into a standard form for inclusion 817 in the resulting docs. 818 """ 819 820 # 821 # Insert newlines, get rid of extra spaces. 822 # 823 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 824 declaration = KernRe(r'\}\s+;').sub('};', declaration) 825 # 826 # Format inline enums with each member on its own line. 
827 # 828 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 829 while r.search(declaration): 830 declaration = r.sub(r'\1,\n\2', declaration) 831 # 832 # Now go through and supply the right number of tabs 833 # for each line. 834 # 835 def_args = declaration.split('\n') 836 level = 1 837 declaration = "" 838 for clause in def_args: 839 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) 840 if clause: 841 if '}' in clause and level > 1: 842 level -= 1 843 if not clause.startswith('#'): 844 declaration += "\t" * level 845 declaration += "\t" + clause + "\n" 846 if "{" in clause and "}" not in clause: 847 level += 1 848 return declaration 849 850 851 def dump_struct(self, ln, proto): 852 """ 853 Store an entry for a ``struct`` or ``union`` 854 """ 855 # 856 # Do the basic parse to get the pieces of the declaration. 857 # 858 struct_parts = self.split_struct_proto(proto) 859 if not struct_parts: 860 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 861 return 862 decl_type, declaration_name, members = struct_parts 863 864 if self.entry.identifier != declaration_name: 865 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " 866 f"Prototype was for {decl_type} {declaration_name} instead\n") 867 return 868 # 869 # Go through the list of members applying all of our transformations. 870 # 871 members = trim_private_members(members) 872 members = apply_transforms(struct_xforms, members) 873 874 nested = NestedMatch() 875 for search, sub in struct_nested_prefixes: 876 members = nested.sub(search, sub, members) 877 # 878 # Deal with embedded struct and union members, and drop enums entirely. 879 # 880 declaration = members 881 members = self.rewrite_struct_members(members) 882 members = re.sub(r'(\{[^\{\}]*\})', '', members) 883 # 884 # Output the result and we are done. 
885 # 886 self.create_parameter_list(ln, decl_type, members, ';', 887 declaration_name) 888 self.check_sections(ln, declaration_name, decl_type) 889 self.output_declaration(decl_type, declaration_name, 890 definition=self.format_struct_decl(declaration), 891 purpose=self.entry.declaration_purpose) 892 893 def dump_enum(self, ln, proto): 894 """ 895 Store an ``enum`` inside self.entries array. 896 """ 897 # 898 # Strip preprocessor directives. Note that this depends on the 899 # trailing semicolon we added in process_proto_type(). 900 # 901 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 902 # 903 # Parse out the name and members of the enum. Typedef form first. 904 # 905 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 906 if r.search(proto): 907 declaration_name = r.group(2) 908 members = trim_private_members(r.group(1)) 909 # 910 # Failing that, look for a straight enum 911 # 912 else: 913 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 914 if r.match(proto): 915 declaration_name = r.group(1) 916 members = trim_private_members(r.group(2)) 917 # 918 # OK, this isn't going to work. 919 # 920 else: 921 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 922 return 923 # 924 # Make sure we found what we were expecting. 925 # 926 if self.entry.identifier != declaration_name: 927 if self.entry.identifier == "": 928 self.emit_msg(ln, 929 f"{proto}: wrong kernel-doc identifier on prototype") 930 else: 931 self.emit_msg(ln, 932 f"expecting prototype for enum {self.entry.identifier}. " 933 f"Prototype was for enum {declaration_name} instead") 934 return 935 936 if not declaration_name: 937 declaration_name = "(anonymous)" 938 # 939 # Parse out the name of each enum member, and verify that we 940 # have a description for it. 
941 # 942 member_set = set() 943 members = KernRe(r'\([^;)]*\)').sub('', members) 944 for arg in members.split(','): 945 if not arg: 946 continue 947 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 948 self.entry.parameterlist.append(arg) 949 if arg not in self.entry.parameterdescs: 950 self.entry.parameterdescs[arg] = self.undescribed 951 self.emit_msg(ln, 952 f"Enum value '{arg}' not described in enum '{declaration_name}'") 953 member_set.add(arg) 954 # 955 # Ensure that every described member actually exists in the enum. 956 # 957 for k in self.entry.parameterdescs: 958 if k not in member_set: 959 self.emit_msg(ln, 960 f"Excess enum value '@{k}' description in '{declaration_name}'") 961 962 self.output_declaration('enum', declaration_name, 963 purpose=self.entry.declaration_purpose) 964 965 def dump_var(self, ln, proto): 966 """ 967 Store variables that are part of kAPI. 968 """ 969 VAR_ATTRIBS = [ 970 "extern", 971 ] 972 OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" 973 974 sub_prefixes = [ 975 (KernRe(r"__read_mostly"), ""), 976 (KernRe(r"__ro_after_init"), ""), 977 (KernRe(r"(?://.*)$"), ""), 978 (KernRe(r"(?:/\*.*\*/)"), ""), 979 (KernRe(r";$"), ""), 980 (KernRe(r"=.*"), ""), 981 ] 982 983 # 984 # Store the full prototype before modifying it 985 # 986 full_proto = proto 987 declaration_name = None 988 989 # 990 # Handle macro definitions 991 # 992 macro_prefixes = [ 993 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"), 994 ] 995 996 for r in macro_prefixes: 997 match = r.search(proto) 998 if match: 999 declaration_name = match.group(1) 1000 break 1001 1002 # 1003 # Drop comments and macros to have a pure C prototype 1004 # 1005 if not declaration_name: 1006 for r, sub in sub_prefixes: 1007 proto = r.sub(sub, proto) 1008 1009 proto = proto.rstrip() 1010 1011 # 1012 # Variable name is at the end of the declaration 1013 # 1014 1015 default_val = None 1016 1017 r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1018 if r.match(proto): 
1019 if not declaration_name: 1020 declaration_name = r.group(1) 1021 1022 default_val = r.group(2) 1023 else: 1024 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1025 if r.match(proto): 1026 default_val = r.group(1) 1027 1028 if not declaration_name: 1029 self.emit_msg(ln,f"{proto}: can't parse variable") 1030 return 1031 1032 if default_val: 1033 default_val = default_val.lstrip("=").strip() 1034 1035 self.output_declaration("var", declaration_name, 1036 full_proto=full_proto, 1037 default_val=default_val, 1038 purpose=self.entry.declaration_purpose) 1039 1040 def dump_declaration(self, ln, prototype): 1041 """ 1042 Store a data declaration inside self.entries array. 1043 """ 1044 1045 if self.entry.decl_type == "enum": 1046 self.dump_enum(ln, prototype) 1047 elif self.entry.decl_type == "typedef": 1048 self.dump_typedef(ln, prototype) 1049 elif self.entry.decl_type in ["union", "struct"]: 1050 self.dump_struct(ln, prototype) 1051 elif self.entry.decl_type == "var": 1052 self.dump_var(ln, prototype) 1053 else: 1054 # This would be a bug 1055 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 1056 1057 def dump_function(self, ln, prototype): 1058 """ 1059 Store a function or function macro inside self.entries array. 1060 """ 1061 1062 found = func_macro = False 1063 return_type = '' 1064 decl_type = 'function' 1065 # 1066 # Apply the initial transformations. 1067 # 1068 prototype = apply_transforms(function_xforms, prototype) 1069 # 1070 # If we have a macro, remove the "#define" at the front. 1071 # 1072 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1073 if new_proto != prototype: 1074 prototype = new_proto 1075 # 1076 # Dispense with the simple "#define A B" case here; the key 1077 # is the space after the name of the symbol being defined. 1078 # NOTE that the seemingly misnamed "func_macro" indicates a 1079 # macro *without* arguments. 
1080 # 1081 r = KernRe(r'^(\w+)\s+') 1082 if r.search(prototype): 1083 return_type = '' 1084 declaration_name = r.group(1) 1085 func_macro = True 1086 found = True 1087 1088 # Yes, this truly is vile. We are looking for: 1089 # 1. Return type (may be nothing if we're looking at a macro) 1090 # 2. Function name 1091 # 3. Function parameters. 1092 # 1093 # All the while we have to watch out for function pointer parameters 1094 # (which IIRC is what the two sections are for), C types (these 1095 # regexps don't even start to express all the possibilities), and 1096 # so on. 1097 # 1098 # If you mess with these regexps, it's a good idea to check that 1099 # the following functions' documentation still comes out right: 1100 # - parport_register_device (function pointer parameters) 1101 # - atomic_set (macro) 1102 # - pci_match_device, __copy_to_user (long return type) 1103 1104 name = r'\w+' 1105 type1 = r'(?:[\w\s]+)?' 1106 type2 = r'(?:[\w\s]+\*+)+' 1107 # 1108 # Attempt to match first on (args) with no internal parentheses; this 1109 # lets us easily filter out __acquires() and other post-args stuff. If 1110 # that fails, just grab the rest of the line to the last closing 1111 # parenthesis. 1112 # 1113 proto_args = r'\(([^\(]*|.*)\)' 1114 # 1115 # (Except for the simple macro case) attempt to split up the prototype 1116 # in the various ways we understand. 1117 # 1118 if not found: 1119 patterns = [ 1120 rf'^()({name})\s*{proto_args}', 1121 rf'^({type1})\s+({name})\s*{proto_args}', 1122 rf'^({type2})\s*({name})\s*{proto_args}', 1123 ] 1124 1125 for p in patterns: 1126 r = KernRe(p) 1127 if r.match(prototype): 1128 return_type = r.group(1) 1129 declaration_name = r.group(2) 1130 args = r.group(3) 1131 self.create_parameter_list(ln, decl_type, args, ',', 1132 declaration_name) 1133 found = True 1134 break 1135 # 1136 # Parsing done; make sure that things are as we expect. 
1137 # 1138 if not found: 1139 self.emit_msg(ln, 1140 f"cannot understand function prototype: '{prototype}'") 1141 return 1142 if self.entry.identifier != declaration_name: 1143 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " 1144 f"Prototype was for {declaration_name}() instead") 1145 return 1146 self.check_sections(ln, declaration_name, "function") 1147 self.check_return_section(ln, declaration_name, return_type) 1148 # 1149 # Store the result. 1150 # 1151 self.output_declaration(decl_type, declaration_name, 1152 typedef=('typedef' in return_type), 1153 functiontype=return_type, 1154 purpose=self.entry.declaration_purpose, 1155 func_macro=func_macro) 1156 1157 1158 def dump_typedef(self, ln, proto): 1159 """ 1160 Store a ``typedef`` inside self.entries array. 1161 """ 1162 # 1163 # We start by looking for function typedefs. 1164 # 1165 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1166 typedef_ident = r'\*?\s*(\w\S+)\s*' 1167 typedef_args = r'\s*\((.*)\);' 1168 1169 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1170 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1171 1172 # Parse function typedef prototypes 1173 for r in [typedef1, typedef2]: 1174 if not r.match(proto): 1175 continue 1176 1177 return_type = r.group(1).strip() 1178 declaration_name = r.group(2) 1179 args = r.group(3) 1180 1181 if self.entry.identifier != declaration_name: 1182 self.emit_msg(ln, 1183 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1184 return 1185 1186 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1187 1188 self.output_declaration('function', declaration_name, 1189 typedef=True, 1190 functiontype=return_type, 1191 purpose=self.entry.declaration_purpose) 1192 return 1193 # 1194 # Not a function, try to parse a simple typedef. 
1195 # 1196 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1197 if r.match(proto): 1198 declaration_name = r.group(1) 1199 1200 if self.entry.identifier != declaration_name: 1201 self.emit_msg(ln, 1202 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1203 return 1204 1205 self.output_declaration('typedef', declaration_name, 1206 purpose=self.entry.declaration_purpose) 1207 return 1208 1209 self.emit_msg(ln, "error: Cannot parse typedef!") 1210 1211 @staticmethod 1212 def process_export(function_set, line): 1213 """ 1214 process ``EXPORT_SYMBOL*`` tags 1215 1216 This method doesn't use any variable from the class, so declare it 1217 with a staticmethod decorator. 1218 """ 1219 1220 # We support documenting some exported symbols with different 1221 # names. A horrible hack. 1222 suffixes = [ '_noprof' ] 1223 1224 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1225 # multiple export lines would violate Kernel coding style. 1226 1227 if export_symbol.search(line): 1228 symbol = export_symbol.group(2) 1229 elif export_symbol_ns.search(line): 1230 symbol = export_symbol_ns.group(2) 1231 else: 1232 return False 1233 # 1234 # Found an export, trim out any special suffixes 1235 # 1236 for suffix in suffixes: 1237 # Be backward compatible with Python < 3.9 1238 if symbol.endswith(suffix): 1239 symbol = symbol[:-len(suffix)] 1240 function_set.add(symbol) 1241 return True 1242 1243 def process_normal(self, ln, line): 1244 """ 1245 STATE_NORMAL: looking for the ``/**`` to begin everything. 1246 """ 1247 1248 if not doc_start.match(line): 1249 return 1250 1251 # start a new entry 1252 self.reset_state(ln) 1253 1254 # next line is always the function name 1255 self.state = state.NAME 1256 1257 def process_name(self, ln, line): 1258 """ 1259 STATE_NAME: Looking for the "name - description" line 1260 """ 1261 # 1262 # Check for a DOC: block and handle them specially. 
1263 # 1264 if doc_block.search(line): 1265 1266 if not doc_block.group(1): 1267 self.entry.begin_section(ln, "Introduction") 1268 else: 1269 self.entry.begin_section(ln, doc_block.group(1)) 1270 1271 self.entry.identifier = self.entry.section 1272 self.state = state.DOCBLOCK 1273 # 1274 # Otherwise we're looking for a normal kerneldoc declaration line. 1275 # 1276 elif doc_decl.search(line): 1277 self.entry.identifier = doc_decl.group(1) 1278 1279 # Test for data declaration 1280 if doc_begin_data.search(line): 1281 self.entry.decl_type = doc_begin_data.group(1) 1282 self.entry.identifier = doc_begin_data.group(2) 1283 # 1284 # Look for a function description 1285 # 1286 elif doc_begin_func.search(line): 1287 self.entry.identifier = doc_begin_func.group(1) 1288 self.entry.decl_type = "function" 1289 # 1290 # We struck out. 1291 # 1292 else: 1293 self.emit_msg(ln, 1294 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1295 self.state = state.NORMAL 1296 return 1297 # 1298 # OK, set up for a new kerneldoc entry. 1299 # 1300 self.state = state.BODY 1301 self.entry.identifier = self.entry.identifier.strip(" ") 1302 # if there's no @param blocks need to set up default section here 1303 self.entry.begin_section(ln + 1) 1304 # 1305 # Find the description portion, which *should* be there but 1306 # isn't always. 
1307 # (We should be able to capture this from the previous parsing - someday) 1308 # 1309 r = KernRe("[-:](.*)") 1310 if r.search(line): 1311 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1312 self.state = state.DECLARATION 1313 else: 1314 self.entry.declaration_purpose = "" 1315 1316 if not self.entry.declaration_purpose and self.config.wshort_desc: 1317 self.emit_msg(ln, 1318 f"missing initial short description on line:\n{line}") 1319 1320 if not self.entry.identifier and self.entry.decl_type != "enum": 1321 self.emit_msg(ln, 1322 f"wrong kernel-doc identifier on line:\n{line}") 1323 self.state = state.NORMAL 1324 1325 if self.config.verbose: 1326 self.emit_msg(ln, 1327 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1328 warning=False) 1329 # 1330 # Failed to find an identifier. Emit a warning 1331 # 1332 else: 1333 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1334 1335 def is_new_section(self, ln, line): 1336 """ 1337 Helper function to determine if a new section is being started. 1338 """ 1339 if doc_sect.search(line): 1340 self.state = state.BODY 1341 # 1342 # Pick out the name of our new section, tweaking it if need be. 1343 # 1344 newsection = doc_sect.group(1) 1345 if newsection.lower() == 'description': 1346 newsection = 'Description' 1347 elif newsection.lower() == 'context': 1348 newsection = 'Context' 1349 self.state = state.SPECIAL_SECTION 1350 elif newsection.lower() in ["@return", "@returns", 1351 "return", "returns"]: 1352 newsection = "Return" 1353 self.state = state.SPECIAL_SECTION 1354 elif newsection[0] == '@': 1355 self.state = state.SPECIAL_SECTION 1356 # 1357 # Initialize the contents, and get the new section going. 
1358 # 1359 newcontents = doc_sect.group(2) 1360 if not newcontents: 1361 newcontents = "" 1362 self.dump_section() 1363 self.entry.begin_section(ln, newsection) 1364 self.entry.leading_space = None 1365 1366 self.entry.add_text(newcontents.lstrip()) 1367 return True 1368 return False 1369 1370 def is_comment_end(self, ln, line): 1371 """ 1372 Helper function to detect (and effect) the end of a kerneldoc comment. 1373 """ 1374 if doc_end.search(line): 1375 self.dump_section() 1376 1377 # Look for doc_com + <text> + doc_end: 1378 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1379 if r.match(line): 1380 self.emit_msg(ln, f"suspicious ending line: {line}") 1381 1382 self.entry.prototype = "" 1383 self.entry.new_start_line = ln + 1 1384 1385 self.state = state.PROTO 1386 return True 1387 return False 1388 1389 1390 def process_decl(self, ln, line): 1391 """ 1392 STATE_DECLARATION: We've seen the beginning of a declaration. 1393 """ 1394 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1395 return 1396 # 1397 # Look for anything with the " * " line beginning. 1398 # 1399 if doc_content.search(line): 1400 cont = doc_content.group(1) 1401 # 1402 # A blank line means that we have moved out of the declaration 1403 # part of the comment (without any "special section" parameter 1404 # descriptions). 1405 # 1406 if cont == "": 1407 self.state = state.BODY 1408 # 1409 # Otherwise we have more of the declaration section to soak up. 1410 # 1411 else: 1412 self.entry.declaration_purpose = \ 1413 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1414 else: 1415 # Unknown line, ignore 1416 self.emit_msg(ln, f"bad line: {line}") 1417 1418 1419 def process_special(self, ln, line): 1420 """ 1421 STATE_SPECIAL_SECTION: a section ending with a blank line. 1422 """ 1423 # 1424 # If we have hit a blank line (only the " * " marker), then this 1425 # section is done. 
1426 # 1427 if KernRe(r"\s*\*\s*$").match(line): 1428 self.entry.begin_section(ln, dump = True) 1429 self.state = state.BODY 1430 return 1431 # 1432 # Not a blank line, look for the other ways to end the section. 1433 # 1434 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1435 return 1436 # 1437 # OK, we should have a continuation of the text for this section. 1438 # 1439 if doc_content.search(line): 1440 cont = doc_content.group(1) 1441 # 1442 # If the lines of text after the first in a special section have 1443 # leading white space, we need to trim it out or Sphinx will get 1444 # confused. For the second line (the None case), see what we 1445 # find there and remember it. 1446 # 1447 if self.entry.leading_space is None: 1448 r = KernRe(r'^(\s+)') 1449 if r.match(cont): 1450 self.entry.leading_space = len(r.group(1)) 1451 else: 1452 self.entry.leading_space = 0 1453 # 1454 # Otherwise, before trimming any leading chars, be *sure* 1455 # that they are white space. We should maybe warn if this 1456 # isn't the case. 1457 # 1458 for i in range(0, self.entry.leading_space): 1459 if cont[i] != " ": 1460 self.entry.leading_space = i 1461 break 1462 # 1463 # Add the trimmed result to the section and we're done. 1464 # 1465 self.entry.add_text(cont[self.entry.leading_space:]) 1466 else: 1467 # Unknown line, ignore 1468 self.emit_msg(ln, f"bad line: {line}") 1469 1470 def process_body(self, ln, line): 1471 """ 1472 STATE_BODY: the bulk of a kerneldoc comment. 
1473 """ 1474 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1475 return 1476 1477 if doc_content.search(line): 1478 cont = doc_content.group(1) 1479 self.entry.add_text(cont) 1480 else: 1481 # Unknown line, ignore 1482 self.emit_msg(ln, f"bad line: {line}") 1483 1484 def process_inline_name(self, ln, line): 1485 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1486 1487 if doc_inline_sect.search(line): 1488 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1489 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1490 self.state = state.INLINE_TEXT 1491 elif doc_inline_end.search(line): 1492 self.dump_section() 1493 self.state = state.PROTO 1494 elif doc_content.search(line): 1495 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1496 self.state = state.PROTO 1497 # else ... ?? 1498 1499 def process_inline_text(self, ln, line): 1500 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1501 1502 if doc_inline_end.search(line): 1503 self.dump_section() 1504 self.state = state.PROTO 1505 elif doc_content.search(line): 1506 self.entry.add_text(doc_content.group(1)) 1507 # else ... ?? 1508 1509 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1510 """ 1511 Handle syscall definitions. 
1512 """ 1513 1514 is_void = False 1515 1516 # Strip newlines/CR's 1517 proto = re.sub(r'[\r\n]+', ' ', proto) 1518 1519 # Check if it's a SYSCALL_DEFINE0 1520 if 'SYSCALL_DEFINE0' in proto: 1521 is_void = True 1522 1523 # Replace SYSCALL_DEFINE with correct return type & function name 1524 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1525 1526 r = KernRe(r'long\s+(sys_.*?),') 1527 if r.search(proto): 1528 proto = KernRe(',').sub('(', proto, count=1) 1529 elif is_void: 1530 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1531 1532 # Now delete all of the odd-numbered commas in the proto 1533 # so that argument types & names don't have a comma between them 1534 count = 0 1535 length = len(proto) 1536 1537 if is_void: 1538 length = 0 # skip the loop if is_void 1539 1540 for ix in range(length): 1541 if proto[ix] == ',': 1542 count += 1 1543 if count % 2 == 1: 1544 proto = proto[:ix] + ' ' + proto[ix + 1:] 1545 1546 return proto 1547 1548 def tracepoint_munge(self, ln, proto): 1549 """ 1550 Handle tracepoint definitions. 
1551 """ 1552 1553 tracepointname = None 1554 tracepointargs = None 1555 1556 # Match tracepoint name based on different patterns 1557 r = KernRe(r'TRACE_EVENT\((.*?),') 1558 if r.search(proto): 1559 tracepointname = r.group(1) 1560 1561 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1562 if r.search(proto): 1563 tracepointname = r.group(1) 1564 1565 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1566 if r.search(proto): 1567 tracepointname = r.group(2) 1568 1569 if tracepointname: 1570 tracepointname = tracepointname.lstrip() 1571 1572 r = KernRe(r'TP_PROTO\((.*?)\)') 1573 if r.search(proto): 1574 tracepointargs = r.group(1) 1575 1576 if not tracepointname or not tracepointargs: 1577 self.emit_msg(ln, 1578 f"Unrecognized tracepoint format:\n{proto}\n") 1579 else: 1580 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1581 self.entry.identifier = f"trace_{self.entry.identifier}" 1582 1583 return proto 1584 1585 def process_proto_function(self, ln, line): 1586 """Ancillary routine to process a function prototype.""" 1587 1588 # strip C99-style comments to end of line 1589 line = KernRe(r"//.*$", re.S).sub('', line) 1590 # 1591 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1592 # 1593 if KernRe(r'\s*#\s*define').match(line): 1594 self.entry.prototype = line 1595 elif not line.startswith('#'): # skip other preprocessor stuff 1596 r = KernRe(r'([^\{]*)') 1597 if r.match(line): 1598 self.entry.prototype += r.group(1) + " " 1599 # 1600 # If we now have the whole prototype, clean it up and declare victory. 
1601 # 1602 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1603 # strip comments and surrounding spaces 1604 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1605 # 1606 # Handle self.entry.prototypes for function pointers like: 1607 # int (*pcs_config)(struct foo) 1608 # by turning it into 1609 # int pcs_config(struct foo) 1610 # 1611 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1612 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1613 # 1614 # Handle special declaration syntaxes 1615 # 1616 if 'SYSCALL_DEFINE' in self.entry.prototype: 1617 self.entry.prototype = self.syscall_munge(ln, 1618 self.entry.prototype) 1619 else: 1620 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1621 if r.search(self.entry.prototype): 1622 self.entry.prototype = self.tracepoint_munge(ln, 1623 self.entry.prototype) 1624 # 1625 # ... and we're done 1626 # 1627 self.dump_function(ln, self.entry.prototype) 1628 self.reset_state(ln) 1629 1630 def process_proto_type(self, ln, line): 1631 """ 1632 Ancillary routine to process a type. 1633 """ 1634 1635 # Strip C99-style comments and surrounding whitespace 1636 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1637 if not line: 1638 return # nothing to see here 1639 1640 # To distinguish preprocessor directive from regular declaration later. 1641 if line.startswith('#'): 1642 line += ";" 1643 # 1644 # Split the declaration on any of { } or ;, and accumulate pieces 1645 # until we hit a semicolon while not inside {brackets} 1646 # 1647 r = KernRe(r'(.*?)([{};])') 1648 for chunk in r.split(line): 1649 if chunk: # Ignore empty matches 1650 self.entry.prototype += chunk 1651 # 1652 # This cries out for a match statement ... someday after we can 1653 # drop Python 3.9 ... 
1654 # 1655 if chunk == '{': 1656 self.entry.brcount += 1 1657 elif chunk == '}': 1658 self.entry.brcount -= 1 1659 elif chunk == ';' and self.entry.brcount <= 0: 1660 self.dump_declaration(ln, self.entry.prototype) 1661 self.reset_state(ln) 1662 return 1663 # 1664 # We hit the end of the line while still in the declaration; put 1665 # in a space to represent the newline. 1666 # 1667 self.entry.prototype += ' ' 1668 1669 def process_proto(self, ln, line): 1670 """STATE_PROTO: reading a function/whatever prototype.""" 1671 1672 if doc_inline_oneline.search(line): 1673 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1674 self.entry.add_text(doc_inline_oneline.group(2)) 1675 self.dump_section() 1676 1677 elif doc_inline_start.search(line): 1678 self.state = state.INLINE_NAME 1679 1680 elif self.entry.decl_type == 'function': 1681 self.process_proto_function(ln, line) 1682 1683 else: 1684 self.process_proto_type(ln, line) 1685 1686 def process_docblock(self, ln, line): 1687 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1688 1689 if doc_end.search(line): 1690 self.dump_section() 1691 self.output_declaration("doc", self.entry.identifier) 1692 self.reset_state(ln) 1693 1694 elif doc_content.search(line): 1695 self.entry.add_text(doc_content.group(1)) 1696 1697 def parse_export(self): 1698 """ 1699 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1700 """ 1701 1702 export_table = set() 1703 1704 try: 1705 with open(self.fname, "r", encoding="utf8", 1706 errors="backslashreplace") as fp: 1707 1708 for line in fp: 1709 self.process_export(export_table, line) 1710 1711 except IOError: 1712 return None 1713 1714 return export_table 1715 1716 #: The state/action table telling us which function to invoke in each state. 
1717 state_actions = { 1718 state.NORMAL: process_normal, 1719 state.NAME: process_name, 1720 state.BODY: process_body, 1721 state.DECLARATION: process_decl, 1722 state.SPECIAL_SECTION: process_special, 1723 state.INLINE_NAME: process_inline_name, 1724 state.INLINE_TEXT: process_inline_text, 1725 state.PROTO: process_proto, 1726 state.DOCBLOCK: process_docblock, 1727 } 1728 1729 def parse_kdoc(self): 1730 """ 1731 Open and process each line of a C source file. 1732 The parsing is controlled via a state machine, and the line is passed 1733 to a different process function depending on the state. The process 1734 function may update the state as needed. 1735 1736 Besides parsing kernel-doc tags, it also parses export symbols. 1737 """ 1738 1739 prev = "" 1740 prev_ln = None 1741 export_table = set() 1742 1743 try: 1744 with open(self.fname, "r", encoding="utf8", 1745 errors="backslashreplace") as fp: 1746 for ln, line in enumerate(fp): 1747 1748 line = line.expandtabs().strip("\n") 1749 1750 # Group continuation lines on prototypes 1751 if self.state == state.PROTO: 1752 if line.endswith("\\"): 1753 prev += line.rstrip("\\") 1754 if not prev_ln: 1755 prev_ln = ln 1756 continue 1757 1758 if prev: 1759 ln = prev_ln 1760 line = prev + line 1761 prev = "" 1762 prev_ln = None 1763 1764 self.config.log.debug("%d %s: %s", 1765 ln, state.name[self.state], 1766 line) 1767 1768 # This is an optimization over the original script. 1769 # There, when export_file was used for the same file, 1770 # it was read twice. Here, we use the already-existing 1771 # loop to parse exported symbols as well. 1772 # 1773 if (self.state != state.NORMAL) or \ 1774 not self.process_export(export_table, line): 1775 # Hand this line to the appropriate state handler 1776 self.state_actions[self.state](self, ln, line) 1777 1778 self.emit_unused_warnings() 1779 1780 except OSError: 1781 self.config.log.error(f"Error: Cannot open file {self.fname}") 1782 1783 return export_table, self.entries 1784