1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.kdoc_re import NestedMatch, KernRe 17from kdoc.kdoc_item import KdocItem 18 19# 20# Regular expressions used to parse kernel-doc markups at KernelDoc class. 21# 22# Let's declare them in lowercase outside any class to make it easier to 23# convert from the Perl script. 24# 25# As those are evaluated at the beginning, no need to cache them 26# 27 28# Allow whitespace at end of comment start. 29doc_start = KernRe(r'^/\*\*\s*$', cache=False) 30 31doc_end = KernRe(r'\*/', cache=False) 32doc_com = KernRe(r'\s*\*\s*', cache=False) 33doc_com_body = KernRe(r'\s*\* ?', cache=False) 34doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 35 36# @params and a strictly limited set of supported section names 37# Specifically: 38# Match @word: 39# @...: 40# @{section-name}: 41# while trying to not match literal block starts like "example::" 42# 43known_section_names = 'description|context|returns?|notes?|examples?' 
# Case-insensitive matcher for the supported section names.
known_sections = KernRe(known_section_names, flags = re.I)
# Section header line.  Group 1 is "@name", "@..." or a known section name;
# group 2 is the optional text after the colon, which must not itself start
# with ":" (so that "example::" is not taken as a section).
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# A body line: group 1 is everything after the comment leader.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline comments: "/**" opener, "@member: text" line, "*/" closer, and the
# single-line "/** @member: text */" form (group 1: "@member", group 2: text).
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL*() invocations; group 2 is the exported symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name" reference, optionally with ".member" / "->member" chains and a
# trailing "..."; group 1 is the name without the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * '
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
75# 76struct_args_pattern = r'([^,)]+)' 77 78struct_xforms = [ 79 # Strip attributes 80 (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), 81 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 82 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 83 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 84 (KernRe(r'\s*__packed\s*', re.S), ' '), 85 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 86 (KernRe(r'\s*__private', re.S), ' '), 87 (KernRe(r'\s*__rcu', re.S), ' '), 88 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 89 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 90 (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), 91 # 92 # Unwrap struct_group macros based on this definition: 93 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 94 # which has variants like: struct_group(NAME, MEMBERS...) 95 # Only MEMBERS arguments require documentation. 96 # 97 # Parsing them happens on two steps: 98 # 99 # 1. drop struct group arguments that aren't at MEMBERS, 100 # storing them as STRUCT_GROUP(MEMBERS) 101 # 102 # 2. remove STRUCT_GROUP() ancillary macro. 103 # 104 # The original logic used to remove STRUCT_GROUP() using an 105 # advanced regex: 106 # 107 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 108 # 109 # with two patterns that are incompatible with 110 # Python re module, as it has: 111 # 112 # - a recursive pattern: (?1) 113 # - an atomic grouping: (?>...) 114 # 115 # I tried a simpler version: but it didn't work either: 116 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 117 # 118 # As it doesn't properly match the end parenthesis on some cases. 119 # 120 # So, a better solution was crafted: there's now a NestedMatch 121 # class that ensures that delimiters after a search are properly 122 # matched. So, the implementation to drop STRUCT_GROUP() will be 123 # handled in separate. 
124 # 125 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 126 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 127 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 128 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 129 # 130 # Replace macros 131 # 132 # TODO: use NestedMatch for FOO($1, $2, ...) matches 133 # 134 # it is better to also move those to the NestedMatch logic, 135 # to ensure that parentheses will be properly matched. 136 # 137 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), 138 r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 139 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), 140 r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 141 (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 142 re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 143 (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 144 re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 145 (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + 146 r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), 147 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + 148 struct_args_pattern + r'\)', re.S), r'\2 *\1'), 149 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + 150 struct_args_pattern + r'\)', re.S), r'\1 \2[]'), 151 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), 152 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), 153] 154# 155# Regexes here are guaranteed to have the end delimiter matching 156# the start delimiter. Yet, right now, only one replace group 157# is allowed. 
#
# Each entry is (compiled prefix regex, replacement); they are applied via
# NestedMatch.sub() in KernelDoc.dump_struct().
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Transforms for function prototypes
#
# Applied in order by apply_transforms(); each matching text is replaced
# by the second element of the pair.
#
function_xforms = [
    (KernRe(r"^static +"), ""),
    (KernRe(r"^extern +"), ""),
    (KernRe(r"^asmlinkage +"), ""),
    (KernRe(r"^inline +"), ""),
    (KernRe(r"^__inline__ +"), ""),
    (KernRe(r"^__inline +"), ""),
    (KernRe(r"^__always_inline +"), ""),
    (KernRe(r"^noinline +"), ""),
    (KernRe(r"^__FORTIFY_INLINE +"), ""),
    (KernRe(r"__init +"), ""),
    (KernRe(r"__init_or_module +"), ""),
    (KernRe(r"__exit +"), ""),
    (KernRe(r"__deprecated +"), ""),
    (KernRe(r"__flatten +"), ""),
    (KernRe(r"__meminit +"), ""),
    (KernRe(r"__must_check +"), ""),
    (KernRe(r"__weak +"), ""),
    (KernRe(r"__sched +"), ""),
    (KernRe(r"_noprof"), ""),
    (KernRe(r"__always_unused *"), ""),
    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
    (KernRe(r"__attribute_const__ +"), ""),
    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]

#
# Ancillary functions
#

def apply_transforms(xforms, text):
    """
    Apply a set of transforms to a block of text.

    ``xforms`` is an iterable of ``(regex, replacement)`` pairs whose first
    element provides ``sub(replacement, text)``. The pairs are applied in
    order, each one to the output of the previous; the final text is
    returned.
    """
    for search, subst in xforms:
        text = search.sub(subst, text)
    return text

multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    A little helper to get rid of excess white space.

    Leading/trailing white space is stripped and every run of two or more
    white space characters is replaced by a single blank.
    """
    return multi_space.sub(' ', s.strip())

def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    Returns ``text`` without the private regions and without any remaining
    C comments, stripped of leading/trailing white space.
    """
    #
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL          = 0     #: Normal code.
    NAME            = 1     #: Looking for function name.
    DECLARATION     = 2     #: We have seen a declaration which might not be done.
    BODY            = 3     #: The body of the comment.
    SPECIAL_SECTION = 4     #: Doc section ending with a blank line.
    PROTO           = 5     #: Scanning prototype.
    DOCBLOCK        = 6     #: Documentation block.
    INLINE_NAME     = 7     #: Gathering doc outside main block.
    INLINE_TEXT     = 8     #: Reading the body of inline docs.

    #: Names for each parser state, indexed by the state value.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    It accumulates the text of the section being read, the finished
    sections, the parameter descriptions and the warnings produced while
    parsing one kernel-doc comment.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry for file ``fname``.

        ``ln`` is the line number where the entry was created;
        the declaration is recorded as starting on the following line.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        #
        # Current section bookkeeping. begin_section() overrides both, but
        # dump_section() reads them, so give them defined values here:
        # dumping a fresh entry is then a no-op instead of an AttributeError.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        self.anon_struct_union = False

        self.leading_space = None

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """
        Returns a string with all content texts that were added, joined by
        newlines and terminated by a final newline.
        """
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages (``warning=False``) are logged right away;
        warnings are only stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section named ``title`` starting at ``line_no``.

        When ``dump`` is true, the section being accumulated is stored
        first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        A section whose name matches ``type_param`` is stored as a parameter
        description; anything else is stored as a named section. When
        ``start_new`` is true, the accumulated text is cleared and the
        current section goes back to the default one.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

# Set once the "Python too old" warning has been emitted, so that it is
# reported a single time no matter how many KernelDoc objects are created.
python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
383 undescribed = "-- undescribed --" 384 385 def __init__(self, config, fname): 386 """Initialize internal variables""" 387 388 self.fname = fname 389 self.config = config 390 391 # Initial state for the state machines 392 self.state = state.NORMAL 393 394 # Store entry currently being processed 395 self.entry = None 396 397 # Place all potential outputs into an array 398 self.entries = [] 399 400 # 401 # We need Python 3.7 for its "dicts remember the insertion 402 # order" guarantee 403 # 404 global python_warning 405 if (not python_warning and 406 sys.version_info.major == 3 and sys.version_info.minor < 7): 407 408 self.emit_msg(0, 409 'Python 3.7 or later is required for correct results') 410 python_warning = True 411 412 def emit_msg(self, ln, msg, *, warning=True): 413 """Emit a message""" 414 415 if self.entry: 416 self.entry.emit_msg(ln, msg, warning=warning) 417 return 418 419 log_msg = f"{self.fname}:{ln} {msg}" 420 421 if warning: 422 self.config.log.warning(log_msg) 423 else: 424 self.config.log.info(log_msg) 425 426 def dump_section(self, start_new=True): 427 """ 428 Dump section contents to arrays/hashes intended for that purpose. 429 """ 430 431 if self.entry: 432 self.entry.dump_section(start_new) 433 434 # TODO: rename it to store_declaration after removal of kernel-doc.pl 435 def output_declaration(self, dtype, name, **args): 436 """ 437 Store the entry into an entry array. 438 439 The actual output and output filters will be handled elsewhere. 
440 """ 441 442 item = KdocItem(name, self.fname, dtype, 443 self.entry.declaration_start_line, **args) 444 item.warnings = self.entry.warnings 445 446 # Drop empty sections 447 # TODO: improve empty sections logic to emit warnings 448 sections = self.entry.sections 449 for section in ["Description", "Return"]: 450 if section in sections and not sections[section].rstrip(): 451 del sections[section] 452 item.set_sections(sections, self.entry.section_start_lines) 453 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 454 self.entry.parametertypes, 455 self.entry.parameterdesc_start_lines) 456 self.entries.append(item) 457 458 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 459 460 def emit_unused_warnings(self): 461 """ 462 When the parser fails to produce a valid entry, it places some 463 warnings under `entry.warnings` that will be discarded when resetting 464 the state. 465 466 Ensure that those warnings are not lost. 467 468 .. note:: 469 470 Because we are calling `config.warning()` here, those 471 warnings are not filtered by the `-W` parameters: they will all 472 be produced even when `-Wreturn`, `-Wshort-desc`, and/or 473 `-Wcontents-before-sections` are used. 474 475 Allowing those warnings to be filtered is complex, because it 476 would require storing them in a buffer and then filtering them 477 during the output step of the code, depending on the 478 selected symbols. 479 """ 480 if self.entry and self.entry not in self.entries: 481 for log_msg in self.entry.warnings: 482 self.config.warning(log_msg) 483 484 def reset_state(self, ln): 485 """ 486 Ancillary routine to create a new entry. It initializes all 487 variables used by the state machine. 
488 """ 489 490 self.emit_unused_warnings() 491 492 self.entry = KernelEntry(self.config, self.fname, ln) 493 494 # State flags 495 self.state = state.NORMAL 496 497 def push_parameter(self, ln, decl_type, param, dtype, 498 org_arg, declaration_name): 499 """ 500 Store parameters and their descriptions at self.entry. 501 """ 502 503 if self.entry.anon_struct_union and dtype == "" and param == "}": 504 return # Ignore the ending }; from anonymous struct/union 505 506 self.entry.anon_struct_union = False 507 508 param = KernRe(r'[\[\)].*').sub('', param, count=1) 509 510 # 511 # Look at various "anonymous type" cases. 512 # 513 if dtype == '': 514 if param.endswith("..."): 515 if len(param) > 3: # there is a name provided, use that 516 param = param[:-3] 517 if not self.entry.parameterdescs.get(param): 518 self.entry.parameterdescs[param] = "variable arguments" 519 520 elif (not param) or param == "void": 521 param = "void" 522 self.entry.parameterdescs[param] = "no arguments" 523 524 elif param in ["struct", "union"]: 525 # Handle unnamed (anonymous) union or struct 526 dtype = param 527 param = "{unnamed_" + param + "}" 528 self.entry.parameterdescs[param] = "anonymous\n" 529 self.entry.anon_struct_union = True 530 531 # Warn if parameter has no description 532 # (but ignore ones starting with # as these are not parameters 533 # but inline preprocessor statements) 534 if param not in self.entry.parameterdescs and not param.startswith("#"): 535 self.entry.parameterdescs[param] = self.undescribed 536 537 if "." not in param: 538 if decl_type == 'function': 539 dname = f"{decl_type} parameter" 540 else: 541 dname = f"{decl_type} member" 542 543 self.emit_msg(ln, 544 f"{dname} '{param}' not described in '{declaration_name}'") 545 546 # Strip spaces from param so that it is one continuous string on 547 # parameterlist. 
This fixes a problem where check_sections() 548 # cannot find a parameter like "addr[6 + 2]" because it actually 549 # appears as "addr[6", "+", "2]" on the parameter list. 550 # However, it's better to maintain the param string unchanged for 551 # output, so just weaken the string compare in check_sections() 552 # to ignore "[blah" in a parameter string. 553 554 self.entry.parameterlist.append(param) 555 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 556 self.entry.parametertypes[param] = org_arg 557 558 559 def create_parameter_list(self, ln, decl_type, args, 560 splitter, declaration_name): 561 """ 562 Creates a list of parameters, storing them at self.entry. 563 """ 564 565 # temporarily replace all commas inside function pointer definition 566 arg_expr = KernRe(r'(\([^\),]+),') 567 while arg_expr.search(args): 568 args = arg_expr.sub(r"\1#", args) 569 570 for arg in args.split(splitter): 571 # Ignore argument attributes 572 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 573 574 # Strip leading/trailing spaces 575 arg = arg.strip() 576 arg = KernRe(r'\s+').sub(' ', arg, count=1) 577 578 if arg.startswith('#'): 579 # Treat preprocessor directive as a typeless variable just to fill 580 # corresponding data structures "correctly". Catch it later in 581 # output_* subs. 582 583 # Treat preprocessor directive as a typeless variable 584 self.push_parameter(ln, decl_type, arg, "", 585 "", declaration_name) 586 # 587 # The pointer-to-function case. 588 # 589 elif KernRe(r'\(.+\)\s*\(').search(arg): 590 arg = arg.replace('#', ',') 591 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" 592 r'([\w\[\].]*)' # Capture the name and possible [array] 593 r'\s*\)') # Make sure the trailing ")" is there 594 if r.match(arg): 595 param = r.group(1) 596 else: 597 self.emit_msg(ln, f"Invalid param: {arg}") 598 param = arg 599 dtype = arg.replace(param, '') 600 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 601 # 602 # The array-of-pointers case. 
Dig the parameter name out from the middle 603 # of the declaration. 604 # 605 elif KernRe(r'\(.+\)\s*\[').search(arg): 606 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" 607 r'([\w.]*?)' # The actual pointer name 608 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] 609 if r.match(arg): 610 param = r.group(1) 611 else: 612 self.emit_msg(ln, f"Invalid param: {arg}") 613 param = arg 614 dtype = arg.replace(param, '') 615 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 616 elif arg: 617 # 618 # Clean up extraneous spaces and split the string at commas; the first 619 # element of the resulting list will also include the type information. 620 # 621 arg = KernRe(r'\s*:\s*').sub(":", arg) 622 arg = KernRe(r'\s*\[').sub('[', arg) 623 args = KernRe(r'\s*,\s*').split(arg) 624 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 625 # 626 # args[0] has a string of "type a". If "a" includes an [array] 627 # declaration, we want to not be fooled by any white space inside 628 # the brackets, so detect and handle that case specially. 629 # 630 r = KernRe(r'^([^[\]]*\s+)(.*)$') 631 if r.match(args[0]): 632 args[0] = r.group(2) 633 dtype = r.group(1) 634 else: 635 # No space in args[0]; this seems wrong but preserves previous behavior 636 dtype = '' 637 638 bitfield_re = KernRe(r'(.*?):(\w+)') 639 for param in args: 640 # 641 # For pointers, shift the star(s) from the variable name to the 642 # type declaration. 643 # 644 r = KernRe(r'^(\*+)\s*(.*)') 645 if r.match(param): 646 self.push_parameter(ln, decl_type, r.group(2), 647 f"{dtype} {r.group(1)}", 648 arg, declaration_name) 649 # 650 # Perform a similar shift for bitfields. 
651 # 652 elif bitfield_re.search(param): 653 if dtype != "": # Skip unnamed bit-fields 654 self.push_parameter(ln, decl_type, bitfield_re.group(1), 655 f"{dtype}:{bitfield_re.group(2)}", 656 arg, declaration_name) 657 else: 658 self.push_parameter(ln, decl_type, param, dtype, 659 arg, declaration_name) 660 661 def check_sections(self, ln, decl_name, decl_type): 662 """ 663 Check for errors inside sections, emitting warnings if not found 664 parameters are described. 665 """ 666 for section in self.entry.sections: 667 if section not in self.entry.parameterlist and \ 668 not known_sections.search(section): 669 if decl_type == 'function': 670 dname = f"{decl_type} parameter" 671 else: 672 dname = f"{decl_type} member" 673 self.emit_msg(ln, 674 f"Excess {dname} '{section}' description in '{decl_name}'") 675 676 def check_return_section(self, ln, declaration_name, return_type): 677 """ 678 If the function doesn't return void, warns about the lack of a 679 return description. 680 """ 681 682 if not self.config.wreturn: 683 return 684 685 # Ignore an empty return type (It's a macro) 686 # Ignore functions with a "void" return type (but not "void *") 687 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 688 return 689 690 if not self.entry.sections.get("Return", None): 691 self.emit_msg(ln, 692 f"No description found for return value of '{declaration_name}'") 693 694 def split_struct_proto(self, proto): 695 """ 696 Split apart a structure prototype; returns (struct|union, name, 697 members) or ``None``. 698 """ 699 700 type_pattern = r'(struct|union)' 701 qualifiers = [ 702 "__attribute__", 703 "__packed", 704 "__aligned", 705 "____cacheline_aligned_in_smp", 706 "____cacheline_aligned", 707 ] 708 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 
709 710 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 711 if r.search(proto): 712 return (r.group(1), r.group(2), r.group(3)) 713 else: 714 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 715 if r.search(proto): 716 return (r.group(1), r.group(3), r.group(2)) 717 return None 718 719 def rewrite_struct_members(self, members): 720 """ 721 Process ``struct``/``union`` members from the most deeply nested 722 outward. 723 724 Rewrite the members of a ``struct`` or ``union`` for easier formatting 725 later on. Among other things, this function will turn a member like:: 726 727 struct { inner_members; } foo; 728 729 into:: 730 731 struct foo; inner_members; 732 """ 733 734 # 735 # The trick is in the ``^{`` below - it prevents a match of an outer 736 # ``struct``/``union`` until the inner one has been munged 737 # (removing the ``{`` in the process). 738 # 739 struct_members = KernRe(r'(struct|union)' # 0: declaration type 740 r'([^\{\};]+)' # 1: possible name 741 r'(\{)' 742 r'([^\{\}]*)' # 3: Contents of declaration 743 r'(\})' 744 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration 745 tuples = struct_members.findall(members) 746 while tuples: 747 for t in tuples: 748 newmember = "" 749 oldmember = "".join(t) # Reconstruct the original formatting 750 dtype, name, lbr, content, rbr, rest, semi = t 751 # 752 # Pass through each field name, normalizing the form and formatting. 753 # 754 for s_id in rest.split(','): 755 s_id = s_id.strip() 756 newmember += f"{dtype} {s_id}; " 757 # 758 # Remove bitfield/array/pointer info, getting the bare name. 759 # 760 s_id = KernRe(r'[:\[].*').sub('', s_id) 761 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 762 # 763 # Pass through the members of this inner structure/union. 
764 # 765 for arg in content.split(';'): 766 arg = arg.strip() 767 # 768 # Look for (type)(*name)(args) - pointer to function 769 # 770 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') 771 if r.match(arg): 772 dtype, name, extra = r.group(1), r.group(2), r.group(3) 773 # Pointer-to-function 774 if not s_id: 775 # Anonymous struct/union 776 newmember += f"{dtype}{name}{extra}; " 777 else: 778 newmember += f"{dtype}{s_id}.{name}{extra}; " 779 # 780 # Otherwise a non-function member. 781 # 782 else: 783 # 784 # Remove bitmap and array portions and spaces around commas 785 # 786 arg = KernRe(r':\s*\d+\s*').sub('', arg) 787 arg = KernRe(r'\[.*\]').sub('', arg) 788 arg = KernRe(r'\s*,\s*').sub(',', arg) 789 # 790 # Look for a normal decl - "type name[,name...]" 791 # 792 r = KernRe(r'(.*)\s+([\S+,]+)') 793 if r.search(arg): 794 for name in r.group(2).split(','): 795 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) 796 if not s_id: 797 # Anonymous struct/union 798 newmember += f"{r.group(1)} {name}; " 799 else: 800 newmember += f"{r.group(1)} {s_id}.{name}; " 801 else: 802 newmember += f"{arg}; " 803 # 804 # At the end of the s_id loop, replace the original declaration with 805 # the munged version. 806 # 807 members = members.replace(oldmember, newmember) 808 # 809 # End of the tuple loop - search again and see if there are outer members 810 # that now turn up. 811 # 812 tuples = struct_members.findall(members) 813 return members 814 815 def format_struct_decl(self, declaration): 816 """ 817 Format the ``struct`` declaration into a standard form for inclusion 818 in the resulting docs. 819 """ 820 821 # 822 # Insert newlines, get rid of extra spaces. 823 # 824 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 825 declaration = KernRe(r'\}\s+;').sub('};', declaration) 826 # 827 # Format inline enums with each member on its own line. 
828 # 829 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 830 while r.search(declaration): 831 declaration = r.sub(r'\1,\n\2', declaration) 832 # 833 # Now go through and supply the right number of tabs 834 # for each line. 835 # 836 def_args = declaration.split('\n') 837 level = 1 838 declaration = "" 839 for clause in def_args: 840 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) 841 if clause: 842 if '}' in clause and level > 1: 843 level -= 1 844 if not clause.startswith('#'): 845 declaration += "\t" * level 846 declaration += "\t" + clause + "\n" 847 if "{" in clause and "}" not in clause: 848 level += 1 849 return declaration 850 851 852 def dump_struct(self, ln, proto): 853 """ 854 Store an entry for a ``struct`` or ``union`` 855 """ 856 # 857 # Do the basic parse to get the pieces of the declaration. 858 # 859 struct_parts = self.split_struct_proto(proto) 860 if not struct_parts: 861 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 862 return 863 decl_type, declaration_name, members = struct_parts 864 865 if self.entry.identifier != declaration_name: 866 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " 867 f"Prototype was for {decl_type} {declaration_name} instead\n") 868 return 869 # 870 # Go through the list of members applying all of our transformations. 871 # 872 members = trim_private_members(members) 873 members = apply_transforms(struct_xforms, members) 874 875 nested = NestedMatch() 876 for search, sub in struct_nested_prefixes: 877 members = nested.sub(search, sub, members) 878 # 879 # Deal with embedded struct and union members, and drop enums entirely. 880 # 881 declaration = members 882 members = self.rewrite_struct_members(members) 883 members = re.sub(r'(\{[^\{\}]*\})', '', members) 884 # 885 # Output the result and we are done. 
886 # 887 self.create_parameter_list(ln, decl_type, members, ';', 888 declaration_name) 889 self.check_sections(ln, declaration_name, decl_type) 890 self.output_declaration(decl_type, declaration_name, 891 definition=self.format_struct_decl(declaration), 892 purpose=self.entry.declaration_purpose) 893 894 def dump_enum(self, ln, proto): 895 """ 896 Store an ``enum`` inside self.entries array. 897 """ 898 # 899 # Strip preprocessor directives. Note that this depends on the 900 # trailing semicolon we added in process_proto_type(). 901 # 902 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 903 # 904 # Parse out the name and members of the enum. Typedef form first. 905 # 906 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 907 if r.search(proto): 908 declaration_name = r.group(2) 909 members = trim_private_members(r.group(1)) 910 # 911 # Failing that, look for a straight enum 912 # 913 else: 914 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 915 if r.match(proto): 916 declaration_name = r.group(1) 917 members = trim_private_members(r.group(2)) 918 # 919 # OK, this isn't going to work. 920 # 921 else: 922 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 923 return 924 # 925 # Make sure we found what we were expecting. 926 # 927 if self.entry.identifier != declaration_name: 928 if self.entry.identifier == "": 929 self.emit_msg(ln, 930 f"{proto}: wrong kernel-doc identifier on prototype") 931 else: 932 self.emit_msg(ln, 933 f"expecting prototype for enum {self.entry.identifier}. " 934 f"Prototype was for enum {declaration_name} instead") 935 return 936 937 if not declaration_name: 938 declaration_name = "(anonymous)" 939 # 940 # Parse out the name of each enum member, and verify that we 941 # have a description for it. 
942 # 943 member_set = set() 944 members = KernRe(r'\([^;)]*\)').sub('', members) 945 for arg in members.split(','): 946 if not arg: 947 continue 948 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 949 self.entry.parameterlist.append(arg) 950 if arg not in self.entry.parameterdescs: 951 self.entry.parameterdescs[arg] = self.undescribed 952 self.emit_msg(ln, 953 f"Enum value '{arg}' not described in enum '{declaration_name}'") 954 member_set.add(arg) 955 # 956 # Ensure that every described member actually exists in the enum. 957 # 958 for k in self.entry.parameterdescs: 959 if k not in member_set: 960 self.emit_msg(ln, 961 f"Excess enum value '@{k}' description in '{declaration_name}'") 962 963 self.output_declaration('enum', declaration_name, 964 purpose=self.entry.declaration_purpose) 965 966 def dump_var(self, ln, proto): 967 """ 968 Store variables that are part of kAPI. 969 """ 970 VAR_ATTRIBS = [ 971 "extern", 972 ] 973 OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" 974 975 sub_prefixes = [ 976 (KernRe(r"__read_mostly"), ""), 977 (KernRe(r"__ro_after_init"), ""), 978 (KernRe(r"(?://.*)$"), ""), 979 (KernRe(r"(?:/\*.*\*/)"), ""), 980 (KernRe(r";$"), ""), 981 (KernRe(r"=.*"), ""), 982 ] 983 984 # 985 # Store the full prototype before modifying it 986 # 987 full_proto = proto 988 declaration_name = None 989 990 # 991 # Handle macro definitions 992 # 993 macro_prefixes = [ 994 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"), 995 ] 996 997 for r in macro_prefixes: 998 match = r.search(proto) 999 if match: 1000 declaration_name = match.group(1) 1001 break 1002 1003 # 1004 # Drop comments and macros to have a pure C prototype 1005 # 1006 if not declaration_name: 1007 for r, sub in sub_prefixes: 1008 proto = r.sub(sub, proto) 1009 1010 proto = proto.rstrip() 1011 1012 # 1013 # Variable name is at the end of the declaration 1014 # 1015 1016 default_val = None 1017 1018 r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1019 if 
r.match(proto): 1020 if not declaration_name: 1021 declaration_name = r.group(1) 1022 1023 default_val = r.group(2) 1024 else: 1025 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1026 if r.match(proto): 1027 default_val = r.group(1) 1028 1029 if not declaration_name: 1030 self.emit_msg(ln,f"{proto}: can't parse variable") 1031 return 1032 1033 if default_val: 1034 default_val = default_val.lstrip("=").strip() 1035 1036 self.output_declaration("var", declaration_name, 1037 full_proto=full_proto, 1038 default_val=default_val, 1039 purpose=self.entry.declaration_purpose) 1040 1041 def dump_declaration(self, ln, prototype): 1042 """ 1043 Store a data declaration inside self.entries array. 1044 """ 1045 1046 if self.entry.decl_type == "enum": 1047 self.dump_enum(ln, prototype) 1048 elif self.entry.decl_type == "typedef": 1049 self.dump_typedef(ln, prototype) 1050 elif self.entry.decl_type in ["union", "struct"]: 1051 self.dump_struct(ln, prototype) 1052 elif self.entry.decl_type == "var": 1053 self.dump_var(ln, prototype) 1054 else: 1055 # This would be a bug 1056 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 1057 1058 def dump_function(self, ln, prototype): 1059 """ 1060 Store a function or function macro inside self.entries array. 1061 """ 1062 1063 found = func_macro = False 1064 return_type = '' 1065 decl_type = 'function' 1066 # 1067 # Apply the initial transformations. 1068 # 1069 prototype = apply_transforms(function_xforms, prototype) 1070 # 1071 # If we have a macro, remove the "#define" at the front. 1072 # 1073 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1074 if new_proto != prototype: 1075 prototype = new_proto 1076 # 1077 # Dispense with the simple "#define A B" case here; the key 1078 # is the space after the name of the symbol being defined. 1079 # NOTE that the seemingly misnamed "func_macro" indicates a 1080 # macro *without* arguments. 
1081 # 1082 r = KernRe(r'^(\w+)\s+') 1083 if r.search(prototype): 1084 return_type = '' 1085 declaration_name = r.group(1) 1086 func_macro = True 1087 found = True 1088 1089 # Yes, this truly is vile. We are looking for: 1090 # 1. Return type (may be nothing if we're looking at a macro) 1091 # 2. Function name 1092 # 3. Function parameters. 1093 # 1094 # All the while we have to watch out for function pointer parameters 1095 # (which IIRC is what the two sections are for), C types (these 1096 # regexps don't even start to express all the possibilities), and 1097 # so on. 1098 # 1099 # If you mess with these regexps, it's a good idea to check that 1100 # the following functions' documentation still comes out right: 1101 # - parport_register_device (function pointer parameters) 1102 # - atomic_set (macro) 1103 # - pci_match_device, __copy_to_user (long return type) 1104 1105 name = r'\w+' 1106 type1 = r'(?:[\w\s]+)?' 1107 type2 = r'(?:[\w\s]+\*+)+' 1108 # 1109 # Attempt to match first on (args) with no internal parentheses; this 1110 # lets us easily filter out __acquires() and other post-args stuff. If 1111 # that fails, just grab the rest of the line to the last closing 1112 # parenthesis. 1113 # 1114 proto_args = r'\(([^\(]*|.*)\)' 1115 # 1116 # (Except for the simple macro case) attempt to split up the prototype 1117 # in the various ways we understand. 1118 # 1119 if not found: 1120 patterns = [ 1121 rf'^()({name})\s*{proto_args}', 1122 rf'^({type1})\s+({name})\s*{proto_args}', 1123 rf'^({type2})\s*({name})\s*{proto_args}', 1124 ] 1125 1126 for p in patterns: 1127 r = KernRe(p) 1128 if r.match(prototype): 1129 return_type = r.group(1) 1130 declaration_name = r.group(2) 1131 args = r.group(3) 1132 self.create_parameter_list(ln, decl_type, args, ',', 1133 declaration_name) 1134 found = True 1135 break 1136 # 1137 # Parsing done; make sure that things are as we expect. 
1138 # 1139 if not found: 1140 self.emit_msg(ln, 1141 f"cannot understand function prototype: '{prototype}'") 1142 return 1143 if self.entry.identifier != declaration_name: 1144 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " 1145 f"Prototype was for {declaration_name}() instead") 1146 return 1147 self.check_sections(ln, declaration_name, "function") 1148 self.check_return_section(ln, declaration_name, return_type) 1149 # 1150 # Store the result. 1151 # 1152 self.output_declaration(decl_type, declaration_name, 1153 typedef=('typedef' in return_type), 1154 functiontype=return_type, 1155 purpose=self.entry.declaration_purpose, 1156 func_macro=func_macro) 1157 1158 1159 def dump_typedef(self, ln, proto): 1160 """ 1161 Store a ``typedef`` inside self.entries array. 1162 """ 1163 # 1164 # We start by looking for function typedefs. 1165 # 1166 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1167 typedef_ident = r'\*?\s*(\w\S+)\s*' 1168 typedef_args = r'\s*\((.*)\);' 1169 1170 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1171 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1172 1173 # Parse function typedef prototypes 1174 for r in [typedef1, typedef2]: 1175 if not r.match(proto): 1176 continue 1177 1178 return_type = r.group(1).strip() 1179 declaration_name = r.group(2) 1180 args = r.group(3) 1181 1182 if self.entry.identifier != declaration_name: 1183 self.emit_msg(ln, 1184 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1185 return 1186 1187 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1188 1189 self.output_declaration('function', declaration_name, 1190 typedef=True, 1191 functiontype=return_type, 1192 purpose=self.entry.declaration_purpose) 1193 return 1194 # 1195 # Not a function, try to parse a simple typedef. 
1196 # 1197 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1198 if r.match(proto): 1199 declaration_name = r.group(1) 1200 1201 if self.entry.identifier != declaration_name: 1202 self.emit_msg(ln, 1203 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1204 return 1205 1206 self.output_declaration('typedef', declaration_name, 1207 purpose=self.entry.declaration_purpose) 1208 return 1209 1210 self.emit_msg(ln, "error: Cannot parse typedef!") 1211 1212 @staticmethod 1213 def process_export(function_set, line): 1214 """ 1215 process ``EXPORT_SYMBOL*`` tags 1216 1217 This method doesn't use any variable from the class, so declare it 1218 with a staticmethod decorator. 1219 """ 1220 1221 # We support documenting some exported symbols with different 1222 # names. A horrible hack. 1223 suffixes = [ '_noprof' ] 1224 1225 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1226 # multiple export lines would violate Kernel coding style. 1227 1228 if export_symbol.search(line): 1229 symbol = export_symbol.group(2) 1230 elif export_symbol_ns.search(line): 1231 symbol = export_symbol_ns.group(2) 1232 else: 1233 return False 1234 # 1235 # Found an export, trim out any special suffixes 1236 # 1237 for suffix in suffixes: 1238 # Be backward compatible with Python < 3.9 1239 if symbol.endswith(suffix): 1240 symbol = symbol[:-len(suffix)] 1241 function_set.add(symbol) 1242 return True 1243 1244 def process_normal(self, ln, line): 1245 """ 1246 STATE_NORMAL: looking for the ``/**`` to begin everything. 1247 """ 1248 1249 if not doc_start.match(line): 1250 return 1251 1252 # start a new entry 1253 self.reset_state(ln) 1254 1255 # next line is always the function name 1256 self.state = state.NAME 1257 1258 def process_name(self, ln, line): 1259 """ 1260 STATE_NAME: Looking for the "name - description" line 1261 """ 1262 # 1263 # Check for a DOC: block and handle them specially. 
1264 # 1265 if doc_block.search(line): 1266 1267 if not doc_block.group(1): 1268 self.entry.begin_section(ln, "Introduction") 1269 else: 1270 self.entry.begin_section(ln, doc_block.group(1)) 1271 1272 self.entry.identifier = self.entry.section 1273 self.state = state.DOCBLOCK 1274 # 1275 # Otherwise we're looking for a normal kerneldoc declaration line. 1276 # 1277 elif doc_decl.search(line): 1278 self.entry.identifier = doc_decl.group(1) 1279 1280 # Test for data declaration 1281 if doc_begin_data.search(line): 1282 self.entry.decl_type = doc_begin_data.group(1) 1283 self.entry.identifier = doc_begin_data.group(2) 1284 # 1285 # Look for a function description 1286 # 1287 elif doc_begin_func.search(line): 1288 self.entry.identifier = doc_begin_func.group(1) 1289 self.entry.decl_type = "function" 1290 # 1291 # We struck out. 1292 # 1293 else: 1294 self.emit_msg(ln, 1295 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1296 self.state = state.NORMAL 1297 return 1298 # 1299 # OK, set up for a new kerneldoc entry. 1300 # 1301 self.state = state.BODY 1302 self.entry.identifier = self.entry.identifier.strip(" ") 1303 # if there's no @param blocks need to set up default section here 1304 self.entry.begin_section(ln + 1) 1305 # 1306 # Find the description portion, which *should* be there but 1307 # isn't always. 
1308 # (We should be able to capture this from the previous parsing - someday) 1309 # 1310 r = KernRe("[-:](.*)") 1311 if r.search(line): 1312 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1313 self.state = state.DECLARATION 1314 else: 1315 self.entry.declaration_purpose = "" 1316 1317 if not self.entry.declaration_purpose and self.config.wshort_desc: 1318 self.emit_msg(ln, 1319 f"missing initial short description on line:\n{line}") 1320 1321 if not self.entry.identifier and self.entry.decl_type != "enum": 1322 self.emit_msg(ln, 1323 f"wrong kernel-doc identifier on line:\n{line}") 1324 self.state = state.NORMAL 1325 1326 if self.config.verbose: 1327 self.emit_msg(ln, 1328 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1329 warning=False) 1330 # 1331 # Failed to find an identifier. Emit a warning 1332 # 1333 else: 1334 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1335 1336 def is_new_section(self, ln, line): 1337 """ 1338 Helper function to determine if a new section is being started. 1339 """ 1340 if doc_sect.search(line): 1341 self.state = state.BODY 1342 # 1343 # Pick out the name of our new section, tweaking it if need be. 1344 # 1345 newsection = doc_sect.group(1) 1346 if newsection.lower() == 'description': 1347 newsection = 'Description' 1348 elif newsection.lower() == 'context': 1349 newsection = 'Context' 1350 self.state = state.SPECIAL_SECTION 1351 elif newsection.lower() in ["@return", "@returns", 1352 "return", "returns"]: 1353 newsection = "Return" 1354 self.state = state.SPECIAL_SECTION 1355 elif newsection[0] == '@': 1356 self.state = state.SPECIAL_SECTION 1357 # 1358 # Initialize the contents, and get the new section going. 
1359 # 1360 newcontents = doc_sect.group(2) 1361 if not newcontents: 1362 newcontents = "" 1363 self.dump_section() 1364 self.entry.begin_section(ln, newsection) 1365 self.entry.leading_space = None 1366 1367 self.entry.add_text(newcontents.lstrip()) 1368 return True 1369 return False 1370 1371 def is_comment_end(self, ln, line): 1372 """ 1373 Helper function to detect (and effect) the end of a kerneldoc comment. 1374 """ 1375 if doc_end.search(line): 1376 self.dump_section() 1377 1378 # Look for doc_com + <text> + doc_end: 1379 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1380 if r.match(line): 1381 self.emit_msg(ln, f"suspicious ending line: {line}") 1382 1383 self.entry.prototype = "" 1384 self.entry.new_start_line = ln + 1 1385 1386 self.state = state.PROTO 1387 return True 1388 return False 1389 1390 1391 def process_decl(self, ln, line): 1392 """ 1393 STATE_DECLARATION: We've seen the beginning of a declaration. 1394 """ 1395 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1396 return 1397 # 1398 # Look for anything with the " * " line beginning. 1399 # 1400 if doc_content.search(line): 1401 cont = doc_content.group(1) 1402 # 1403 # A blank line means that we have moved out of the declaration 1404 # part of the comment (without any "special section" parameter 1405 # descriptions). 1406 # 1407 if cont == "": 1408 self.state = state.BODY 1409 # 1410 # Otherwise we have more of the declaration section to soak up. 1411 # 1412 else: 1413 self.entry.declaration_purpose = \ 1414 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1415 else: 1416 # Unknown line, ignore 1417 self.emit_msg(ln, f"bad line: {line}") 1418 1419 1420 def process_special(self, ln, line): 1421 """ 1422 STATE_SPECIAL_SECTION: a section ending with a blank line. 1423 """ 1424 # 1425 # If we have hit a blank line (only the " * " marker), then this 1426 # section is done. 
1427 # 1428 if KernRe(r"\s*\*\s*$").match(line): 1429 self.entry.begin_section(ln, dump = True) 1430 self.state = state.BODY 1431 return 1432 # 1433 # Not a blank line, look for the other ways to end the section. 1434 # 1435 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1436 return 1437 # 1438 # OK, we should have a continuation of the text for this section. 1439 # 1440 if doc_content.search(line): 1441 cont = doc_content.group(1) 1442 # 1443 # If the lines of text after the first in a special section have 1444 # leading white space, we need to trim it out or Sphinx will get 1445 # confused. For the second line (the None case), see what we 1446 # find there and remember it. 1447 # 1448 if self.entry.leading_space is None: 1449 r = KernRe(r'^(\s+)') 1450 if r.match(cont): 1451 self.entry.leading_space = len(r.group(1)) 1452 else: 1453 self.entry.leading_space = 0 1454 # 1455 # Otherwise, before trimming any leading chars, be *sure* 1456 # that they are white space. We should maybe warn if this 1457 # isn't the case. 1458 # 1459 for i in range(0, self.entry.leading_space): 1460 if cont[i] != " ": 1461 self.entry.leading_space = i 1462 break 1463 # 1464 # Add the trimmed result to the section and we're done. 1465 # 1466 self.entry.add_text(cont[self.entry.leading_space:]) 1467 else: 1468 # Unknown line, ignore 1469 self.emit_msg(ln, f"bad line: {line}") 1470 1471 def process_body(self, ln, line): 1472 """ 1473 STATE_BODY: the bulk of a kerneldoc comment. 
1474 """ 1475 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1476 return 1477 1478 if doc_content.search(line): 1479 cont = doc_content.group(1) 1480 self.entry.add_text(cont) 1481 else: 1482 # Unknown line, ignore 1483 self.emit_msg(ln, f"bad line: {line}") 1484 1485 def process_inline_name(self, ln, line): 1486 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1487 1488 if doc_inline_sect.search(line): 1489 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1490 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1491 self.state = state.INLINE_TEXT 1492 elif doc_inline_end.search(line): 1493 self.dump_section() 1494 self.state = state.PROTO 1495 elif doc_content.search(line): 1496 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1497 self.state = state.PROTO 1498 # else ... ?? 1499 1500 def process_inline_text(self, ln, line): 1501 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1502 1503 if doc_inline_end.search(line): 1504 self.dump_section() 1505 self.state = state.PROTO 1506 elif doc_content.search(line): 1507 self.entry.add_text(doc_content.group(1)) 1508 # else ... ?? 1509 1510 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1511 """ 1512 Handle syscall definitions. 
1513 """ 1514 1515 is_void = False 1516 1517 # Strip newlines/CR's 1518 proto = re.sub(r'[\r\n]+', ' ', proto) 1519 1520 # Check if it's a SYSCALL_DEFINE0 1521 if 'SYSCALL_DEFINE0' in proto: 1522 is_void = True 1523 1524 # Replace SYSCALL_DEFINE with correct return type & function name 1525 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1526 1527 r = KernRe(r'long\s+(sys_.*?),') 1528 if r.search(proto): 1529 proto = KernRe(',').sub('(', proto, count=1) 1530 elif is_void: 1531 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1532 1533 # Now delete all of the odd-numbered commas in the proto 1534 # so that argument types & names don't have a comma between them 1535 count = 0 1536 length = len(proto) 1537 1538 if is_void: 1539 length = 0 # skip the loop if is_void 1540 1541 for ix in range(length): 1542 if proto[ix] == ',': 1543 count += 1 1544 if count % 2 == 1: 1545 proto = proto[:ix] + ' ' + proto[ix + 1:] 1546 1547 return proto 1548 1549 def tracepoint_munge(self, ln, proto): 1550 """ 1551 Handle tracepoint definitions. 
1552 """ 1553 1554 tracepointname = None 1555 tracepointargs = None 1556 1557 # Match tracepoint name based on different patterns 1558 r = KernRe(r'TRACE_EVENT\((.*?),') 1559 if r.search(proto): 1560 tracepointname = r.group(1) 1561 1562 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1563 if r.search(proto): 1564 tracepointname = r.group(1) 1565 1566 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1567 if r.search(proto): 1568 tracepointname = r.group(2) 1569 1570 if tracepointname: 1571 tracepointname = tracepointname.lstrip() 1572 1573 r = KernRe(r'TP_PROTO\((.*?)\)') 1574 if r.search(proto): 1575 tracepointargs = r.group(1) 1576 1577 if not tracepointname or not tracepointargs: 1578 self.emit_msg(ln, 1579 f"Unrecognized tracepoint format:\n{proto}\n") 1580 else: 1581 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1582 self.entry.identifier = f"trace_{self.entry.identifier}" 1583 1584 return proto 1585 1586 def process_proto_function(self, ln, line): 1587 """Ancillary routine to process a function prototype.""" 1588 1589 # strip C99-style comments to end of line 1590 line = KernRe(r"//.*$", re.S).sub('', line) 1591 # 1592 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1593 # 1594 if KernRe(r'\s*#\s*define').match(line): 1595 self.entry.prototype = line 1596 elif not line.startswith('#'): # skip other preprocessor stuff 1597 r = KernRe(r'([^\{]*)') 1598 if r.match(line): 1599 self.entry.prototype += r.group(1) + " " 1600 # 1601 # If we now have the whole prototype, clean it up and declare victory. 
1602 # 1603 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1604 # strip comments and surrounding spaces 1605 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1606 # 1607 # Handle self.entry.prototypes for function pointers like: 1608 # int (*pcs_config)(struct foo) 1609 # by turning it into 1610 # int pcs_config(struct foo) 1611 # 1612 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1613 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1614 # 1615 # Handle special declaration syntaxes 1616 # 1617 if 'SYSCALL_DEFINE' in self.entry.prototype: 1618 self.entry.prototype = self.syscall_munge(ln, 1619 self.entry.prototype) 1620 else: 1621 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1622 if r.search(self.entry.prototype): 1623 self.entry.prototype = self.tracepoint_munge(ln, 1624 self.entry.prototype) 1625 # 1626 # ... and we're done 1627 # 1628 self.dump_function(ln, self.entry.prototype) 1629 self.reset_state(ln) 1630 1631 def process_proto_type(self, ln, line): 1632 """ 1633 Ancillary routine to process a type. 1634 """ 1635 1636 # Strip C99-style comments and surrounding whitespace 1637 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1638 if not line: 1639 return # nothing to see here 1640 1641 # To distinguish preprocessor directive from regular declaration later. 1642 if line.startswith('#'): 1643 line += ";" 1644 # 1645 # Split the declaration on any of { } or ;, and accumulate pieces 1646 # until we hit a semicolon while not inside {brackets} 1647 # 1648 r = KernRe(r'(.*?)([{};])') 1649 for chunk in r.split(line): 1650 if chunk: # Ignore empty matches 1651 self.entry.prototype += chunk 1652 # 1653 # This cries out for a match statement ... someday after we can 1654 # drop Python 3.9 ... 
1655 # 1656 if chunk == '{': 1657 self.entry.brcount += 1 1658 elif chunk == '}': 1659 self.entry.brcount -= 1 1660 elif chunk == ';' and self.entry.brcount <= 0: 1661 self.dump_declaration(ln, self.entry.prototype) 1662 self.reset_state(ln) 1663 return 1664 # 1665 # We hit the end of the line while still in the declaration; put 1666 # in a space to represent the newline. 1667 # 1668 self.entry.prototype += ' ' 1669 1670 def process_proto(self, ln, line): 1671 """STATE_PROTO: reading a function/whatever prototype.""" 1672 1673 if doc_inline_oneline.search(line): 1674 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1675 self.entry.add_text(doc_inline_oneline.group(2)) 1676 self.dump_section() 1677 1678 elif doc_inline_start.search(line): 1679 self.state = state.INLINE_NAME 1680 1681 elif self.entry.decl_type == 'function': 1682 self.process_proto_function(ln, line) 1683 1684 else: 1685 self.process_proto_type(ln, line) 1686 1687 def process_docblock(self, ln, line): 1688 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1689 1690 if doc_end.search(line): 1691 self.dump_section() 1692 self.output_declaration("doc", self.entry.identifier) 1693 self.reset_state(ln) 1694 1695 elif doc_content.search(line): 1696 self.entry.add_text(doc_content.group(1)) 1697 1698 def parse_export(self): 1699 """ 1700 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1701 """ 1702 1703 export_table = set() 1704 1705 try: 1706 with open(self.fname, "r", encoding="utf8", 1707 errors="backslashreplace") as fp: 1708 1709 for line in fp: 1710 self.process_export(export_table, line) 1711 1712 except IOError: 1713 return None 1714 1715 return export_table 1716 1717 #: The state/action table telling us which function to invoke in each state. 
1718 state_actions = { 1719 state.NORMAL: process_normal, 1720 state.NAME: process_name, 1721 state.BODY: process_body, 1722 state.DECLARATION: process_decl, 1723 state.SPECIAL_SECTION: process_special, 1724 state.INLINE_NAME: process_inline_name, 1725 state.INLINE_TEXT: process_inline_text, 1726 state.PROTO: process_proto, 1727 state.DOCBLOCK: process_docblock, 1728 } 1729 1730 def parse_kdoc(self): 1731 """ 1732 Open and process each line of a C source file. 1733 The parsing is controlled via a state machine, and the line is passed 1734 to a different process function depending on the state. The process 1735 function may update the state as needed. 1736 1737 Besides parsing kernel-doc tags, it also parses export symbols. 1738 """ 1739 1740 prev = "" 1741 prev_ln = None 1742 export_table = set() 1743 1744 try: 1745 with open(self.fname, "r", encoding="utf8", 1746 errors="backslashreplace") as fp: 1747 for ln, line in enumerate(fp): 1748 1749 line = line.expandtabs().strip("\n") 1750 1751 # Group continuation lines on prototypes 1752 if self.state == state.PROTO: 1753 if line.endswith("\\"): 1754 prev += line.rstrip("\\") 1755 if not prev_ln: 1756 prev_ln = ln 1757 continue 1758 1759 if prev: 1760 ln = prev_ln 1761 line = prev + line 1762 prev = "" 1763 prev_ln = None 1764 1765 self.config.log.debug("%d %s: %s", 1766 ln, state.name[self.state], 1767 line) 1768 1769 # This is an optimization over the original script. 1770 # There, when export_file was used for the same file, 1771 # it was read twice. Here, we use the already-existing 1772 # loop to parse exported symbols as well. 1773 # 1774 if (self.state != state.NORMAL) or \ 1775 not self.process_export(export_table, line): 1776 # Hand this line to the appropriate state handler 1777 self.state_actions[self.state](self, ln, line) 1778 1779 self.emit_unused_warnings() 1780 1781 except OSError: 1782 self.config.log.error(f"Error: Cannot open file {self.fname}") 1783 1784 return export_table, self.entries 1785