1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.kdoc_re import NestedMatch, KernRe 17from kdoc.kdoc_item import KdocItem 18 19# 20# Regular expressions used to parse kernel-doc markups at KernelDoc class. 21# 22# Let's declare them in lowercase outside any class to make it easier to 23# convert from the Perl script. 24# 25# As those are evaluated at the beginning, no need to cache them 26# 27 28# Allow whitespace at end of comment start. 29doc_start = KernRe(r'^/\*\*\s*$', cache=False) 30 31doc_end = KernRe(r'\*/', cache=False) 32doc_com = KernRe(r'\s*\*\s*', cache=False) 33doc_com_body = KernRe(r'\s*\* ?', cache=False) 34doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 35 36# @params and a strictly limited set of supported section names 37# Specifically: 38# Match @word: 39# @...: 40# @{section-name}: 41# while trying to not match literal block starts like "example::" 42# 43known_section_names = 'description|context|returns?|notes?|examples?' 44known_sections = KernRe(known_section_names, flags = re.I) 45doc_sect = doc_com + \ 46 KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', 47 flags=re.I, cache=False) 48 49doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 50doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 51doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 52doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 53doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False) 54 55export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 56export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 57 58type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 59 60# 61# Tests for the beginning of a kerneldoc block in its various forms. 62# 63doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 64doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False) 65doc_begin_func = KernRe(str(doc_com) + # initial " * ' 66 r"(?:\w+\s*\*\s*)?" + # type (not captured) 67 r'(?:define\s+)?' + # possible "define" (not captured) 68 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 69 r'(?:[-:].*)?$', # description (not captured) 70 cache = False) 71 72# 73# Here begins a long set of transformations to turn structure member prefixes 74# and macro invocations into something we can parse and generate kdoc for. 75# 76struct_args_pattern = r'([^,)]+)' 77 78struct_xforms = [ 79 # Strip attributes 80 (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), 81 (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), 82 (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), 83 (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), 84 (KernRe(r'\s*__packed\s*', re.S), ' '), 85 (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), 86 (KernRe(r'\s*__private', re.S), ' '), 87 (KernRe(r'\s*__rcu', re.S), ' '), 88 (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), 89 (KernRe(r'\s*____cacheline_aligned', re.S), ' '), 90 (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), 91 # 92 # Unwrap struct_group macros based on this definition: 93 # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 94 # which has variants like: struct_group(NAME, MEMBERS...) 95 # Only MEMBERS arguments require documentation. 96 # 97 # Parsing them happens on two steps: 98 # 99 # 1. drop struct group arguments that aren't at MEMBERS, 100 # storing them as STRUCT_GROUP(MEMBERS) 101 # 102 # 2. remove STRUCT_GROUP() ancillary macro. 103 # 104 # The original logic used to remove STRUCT_GROUP() using an 105 # advanced regex: 106 # 107 # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; 108 # 109 # with two patterns that are incompatible with 110 # Python re module, as it has: 111 # 112 # - a recursive pattern: (?1) 113 # - an atomic grouping: (?>...) 114 # 115 # I tried a simpler version: but it didn't work either: 116 # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; 117 # 118 # As it doesn't properly match the end parenthesis on some cases. 119 # 120 # So, a better solution was crafted: there's now a NestedMatch 121 # class that ensures that delimiters after a search are properly 122 # matched. So, the implementation to drop STRUCT_GROUP() will be 123 # handled in separate. 124 # 125 (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), 126 (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), 127 (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), 128 (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), 129 # 130 # Replace macros 131 # 132 # TODO: use NestedMatch for FOO($1, $2, ...) matches 133 # 134 # it is better to also move those to the NestedMatch logic, 135 # to ensure that parentheses will be properly matched. 136 # 137 (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), 138 r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), 139 (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), 140 r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), 141 (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 142 re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), 143 (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', 144 re.S), r'unsigned long \1[1 << ((\2) - 1)]'), 145 (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + 146 r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), 147 (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + 148 struct_args_pattern + r'\)', re.S), r'\2 *\1'), 149 (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + 150 struct_args_pattern + r'\)', re.S), r'\1 \2[]'), 151 (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), 152 (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), 153] 154# 155# Regexes here are guaranteed to have the end delimiter matching 156# the start delimiter. Yet, right now, only one replace group 157# is allowed. 158# 159struct_nested_prefixes = [ 160 (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), 161] 162 163# 164# Transforms for function prototypes 165# 166function_xforms = [ 167 (KernRe(r"^static +"), ""), 168 (KernRe(r"^extern +"), ""), 169 (KernRe(r"^asmlinkage +"), ""), 170 (KernRe(r"^inline +"), ""), 171 (KernRe(r"^__inline__ +"), ""), 172 (KernRe(r"^__inline +"), ""), 173 (KernRe(r"^__always_inline +"), ""), 174 (KernRe(r"^noinline +"), ""), 175 (KernRe(r"^__FORTIFY_INLINE +"), ""), 176 (KernRe(r"__init +"), ""), 177 (KernRe(r"__init_or_module +"), ""), 178 (KernRe(r"__exit +"), ""), 179 (KernRe(r"__deprecated +"), ""), 180 (KernRe(r"__flatten +"), ""), 181 (KernRe(r"__meminit +"), ""), 182 (KernRe(r"__must_check +"), ""), 183 (KernRe(r"__weak +"), ""), 184 (KernRe(r"__sched +"), ""), 185 (KernRe(r"_noprof"), ""), 186 (KernRe(r"__always_unused *"), ""), 187 (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), 188 (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), 189 (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), 190 (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), 191 (KernRe(r"__attribute_const__ +"), ""), 192 (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), 193] 194 195# 196# Transforms for variable prototypes 197# 198var_xforms = [ 199 (KernRe(r"__read_mostly"), ""), 200 (KernRe(r"__ro_after_init"), ""), 201 (KernRe(r"(?://.*)$"), ""), 202 (KernRe(r"(?:/\*.*\*/)"), ""), 203 (KernRe(r";$"), ""), 204 (KernRe(r"=.*"), ""), 205] 206 207# 208# Ancillary functions 209# 210 211def apply_transforms(xforms, text): 212 """ 213 Apply a set of transforms to a block of text. 214 """ 215 for search, subst in xforms: 216 text = search.sub(subst, text) 217 return text 218 219multi_space = KernRe(r'\s\s+') 220def trim_whitespace(s): 221 """ 222 A little helper to get rid of excess white space. 223 """ 224 return multi_space.sub(' ', s.strip()) 225 226def trim_private_members(text): 227 """ 228 Remove ``struct``/``enum`` members that have been marked "private". 229 """ 230 # First look for a "public:" block that ends a private region, then 231 # handle the "private until the end" case. 232 # 233 text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) 234 text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) 235 # 236 # We needed the comments to do the above, but now we can take them out. 237 # 238 return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() 239 240class state: 241 """ 242 States used by the parser's state machine. 243 """ 244 245 # Parser states 246 NORMAL = 0 #: Normal code. 247 NAME = 1 #: Looking for function name. 248 DECLARATION = 2 #: We have seen a declaration which might not be done. 249 BODY = 3 #: The body of the comment. 250 SPECIAL_SECTION = 4 #: Doc section ending with a blank line. 251 PROTO = 5 #: Scanning prototype. 252 DOCBLOCK = 6 #: Documentation block. 253 INLINE_NAME = 7 #: Gathering doc outside main block. 254 INLINE_TEXT = 8 #: Reading the body of inline docs. 255 256 #: Names for each parser state. 257 name = [ 258 "NORMAL", 259 "NAME", 260 "DECLARATION", 261 "BODY", 262 "SPECIAL_SECTION", 263 "PROTO", 264 "DOCBLOCK", 265 "INLINE_NAME", 266 "INLINE_TEXT", 267 ] 268 269 270SECTION_DEFAULT = "Description" #: Default section. 271 272class KernelEntry: 273 """ 274 Encapsulates a Kernel documentation entry. 275 """ 276 277 def __init__(self, config, fname, ln): 278 self.config = config 279 self.fname = fname 280 281 self._contents = [] 282 self.prototype = "" 283 284 self.warnings = [] 285 286 self.parameterlist = [] 287 self.parameterdescs = {} 288 self.parametertypes = {} 289 self.parameterdesc_start_lines = {} 290 291 self.section_start_lines = {} 292 self.sections = {} 293 294 self.anon_struct_union = False 295 296 self.leading_space = None 297 298 self.fname = fname 299 300 # State flags 301 self.brcount = 0 302 self.declaration_start_line = ln + 1 303 304 # 305 # Management of section contents 306 # 307 def add_text(self, text): 308 """Add a new text to the entry contents list.""" 309 self._contents.append(text) 310 311 def contents(self): 312 """Returns a string with all content texts that were added.""" 313 return '\n'.join(self._contents) + '\n' 314 315 # TODO: rename to emit_message after removal of kernel-doc.pl 316 def emit_msg(self, ln, msg, *, warning=True): 317 """Emit a message.""" 318 319 log_msg = f"{self.fname}:{ln} {msg}" 320 321 if not warning: 322 self.config.log.info(log_msg) 323 return 324 325 # Delegate warning output to output logic, as this way it 326 # will report warnings/info only for symbols that are output 327 328 self.warnings.append(log_msg) 329 return 330 331 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 332 """ 333 Begin a new section. 334 """ 335 if dump: 336 self.dump_section(start_new = True) 337 self.section = title 338 self.new_start_line = line_no 339 340 def dump_section(self, start_new=True): 341 """ 342 Dumps section contents to arrays/hashes intended for that purpose. 343 """ 344 # 345 # If we have accumulated no contents in the default ("description") 346 # section, don't bother. 347 # 348 if self.section == SECTION_DEFAULT and not self._contents: 349 return 350 name = self.section 351 contents = self.contents() 352 353 if type_param.match(name): 354 name = type_param.group(1) 355 356 self.parameterdescs[name] = contents 357 self.parameterdesc_start_lines[name] = self.new_start_line 358 359 self.new_start_line = 0 360 361 else: 362 if name in self.sections and self.sections[name] != "": 363 # Only warn on user-specified duplicate section names 364 if name != SECTION_DEFAULT: 365 self.emit_msg(self.new_start_line, 366 f"duplicate section name '{name}'") 367 # Treat as a new paragraph - add a blank line 368 self.sections[name] += '\n' + contents 369 else: 370 self.sections[name] = contents 371 self.section_start_lines[name] = self.new_start_line 372 self.new_start_line = 0 373 374# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 375 376 if start_new: 377 self.section = SECTION_DEFAULT 378 self._contents = [] 379 380python_warning = False 381 382class KernelDoc: 383 """ 384 Read a C language source or header FILE and extract embedded 385 documentation comments. 386 """ 387 388 #: Name of context section. 389 section_context = "Context" 390 391 #: Name of return section. 392 section_return = "Return" 393 394 #: String to write when a parameter is not described. 395 undescribed = "-- undescribed --" 396 397 def __init__(self, config, fname): 398 """Initialize internal variables""" 399 400 self.fname = fname 401 self.config = config 402 403 # Initial state for the state machines 404 self.state = state.NORMAL 405 406 # Store entry currently being processed 407 self.entry = None 408 409 # Place all potential outputs into an array 410 self.entries = [] 411 412 # 413 # We need Python 3.7 for its "dicts remember the insertion 414 # order" guarantee 415 # 416 global python_warning 417 if (not python_warning and 418 sys.version_info.major == 3 and sys.version_info.minor < 7): 419 420 self.emit_msg(0, 421 'Python 3.7 or later is required for correct results') 422 python_warning = True 423 424 def emit_msg(self, ln, msg, *, warning=True): 425 """Emit a message""" 426 427 if self.entry: 428 self.entry.emit_msg(ln, msg, warning=warning) 429 return 430 431 log_msg = f"{self.fname}:{ln} {msg}" 432 433 if warning: 434 self.config.log.warning(log_msg) 435 else: 436 self.config.log.info(log_msg) 437 438 def dump_section(self, start_new=True): 439 """ 440 Dump section contents to arrays/hashes intended for that purpose. 441 """ 442 443 if self.entry: 444 self.entry.dump_section(start_new) 445 446 # TODO: rename it to store_declaration after removal of kernel-doc.pl 447 def output_declaration(self, dtype, name, **args): 448 """ 449 Store the entry into an entry array. 450 451 The actual output and output filters will be handled elsewhere. 452 """ 453 454 item = KdocItem(name, self.fname, dtype, 455 self.entry.declaration_start_line, **args) 456 item.warnings = self.entry.warnings 457 458 # Drop empty sections 459 # TODO: improve empty sections logic to emit warnings 460 sections = self.entry.sections 461 for section in ["Description", "Return"]: 462 if section in sections and not sections[section].rstrip(): 463 del sections[section] 464 item.set_sections(sections, self.entry.section_start_lines) 465 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 466 self.entry.parametertypes, 467 self.entry.parameterdesc_start_lines) 468 self.entries.append(item) 469 470 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 471 472 def emit_unused_warnings(self): 473 """ 474 When the parser fails to produce a valid entry, it places some 475 warnings under `entry.warnings` that will be discarded when resetting 476 the state. 477 478 Ensure that those warnings are not lost. 479 480 .. note:: 481 482 Because we are calling `config.warning()` here, those 483 warnings are not filtered by the `-W` parameters: they will all 484 be produced even when `-Wreturn`, `-Wshort-desc`, and/or 485 `-Wcontents-before-sections` are used. 486 487 Allowing those warnings to be filtered is complex, because it 488 would require storing them in a buffer and then filtering them 489 during the output step of the code, depending on the 490 selected symbols. 491 """ 492 if self.entry and self.entry not in self.entries: 493 for log_msg in self.entry.warnings: 494 self.config.warning(log_msg) 495 496 def reset_state(self, ln): 497 """ 498 Ancillary routine to create a new entry. It initializes all 499 variables used by the state machine. 500 """ 501 502 self.emit_unused_warnings() 503 504 self.entry = KernelEntry(self.config, self.fname, ln) 505 506 # State flags 507 self.state = state.NORMAL 508 509 def push_parameter(self, ln, decl_type, param, dtype, 510 org_arg, declaration_name): 511 """ 512 Store parameters and their descriptions at self.entry. 513 """ 514 515 if self.entry.anon_struct_union and dtype == "" and param == "}": 516 return # Ignore the ending }; from anonymous struct/union 517 518 self.entry.anon_struct_union = False 519 520 param = KernRe(r'[\[\)].*').sub('', param, count=1) 521 522 # 523 # Look at various "anonymous type" cases. 524 # 525 if dtype == '': 526 if param.endswith("..."): 527 if len(param) > 3: # there is a name provided, use that 528 param = param[:-3] 529 if not self.entry.parameterdescs.get(param): 530 self.entry.parameterdescs[param] = "variable arguments" 531 532 elif (not param) or param == "void": 533 param = "void" 534 self.entry.parameterdescs[param] = "no arguments" 535 536 elif param in ["struct", "union"]: 537 # Handle unnamed (anonymous) union or struct 538 dtype = param 539 param = "{unnamed_" + param + "}" 540 self.entry.parameterdescs[param] = "anonymous\n" 541 self.entry.anon_struct_union = True 542 543 # Warn if parameter has no description 544 # (but ignore ones starting with # as these are not parameters 545 # but inline preprocessor statements) 546 if param not in self.entry.parameterdescs and not param.startswith("#"): 547 self.entry.parameterdescs[param] = self.undescribed 548 549 if "." not in param: 550 if decl_type == 'function': 551 dname = f"{decl_type} parameter" 552 else: 553 dname = f"{decl_type} member" 554 555 self.emit_msg(ln, 556 f"{dname} '{param}' not described in '{declaration_name}'") 557 558 # Strip spaces from param so that it is one continuous string on 559 # parameterlist. This fixes a problem where check_sections() 560 # cannot find a parameter like "addr[6 + 2]" because it actually 561 # appears as "addr[6", "+", "2]" on the parameter list. 562 # However, it's better to maintain the param string unchanged for 563 # output, so just weaken the string compare in check_sections() 564 # to ignore "[blah" in a parameter string. 565 566 self.entry.parameterlist.append(param) 567 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 568 self.entry.parametertypes[param] = org_arg 569 570 571 def create_parameter_list(self, ln, decl_type, args, 572 splitter, declaration_name): 573 """ 574 Creates a list of parameters, storing them at self.entry. 575 """ 576 577 # temporarily replace all commas inside function pointer definition 578 arg_expr = KernRe(r'(\([^\),]+),') 579 while arg_expr.search(args): 580 args = arg_expr.sub(r"\1#", args) 581 582 for arg in args.split(splitter): 583 # Ignore argument attributes 584 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 585 586 # Strip leading/trailing spaces 587 arg = arg.strip() 588 arg = KernRe(r'\s+').sub(' ', arg, count=1) 589 590 if arg.startswith('#'): 591 # Treat preprocessor directive as a typeless variable just to fill 592 # corresponding data structures "correctly". Catch it later in 593 # output_* subs. 594 595 # Treat preprocessor directive as a typeless variable 596 self.push_parameter(ln, decl_type, arg, "", 597 "", declaration_name) 598 # 599 # The pointer-to-function case. 600 # 601 elif KernRe(r'\(.+\)\s*\(').search(arg): 602 arg = arg.replace('#', ',') 603 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" 604 r'([\w\[\].]*)' # Capture the name and possible [array] 605 r'\s*\)') # Make sure the trailing ")" is there 606 if r.match(arg): 607 param = r.group(1) 608 else: 609 self.emit_msg(ln, f"Invalid param: {arg}") 610 param = arg 611 dtype = arg.replace(param, '') 612 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 613 # 614 # The array-of-pointers case. Dig the parameter name out from the middle 615 # of the declaration. 616 # 617 elif KernRe(r'\(.+\)\s*\[').search(arg): 618 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" 619 r'([\w.]*?)' # The actual pointer name 620 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] 621 if r.match(arg): 622 param = r.group(1) 623 else: 624 self.emit_msg(ln, f"Invalid param: {arg}") 625 param = arg 626 dtype = arg.replace(param, '') 627 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 628 elif arg: 629 # 630 # Clean up extraneous spaces and split the string at commas; the first 631 # element of the resulting list will also include the type information. 632 # 633 arg = KernRe(r'\s*:\s*').sub(":", arg) 634 arg = KernRe(r'\s*\[').sub('[', arg) 635 args = KernRe(r'\s*,\s*').split(arg) 636 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 637 # 638 # args[0] has a string of "type a". If "a" includes an [array] 639 # declaration, we want to not be fooled by any white space inside 640 # the brackets, so detect and handle that case specially. 641 # 642 r = KernRe(r'^([^[\]]*\s+)(.*)$') 643 if r.match(args[0]): 644 args[0] = r.group(2) 645 dtype = r.group(1) 646 else: 647 # No space in args[0]; this seems wrong but preserves previous behavior 648 dtype = '' 649 650 bitfield_re = KernRe(r'(.*?):(\w+)') 651 for param in args: 652 # 653 # For pointers, shift the star(s) from the variable name to the 654 # type declaration. 655 # 656 r = KernRe(r'^(\*+)\s*(.*)') 657 if r.match(param): 658 self.push_parameter(ln, decl_type, r.group(2), 659 f"{dtype} {r.group(1)}", 660 arg, declaration_name) 661 # 662 # Perform a similar shift for bitfields. 663 # 664 elif bitfield_re.search(param): 665 if dtype != "": # Skip unnamed bit-fields 666 self.push_parameter(ln, decl_type, bitfield_re.group(1), 667 f"{dtype}:{bitfield_re.group(2)}", 668 arg, declaration_name) 669 else: 670 self.push_parameter(ln, decl_type, param, dtype, 671 arg, declaration_name) 672 673 def check_sections(self, ln, decl_name, decl_type): 674 """ 675 Check for errors inside sections, emitting warnings if not found 676 parameters are described. 677 """ 678 for section in self.entry.sections: 679 if section not in self.entry.parameterlist and \ 680 not known_sections.search(section): 681 if decl_type == 'function': 682 dname = f"{decl_type} parameter" 683 else: 684 dname = f"{decl_type} member" 685 self.emit_msg(ln, 686 f"Excess {dname} '{section}' description in '{decl_name}'") 687 688 def check_return_section(self, ln, declaration_name, return_type): 689 """ 690 If the function doesn't return void, warns about the lack of a 691 return description. 692 """ 693 694 if not self.config.wreturn: 695 return 696 697 # Ignore an empty return type (It's a macro) 698 # Ignore functions with a "void" return type (but not "void *") 699 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 700 return 701 702 if not self.entry.sections.get("Return", None): 703 self.emit_msg(ln, 704 f"No description found for return value of '{declaration_name}'") 705 706 def split_struct_proto(self, proto): 707 """ 708 Split apart a structure prototype; returns (struct|union, name, 709 members) or ``None``. 710 """ 711 712 type_pattern = r'(struct|union)' 713 qualifiers = [ 714 "__attribute__", 715 "__packed", 716 "__aligned", 717 "____cacheline_aligned_in_smp", 718 "____cacheline_aligned", 719 ] 720 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 721 722 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 723 if r.search(proto): 724 return (r.group(1), r.group(2), r.group(3)) 725 else: 726 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 727 if r.search(proto): 728 return (r.group(1), r.group(3), r.group(2)) 729 return None 730 731 def rewrite_struct_members(self, members): 732 """ 733 Process ``struct``/``union`` members from the most deeply nested 734 outward. 735 736 Rewrite the members of a ``struct`` or ``union`` for easier formatting 737 later on. Among other things, this function will turn a member like:: 738 739 struct { inner_members; } foo; 740 741 into:: 742 743 struct foo; inner_members; 744 """ 745 746 # 747 # The trick is in the ``^{`` below - it prevents a match of an outer 748 # ``struct``/``union`` until the inner one has been munged 749 # (removing the ``{`` in the process). 750 # 751 struct_members = KernRe(r'(struct|union)' # 0: declaration type 752 r'([^\{\};]+)' # 1: possible name 753 r'(\{)' 754 r'([^\{\}]*)' # 3: Contents of declaration 755 r'(\})' 756 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration 757 tuples = struct_members.findall(members) 758 while tuples: 759 for t in tuples: 760 newmember = "" 761 oldmember = "".join(t) # Reconstruct the original formatting 762 dtype, name, lbr, content, rbr, rest, semi = t 763 # 764 # Pass through each field name, normalizing the form and formatting. 765 # 766 for s_id in rest.split(','): 767 s_id = s_id.strip() 768 newmember += f"{dtype} {s_id}; " 769 # 770 # Remove bitfield/array/pointer info, getting the bare name. 771 # 772 s_id = KernRe(r'[:\[].*').sub('', s_id) 773 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 774 # 775 # Pass through the members of this inner structure/union. 776 # 777 for arg in content.split(';'): 778 arg = arg.strip() 779 # 780 # Look for (type)(*name)(args) - pointer to function 781 # 782 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') 783 if r.match(arg): 784 dtype, name, extra = r.group(1), r.group(2), r.group(3) 785 # Pointer-to-function 786 if not s_id: 787 # Anonymous struct/union 788 newmember += f"{dtype}{name}{extra}; " 789 else: 790 newmember += f"{dtype}{s_id}.{name}{extra}; " 791 # 792 # Otherwise a non-function member. 793 # 794 else: 795 # 796 # Remove bitmap and array portions and spaces around commas 797 # 798 arg = KernRe(r':\s*\d+\s*').sub('', arg) 799 arg = KernRe(r'\[.*\]').sub('', arg) 800 arg = KernRe(r'\s*,\s*').sub(',', arg) 801 # 802 # Look for a normal decl - "type name[,name...]" 803 # 804 r = KernRe(r'(.*)\s+([\S+,]+)') 805 if r.search(arg): 806 for name in r.group(2).split(','): 807 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) 808 if not s_id: 809 # Anonymous struct/union 810 newmember += f"{r.group(1)} {name}; " 811 else: 812 newmember += f"{r.group(1)} {s_id}.{name}; " 813 else: 814 newmember += f"{arg}; " 815 # 816 # At the end of the s_id loop, replace the original declaration with 817 # the munged version. 818 # 819 members = members.replace(oldmember, newmember) 820 # 821 # End of the tuple loop - search again and see if there are outer members 822 # that now turn up. 823 # 824 tuples = struct_members.findall(members) 825 return members 826 827 def format_struct_decl(self, declaration): 828 """ 829 Format the ``struct`` declaration into a standard form for inclusion 830 in the resulting docs. 831 """ 832 833 # 834 # Insert newlines, get rid of extra spaces. 835 # 836 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 837 declaration = KernRe(r'\}\s+;').sub('};', declaration) 838 # 839 # Format inline enums with each member on its own line. 840 # 841 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 842 while r.search(declaration): 843 declaration = r.sub(r'\1,\n\2', declaration) 844 # 845 # Now go through and supply the right number of tabs 846 # for each line. 847 # 848 def_args = declaration.split('\n') 849 level = 1 850 declaration = "" 851 for clause in def_args: 852 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) 853 if clause: 854 if '}' in clause and level > 1: 855 level -= 1 856 if not clause.startswith('#'): 857 declaration += "\t" * level 858 declaration += "\t" + clause + "\n" 859 if "{" in clause and "}" not in clause: 860 level += 1 861 return declaration 862 863 864 def dump_struct(self, ln, proto): 865 """ 866 Store an entry for a ``struct`` or ``union`` 867 """ 868 # 869 # Do the basic parse to get the pieces of the declaration. 870 # 871 struct_parts = self.split_struct_proto(proto) 872 if not struct_parts: 873 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 874 return 875 decl_type, declaration_name, members = struct_parts 876 877 if self.entry.identifier != declaration_name: 878 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " 879 f"Prototype was for {decl_type} {declaration_name} instead\n") 880 return 881 # 882 # Go through the list of members applying all of our transformations. 883 # 884 members = trim_private_members(members) 885 members = apply_transforms(struct_xforms, members) 886 887 nested = NestedMatch() 888 for search, sub in struct_nested_prefixes: 889 members = nested.sub(search, sub, members) 890 # 891 # Deal with embedded struct and union members, and drop enums entirely. 892 # 893 declaration = members 894 members = self.rewrite_struct_members(members) 895 members = re.sub(r'(\{[^\{\}]*\})', '', members) 896 # 897 # Output the result and we are done. 898 # 899 self.create_parameter_list(ln, decl_type, members, ';', 900 declaration_name) 901 self.check_sections(ln, declaration_name, decl_type) 902 self.output_declaration(decl_type, declaration_name, 903 definition=self.format_struct_decl(declaration), 904 purpose=self.entry.declaration_purpose) 905 906 def dump_enum(self, ln, proto): 907 """ 908 Store an ``enum`` inside self.entries array. 909 """ 910 # 911 # Strip preprocessor directives. Note that this depends on the 912 # trailing semicolon we added in process_proto_type(). 913 # 914 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 915 # 916 # Parse out the name and members of the enum. Typedef form first. 917 # 918 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 919 if r.search(proto): 920 declaration_name = r.group(2) 921 members = trim_private_members(r.group(1)) 922 # 923 # Failing that, look for a straight enum 924 # 925 else: 926 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 927 if r.match(proto): 928 declaration_name = r.group(1) 929 members = trim_private_members(r.group(2)) 930 # 931 # OK, this isn't going to work. 932 # 933 else: 934 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 935 return 936 # 937 # Make sure we found what we were expecting. 938 # 939 if self.entry.identifier != declaration_name: 940 if self.entry.identifier == "": 941 self.emit_msg(ln, 942 f"{proto}: wrong kernel-doc identifier on prototype") 943 else: 944 self.emit_msg(ln, 945 f"expecting prototype for enum {self.entry.identifier}. " 946 f"Prototype was for enum {declaration_name} instead") 947 return 948 949 if not declaration_name: 950 declaration_name = "(anonymous)" 951 # 952 # Parse out the name of each enum member, and verify that we 953 # have a description for it. 954 # 955 member_set = set() 956 members = KernRe(r'\([^;)]*\)').sub('', members) 957 for arg in members.split(','): 958 if not arg: 959 continue 960 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 961 self.entry.parameterlist.append(arg) 962 if arg not in self.entry.parameterdescs: 963 self.entry.parameterdescs[arg] = self.undescribed 964 self.emit_msg(ln, 965 f"Enum value '{arg}' not described in enum '{declaration_name}'") 966 member_set.add(arg) 967 # 968 # Ensure that every described member actually exists in the enum. 969 # 970 for k in self.entry.parameterdescs: 971 if k not in member_set: 972 self.emit_msg(ln, 973 f"Excess enum value '@{k}' description in '{declaration_name}'") 974 975 self.output_declaration('enum', declaration_name, 976 purpose=self.entry.declaration_purpose) 977 978 def dump_var(self, ln, proto): 979 """ 980 Store variables that are part of kAPI. 981 """ 982 VAR_ATTRIBS = [ 983 "extern", 984 "const", 985 ] 986 OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" 987 988 # 989 # Store the full prototype before modifying it 990 # 991 full_proto = proto 992 declaration_name = None 993 994 # 995 # Handle macro definitions 996 # 997 macro_prefixes = [ 998 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"), 999 ] 1000 1001 for r in macro_prefixes: 1002 match = r.search(proto) 1003 if match: 1004 declaration_name = match.group(1) 1005 break 1006 1007 # 1008 # Drop comments and macros to have a pure C prototype 1009 # 1010 if not declaration_name: 1011 for r, sub in var_xforms: 1012 proto = r.sub(sub, proto) 1013 1014 proto = proto.rstrip() 1015 1016 # 1017 # Variable name is at the end of the declaration 1018 # 1019 1020 default_val = None 1021 1022 r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1023 if r.match(proto): 1024 if not declaration_name: 1025 declaration_name = r.group(1) 1026 1027 default_val = r.group(2) 1028 else: 1029 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") 1030 1031 if r.match(proto): 1032 default_val = r.group(1) 1033 if not declaration_name: 1034 self.emit_msg(ln,f"{proto}: can't parse variable") 1035 return 1036 1037 if default_val: 1038 default_val = default_val.lstrip("=").strip() 1039 1040 self.output_declaration("var", declaration_name, 1041 full_proto=full_proto, 1042 default_val=default_val, 1043 purpose=self.entry.declaration_purpose) 1044 1045 def dump_declaration(self, ln, prototype): 1046 """ 1047 Store a data declaration inside self.entries array. 1048 """ 1049 1050 if self.entry.decl_type == "enum": 1051 self.dump_enum(ln, prototype) 1052 elif self.entry.decl_type == "typedef": 1053 self.dump_typedef(ln, prototype) 1054 elif self.entry.decl_type in ["union", "struct"]: 1055 self.dump_struct(ln, prototype) 1056 elif self.entry.decl_type == "var": 1057 self.dump_var(ln, prototype) 1058 else: 1059 # This would be a bug 1060 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 1061 1062 def dump_function(self, ln, prototype): 1063 """ 1064 Store a function or function macro inside self.entries array. 1065 """ 1066 1067 found = func_macro = False 1068 return_type = '' 1069 decl_type = 'function' 1070 1071 # 1072 # If we have a macro, remove the "#define" at the front. 1073 # 1074 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 1075 if new_proto != prototype: 1076 prototype = new_proto 1077 # 1078 # Dispense with the simple "#define A B" case here; the key 1079 # is the space after the name of the symbol being defined. 1080 # NOTE that the seemingly misnamed "func_macro" indicates a 1081 # macro *without* arguments. 1082 # 1083 r = KernRe(r'^(\w+)\s+') 1084 if r.search(prototype): 1085 return_type = '' 1086 declaration_name = r.group(1) 1087 func_macro = True 1088 found = True 1089 else: 1090 # 1091 # Apply the initial transformations. 1092 # 1093 prototype = apply_transforms(function_xforms, prototype) 1094 1095 # Yes, this truly is vile. We are looking for: 1096 # 1. Return type (may be nothing if we're looking at a macro) 1097 # 2. Function name 1098 # 3. Function parameters. 1099 # 1100 # All the while we have to watch out for function pointer parameters 1101 # (which IIRC is what the two sections are for), C types (these 1102 # regexps don't even start to express all the possibilities), and 1103 # so on. 1104 # 1105 # If you mess with these regexps, it's a good idea to check that 1106 # the following functions' documentation still comes out right: 1107 # - parport_register_device (function pointer parameters) 1108 # - atomic_set (macro) 1109 # - pci_match_device, __copy_to_user (long return type) 1110 1111 name = r'\w+' 1112 type1 = r'(?:[\w\s]+)?' 1113 type2 = r'(?:[\w\s]+\*+)+' 1114 # 1115 # Attempt to match first on (args) with no internal parentheses; this 1116 # lets us easily filter out __acquires() and other post-args stuff. If 1117 # that fails, just grab the rest of the line to the last closing 1118 # parenthesis. 1119 # 1120 proto_args = r'\(([^\(]*|.*)\)' 1121 # 1122 # (Except for the simple macro case) attempt to split up the prototype 1123 # in the various ways we understand. 1124 # 1125 if not found: 1126 patterns = [ 1127 rf'^()({name})\s*{proto_args}', 1128 rf'^({type1})\s+({name})\s*{proto_args}', 1129 rf'^({type2})\s*({name})\s*{proto_args}', 1130 ] 1131 1132 for p in patterns: 1133 r = KernRe(p) 1134 if r.match(prototype): 1135 return_type = r.group(1) 1136 declaration_name = r.group(2) 1137 args = r.group(3) 1138 self.create_parameter_list(ln, decl_type, args, ',', 1139 declaration_name) 1140 found = True 1141 break 1142 # 1143 # Parsing done; make sure that things are as we expect. 1144 # 1145 if not found: 1146 self.emit_msg(ln, 1147 f"cannot understand function prototype: '{prototype}'") 1148 return 1149 if self.entry.identifier != declaration_name: 1150 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " 1151 f"Prototype was for {declaration_name}() instead") 1152 return 1153 self.check_sections(ln, declaration_name, "function") 1154 self.check_return_section(ln, declaration_name, return_type) 1155 # 1156 # Store the result. 1157 # 1158 self.output_declaration(decl_type, declaration_name, 1159 typedef=('typedef' in return_type), 1160 functiontype=return_type, 1161 purpose=self.entry.declaration_purpose, 1162 func_macro=func_macro) 1163 1164 1165 def dump_typedef(self, ln, proto): 1166 """ 1167 Store a ``typedef`` inside self.entries array. 1168 """ 1169 # 1170 # We start by looking for function typedefs. 1171 # 1172 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1173 typedef_ident = r'\*?\s*(\w\S+)\s*' 1174 typedef_args = r'\s*\((.*)\);' 1175 1176 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1177 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1178 1179 # Parse function typedef prototypes 1180 for r in [typedef1, typedef2]: 1181 if not r.match(proto): 1182 continue 1183 1184 return_type = r.group(1).strip() 1185 declaration_name = r.group(2) 1186 args = r.group(3) 1187 1188 if self.entry.identifier != declaration_name: 1189 self.emit_msg(ln, 1190 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1191 return 1192 1193 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1194 1195 self.output_declaration('function', declaration_name, 1196 typedef=True, 1197 functiontype=return_type, 1198 purpose=self.entry.declaration_purpose) 1199 return 1200 # 1201 # Not a function, try to parse a simple typedef. 1202 # 1203 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1204 if r.match(proto): 1205 declaration_name = r.group(1) 1206 1207 if self.entry.identifier != declaration_name: 1208 self.emit_msg(ln, 1209 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1210 return 1211 1212 self.output_declaration('typedef', declaration_name, 1213 purpose=self.entry.declaration_purpose) 1214 return 1215 1216 self.emit_msg(ln, "error: Cannot parse typedef!") 1217 1218 @staticmethod 1219 def process_export(function_set, line): 1220 """ 1221 process ``EXPORT_SYMBOL*`` tags 1222 1223 This method doesn't use any variable from the class, so declare it 1224 with a staticmethod decorator. 1225 """ 1226 1227 # We support documenting some exported symbols with different 1228 # names. A horrible hack. 1229 suffixes = [ '_noprof' ] 1230 1231 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1232 # multiple export lines would violate Kernel coding style. 1233 1234 if export_symbol.search(line): 1235 symbol = export_symbol.group(2) 1236 elif export_symbol_ns.search(line): 1237 symbol = export_symbol_ns.group(2) 1238 else: 1239 return False 1240 # 1241 # Found an export, trim out any special suffixes 1242 # 1243 for suffix in suffixes: 1244 # Be backward compatible with Python < 3.9 1245 if symbol.endswith(suffix): 1246 symbol = symbol[:-len(suffix)] 1247 function_set.add(symbol) 1248 return True 1249 1250 def process_normal(self, ln, line): 1251 """ 1252 STATE_NORMAL: looking for the ``/**`` to begin everything. 1253 """ 1254 1255 if not doc_start.match(line): 1256 return 1257 1258 # start a new entry 1259 self.reset_state(ln) 1260 1261 # next line is always the function name 1262 self.state = state.NAME 1263 1264 def process_name(self, ln, line): 1265 """ 1266 STATE_NAME: Looking for the "name - description" line 1267 """ 1268 # 1269 # Check for a DOC: block and handle them specially. 1270 # 1271 if doc_block.search(line): 1272 1273 if not doc_block.group(1): 1274 self.entry.begin_section(ln, "Introduction") 1275 else: 1276 self.entry.begin_section(ln, doc_block.group(1)) 1277 1278 self.entry.identifier = self.entry.section 1279 self.state = state.DOCBLOCK 1280 # 1281 # Otherwise we're looking for a normal kerneldoc declaration line. 1282 # 1283 elif doc_decl.search(line): 1284 self.entry.identifier = doc_decl.group(1) 1285 1286 # Test for data declaration 1287 if doc_begin_data.search(line): 1288 self.entry.decl_type = doc_begin_data.group(1) 1289 self.entry.identifier = doc_begin_data.group(2) 1290 # 1291 # Look for a function description 1292 # 1293 elif doc_begin_func.search(line): 1294 self.entry.identifier = doc_begin_func.group(1) 1295 self.entry.decl_type = "function" 1296 # 1297 # We struck out. 1298 # 1299 else: 1300 self.emit_msg(ln, 1301 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1302 self.state = state.NORMAL 1303 return 1304 # 1305 # OK, set up for a new kerneldoc entry. 1306 # 1307 self.state = state.BODY 1308 self.entry.identifier = self.entry.identifier.strip(" ") 1309 # if there's no @param blocks need to set up default section here 1310 self.entry.begin_section(ln + 1) 1311 # 1312 # Find the description portion, which *should* be there but 1313 # isn't always. 1314 # (We should be able to capture this from the previous parsing - someday) 1315 # 1316 r = KernRe("[-:](.*)") 1317 if r.search(line): 1318 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1319 self.state = state.DECLARATION 1320 else: 1321 self.entry.declaration_purpose = "" 1322 1323 if not self.entry.declaration_purpose and self.config.wshort_desc: 1324 self.emit_msg(ln, 1325 f"missing initial short description on line:\n{line}") 1326 1327 if not self.entry.identifier and self.entry.decl_type != "enum": 1328 self.emit_msg(ln, 1329 f"wrong kernel-doc identifier on line:\n{line}") 1330 self.state = state.NORMAL 1331 1332 if self.config.verbose: 1333 self.emit_msg(ln, 1334 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1335 warning=False) 1336 # 1337 # Failed to find an identifier. Emit a warning 1338 # 1339 else: 1340 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1341 1342 def is_new_section(self, ln, line): 1343 """ 1344 Helper function to determine if a new section is being started. 1345 """ 1346 if doc_sect.search(line): 1347 self.state = state.BODY 1348 # 1349 # Pick out the name of our new section, tweaking it if need be. 1350 # 1351 newsection = doc_sect.group(1) 1352 if newsection.lower() == 'description': 1353 newsection = 'Description' 1354 elif newsection.lower() == 'context': 1355 newsection = 'Context' 1356 self.state = state.SPECIAL_SECTION 1357 elif newsection.lower() in ["@return", "@returns", 1358 "return", "returns"]: 1359 newsection = "Return" 1360 self.state = state.SPECIAL_SECTION 1361 elif newsection[0] == '@': 1362 self.state = state.SPECIAL_SECTION 1363 # 1364 # Initialize the contents, and get the new section going. 1365 # 1366 newcontents = doc_sect.group(2) 1367 if not newcontents: 1368 newcontents = "" 1369 self.dump_section() 1370 self.entry.begin_section(ln, newsection) 1371 self.entry.leading_space = None 1372 1373 self.entry.add_text(newcontents.lstrip()) 1374 return True 1375 return False 1376 1377 def is_comment_end(self, ln, line): 1378 """ 1379 Helper function to detect (and effect) the end of a kerneldoc comment. 1380 """ 1381 if doc_end.search(line): 1382 self.dump_section() 1383 1384 # Look for doc_com + <text> + doc_end: 1385 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1386 if r.match(line): 1387 self.emit_msg(ln, f"suspicious ending line: {line}") 1388 1389 self.entry.prototype = "" 1390 self.entry.new_start_line = ln + 1 1391 1392 self.state = state.PROTO 1393 return True 1394 return False 1395 1396 1397 def process_decl(self, ln, line): 1398 """ 1399 STATE_DECLARATION: We've seen the beginning of a declaration. 1400 """ 1401 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1402 return 1403 # 1404 # Look for anything with the " * " line beginning. 1405 # 1406 if doc_content.search(line): 1407 cont = doc_content.group(1) 1408 # 1409 # A blank line means that we have moved out of the declaration 1410 # part of the comment (without any "special section" parameter 1411 # descriptions). 1412 # 1413 if cont == "": 1414 self.state = state.BODY 1415 # 1416 # Otherwise we have more of the declaration section to soak up. 1417 # 1418 else: 1419 self.entry.declaration_purpose = \ 1420 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1421 else: 1422 # Unknown line, ignore 1423 self.emit_msg(ln, f"bad line: {line}") 1424 1425 1426 def process_special(self, ln, line): 1427 """ 1428 STATE_SPECIAL_SECTION: a section ending with a blank line. 1429 """ 1430 # 1431 # If we have hit a blank line (only the " * " marker), then this 1432 # section is done. 1433 # 1434 if KernRe(r"\s*\*\s*$").match(line): 1435 self.entry.begin_section(ln, dump = True) 1436 self.state = state.BODY 1437 return 1438 # 1439 # Not a blank line, look for the other ways to end the section. 1440 # 1441 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1442 return 1443 # 1444 # OK, we should have a continuation of the text for this section. 1445 # 1446 if doc_content.search(line): 1447 cont = doc_content.group(1) 1448 # 1449 # If the lines of text after the first in a special section have 1450 # leading white space, we need to trim it out or Sphinx will get 1451 # confused. For the second line (the None case), see what we 1452 # find there and remember it. 1453 # 1454 if self.entry.leading_space is None: 1455 r = KernRe(r'^(\s+)') 1456 if r.match(cont): 1457 self.entry.leading_space = len(r.group(1)) 1458 else: 1459 self.entry.leading_space = 0 1460 # 1461 # Otherwise, before trimming any leading chars, be *sure* 1462 # that they are white space. We should maybe warn if this 1463 # isn't the case. 1464 # 1465 for i in range(0, self.entry.leading_space): 1466 if cont[i] != " ": 1467 self.entry.leading_space = i 1468 break 1469 # 1470 # Add the trimmed result to the section and we're done. 1471 # 1472 self.entry.add_text(cont[self.entry.leading_space:]) 1473 else: 1474 # Unknown line, ignore 1475 self.emit_msg(ln, f"bad line: {line}") 1476 1477 def process_body(self, ln, line): 1478 """ 1479 STATE_BODY: the bulk of a kerneldoc comment. 1480 """ 1481 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1482 return 1483 1484 if doc_content.search(line): 1485 cont = doc_content.group(1) 1486 self.entry.add_text(cont) 1487 else: 1488 # Unknown line, ignore 1489 self.emit_msg(ln, f"bad line: {line}") 1490 1491 def process_inline_name(self, ln, line): 1492 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1493 1494 if doc_inline_sect.search(line): 1495 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1496 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1497 self.state = state.INLINE_TEXT 1498 elif doc_inline_end.search(line): 1499 self.dump_section() 1500 self.state = state.PROTO 1501 elif doc_content.search(line): 1502 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1503 self.state = state.PROTO 1504 # else ... ?? 1505 1506 def process_inline_text(self, ln, line): 1507 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1508 1509 if doc_inline_end.search(line): 1510 self.dump_section() 1511 self.state = state.PROTO 1512 elif doc_content.search(line): 1513 self.entry.add_text(doc_content.group(1)) 1514 # else ... ?? 1515 1516 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1517 """ 1518 Handle syscall definitions. 1519 """ 1520 1521 is_void = False 1522 1523 # Strip newlines/CR's 1524 proto = re.sub(r'[\r\n]+', ' ', proto) 1525 1526 # Check if it's a SYSCALL_DEFINE0 1527 if 'SYSCALL_DEFINE0' in proto: 1528 is_void = True 1529 1530 # Replace SYSCALL_DEFINE with correct return type & function name 1531 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1532 1533 r = KernRe(r'long\s+(sys_.*?),') 1534 if r.search(proto): 1535 proto = KernRe(',').sub('(', proto, count=1) 1536 elif is_void: 1537 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1538 1539 # Now delete all of the odd-numbered commas in the proto 1540 # so that argument types & names don't have a comma between them 1541 count = 0 1542 length = len(proto) 1543 1544 if is_void: 1545 length = 0 # skip the loop if is_void 1546 1547 for ix in range(length): 1548 if proto[ix] == ',': 1549 count += 1 1550 if count % 2 == 1: 1551 proto = proto[:ix] + ' ' + proto[ix + 1:] 1552 1553 return proto 1554 1555 def tracepoint_munge(self, ln, proto): 1556 """ 1557 Handle tracepoint definitions. 1558 """ 1559 1560 tracepointname = None 1561 tracepointargs = None 1562 1563 # Match tracepoint name based on different patterns 1564 r = KernRe(r'TRACE_EVENT\((.*?),') 1565 if r.search(proto): 1566 tracepointname = r.group(1) 1567 1568 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1569 if r.search(proto): 1570 tracepointname = r.group(1) 1571 1572 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1573 if r.search(proto): 1574 tracepointname = r.group(2) 1575 1576 if tracepointname: 1577 tracepointname = tracepointname.lstrip() 1578 1579 r = KernRe(r'TP_PROTO\((.*?)\)') 1580 if r.search(proto): 1581 tracepointargs = r.group(1) 1582 1583 if not tracepointname or not tracepointargs: 1584 self.emit_msg(ln, 1585 f"Unrecognized tracepoint format:\n{proto}\n") 1586 else: 1587 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1588 self.entry.identifier = f"trace_{self.entry.identifier}" 1589 1590 return proto 1591 1592 def process_proto_function(self, ln, line): 1593 """Ancillary routine to process a function prototype.""" 1594 1595 # strip C99-style comments to end of line 1596 line = KernRe(r"//.*$", re.S).sub('', line) 1597 # 1598 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1599 # 1600 if KernRe(r'\s*#\s*define').match(line): 1601 self.entry.prototype = line 1602 elif not line.startswith('#'): # skip other preprocessor stuff 1603 r = KernRe(r'([^\{]*)') 1604 if r.match(line): 1605 self.entry.prototype += r.group(1) + " " 1606 # 1607 # If we now have the whole prototype, clean it up and declare victory. 1608 # 1609 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1610 # strip comments and surrounding spaces 1611 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1612 # 1613 # Handle self.entry.prototypes for function pointers like: 1614 # int (*pcs_config)(struct foo) 1615 # by turning it into 1616 # int pcs_config(struct foo) 1617 # 1618 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1619 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1620 # 1621 # Handle special declaration syntaxes 1622 # 1623 if 'SYSCALL_DEFINE' in self.entry.prototype: 1624 self.entry.prototype = self.syscall_munge(ln, 1625 self.entry.prototype) 1626 else: 1627 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1628 if r.search(self.entry.prototype): 1629 self.entry.prototype = self.tracepoint_munge(ln, 1630 self.entry.prototype) 1631 # 1632 # ... and we're done 1633 # 1634 self.dump_function(ln, self.entry.prototype) 1635 self.reset_state(ln) 1636 1637 def process_proto_type(self, ln, line): 1638 """ 1639 Ancillary routine to process a type. 1640 """ 1641 1642 # Strip C99-style comments and surrounding whitespace 1643 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1644 if not line: 1645 return # nothing to see here 1646 1647 # To distinguish preprocessor directive from regular declaration later. 1648 if line.startswith('#'): 1649 line += ";" 1650 # 1651 # Split the declaration on any of { } or ;, and accumulate pieces 1652 # until we hit a semicolon while not inside {brackets} 1653 # 1654 r = KernRe(r'(.*?)([{};])') 1655 for chunk in r.split(line): 1656 if chunk: # Ignore empty matches 1657 self.entry.prototype += chunk 1658 # 1659 # This cries out for a match statement ... someday after we can 1660 # drop Python 3.9 ... 1661 # 1662 if chunk == '{': 1663 self.entry.brcount += 1 1664 elif chunk == '}': 1665 self.entry.brcount -= 1 1666 elif chunk == ';' and self.entry.brcount <= 0: 1667 self.dump_declaration(ln, self.entry.prototype) 1668 self.reset_state(ln) 1669 return 1670 # 1671 # We hit the end of the line while still in the declaration; put 1672 # in a space to represent the newline. 1673 # 1674 self.entry.prototype += ' ' 1675 1676 def process_proto(self, ln, line): 1677 """STATE_PROTO: reading a function/whatever prototype.""" 1678 1679 if doc_inline_oneline.search(line): 1680 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1681 self.entry.add_text(doc_inline_oneline.group(2)) 1682 self.dump_section() 1683 1684 elif doc_inline_start.search(line): 1685 self.state = state.INLINE_NAME 1686 1687 elif self.entry.decl_type == 'function': 1688 self.process_proto_function(ln, line) 1689 1690 else: 1691 self.process_proto_type(ln, line) 1692 1693 def process_docblock(self, ln, line): 1694 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1695 1696 if doc_end.search(line): 1697 self.dump_section() 1698 self.output_declaration("doc", self.entry.identifier) 1699 self.reset_state(ln) 1700 1701 elif doc_content.search(line): 1702 self.entry.add_text(doc_content.group(1)) 1703 1704 def parse_export(self): 1705 """ 1706 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1707 """ 1708 1709 export_table = set() 1710 1711 try: 1712 with open(self.fname, "r", encoding="utf8", 1713 errors="backslashreplace") as fp: 1714 1715 for line in fp: 1716 self.process_export(export_table, line) 1717 1718 except IOError: 1719 return None 1720 1721 return export_table 1722 1723 #: The state/action table telling us which function to invoke in each state. 1724 state_actions = { 1725 state.NORMAL: process_normal, 1726 state.NAME: process_name, 1727 state.BODY: process_body, 1728 state.DECLARATION: process_decl, 1729 state.SPECIAL_SECTION: process_special, 1730 state.INLINE_NAME: process_inline_name, 1731 state.INLINE_TEXT: process_inline_text, 1732 state.PROTO: process_proto, 1733 state.DOCBLOCK: process_docblock, 1734 } 1735 1736 def parse_kdoc(self): 1737 """ 1738 Open and process each line of a C source file. 1739 The parsing is controlled via a state machine, and the line is passed 1740 to a different process function depending on the state. The process 1741 function may update the state as needed. 1742 1743 Besides parsing kernel-doc tags, it also parses export symbols. 1744 """ 1745 1746 prev = "" 1747 prev_ln = None 1748 export_table = set() 1749 1750 try: 1751 with open(self.fname, "r", encoding="utf8", 1752 errors="backslashreplace") as fp: 1753 for ln, line in enumerate(fp): 1754 1755 line = line.expandtabs().strip("\n") 1756 1757 # Group continuation lines on prototypes 1758 if self.state == state.PROTO: 1759 if line.endswith("\\"): 1760 prev += line.rstrip("\\") 1761 if not prev_ln: 1762 prev_ln = ln 1763 continue 1764 1765 if prev: 1766 ln = prev_ln 1767 line = prev + line 1768 prev = "" 1769 prev_ln = None 1770 1771 self.config.log.debug("%d %s: %s", 1772 ln, state.name[self.state], 1773 line) 1774 1775 # This is an optimization over the original script. 1776 # There, when export_file was used for the same file, 1777 # it was read twice. Here, we use the already-existing 1778 # loop to parse exported symbols as well. 1779 # 1780 if (self.state != state.NORMAL) or \ 1781 not self.process_export(export_table, line): 1782 # Hand this line to the appropriate state handler 1783 self.state_actions[self.state](self, ln, line) 1784 1785 self.emit_unused_warnings() 1786 1787 except OSError: 1788 self.config.log.error(f"Error: Cannot open file {self.fname}") 1789 1790 return export_table, self.entries 1791