1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.kdoc_re import NestedMatch, KernRe 17from kdoc.kdoc_item import KdocItem 18 19# 20# Regular expressions used to parse kernel-doc markups at KernelDoc class. 21# 22# Let's declare them in lowercase outside any class to make it easier to 23# convert from the Perl script. 24# 25# As those are evaluated at the beginning, no need to cache them 26# 27 28# Allow whitespace at end of comment start. 29doc_start = KernRe(r'^/\*\*\s*$', cache=False) 30 31doc_end = KernRe(r'\*/', cache=False) 32doc_com = KernRe(r'\s*\*\s*', cache=False) 33doc_com_body = KernRe(r'\s*\* ?', cache=False) 34doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 35 36# @params and a strictly limited set of supported section names 37# Specifically: 38# Match @word: 39# @...: 40# @{section-name}: 41# while trying to not match literal block starts like "example::" 42# 43known_section_names = 'description|context|returns?|notes?|examples?' 
# Case-insensitive matcher for the supported section names.
known_sections = KernRe(known_section_names, flags = re.I)
# Section header line: group 1 is "@param", "@..." or a known section name,
# group 2 is the optional text after the ":". The "[^:]" keeps "name::"
# (a literal block start) from matching.
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Any comment body line; group 1 is the text after the " * " prefix.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline (in-declaration) documentation comments: start, "@name: text"
# section line, end, and the single-line "/** @name: text */" form.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL*() invocations; group 2 is the exported symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name" reference, optionally with ".member" / "->member" parts and a
# trailing "..."; group 1 is the name without the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * '
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
#
# Pattern for one macro argument: anything up to the next "," or ")".
struct_args_pattern = r'([^,)]+)'

#
# List of (regex, replacement) pairs applied, in order, to the body of a
# struct/union.
#
# Regex flags are always passed by keyword (``flags=...``), like in the
# other KernRe() calls of this file, so that they cannot be taken for
# another positional parameter of KernRe().
#
struct_xforms = [
    # Strip attributes
    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
    (KernRe(r'\s*__aligned\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__counted_by\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__packed\s*', flags=re.S), ' '),
    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', flags=re.S), ' '),
    (KernRe(r'\s*__private', flags=re.S), ' '),
    (KernRe(r'\s*__rcu', flags=re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned_in_smp', flags=re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned', flags=re.S), ' '),
    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
    #
    # Unwrap struct_group macros based on this definition:
    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
    # which has variants like: struct_group(NAME, MEMBERS...)
    # Only MEMBERS arguments require documentation.
    #
    # Parsing them happens on two steps:
    #
    # 1. drop struct group arguments that aren't at MEMBERS,
    #    storing them as STRUCT_GROUP(MEMBERS)
    #
    # 2. remove STRUCT_GROUP() ancillary macro.
    #
    # The original logic used to remove STRUCT_GROUP() using an
    # advanced regex:
    #
    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
    #
    # with two patterns that are incompatible with
    # Python re module, as it has:
    #
    #   - a recursive pattern: (?1)
    #   - an atomic grouping: (?>...)
    #
    # I tried a simpler version: but it didn't work either:
    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
    #
    # As it doesn't properly match the end parenthesis on some cases.
    #
    # So, a better solution was crafted: there's now a NestedMatch
    # class that ensures that delimiters after a search are properly
    # matched. So, the implementation to drop STRUCT_GROUP() will be
    # handled in separate.
    #
    (KernRe(r'\bstruct_group\s*\(([^,]*,)', flags=re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', flags=re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', flags=re.S), r'struct \1 \2; STRUCT_GROUP('),
    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', flags=re.S), r'STRUCT_GROUP('),
    #
    # Replace macros
    #
    # TODO: use NestedMatch for FOO($1, $2, ...) matches
    #
    # it is better to also move those to the NestedMatch logic,
    # to ensure that parentheses will be properly matched.
    #
    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', flags=re.S),
     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', flags=re.S),
     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            flags=re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            flags=re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
            r',\s*' + struct_args_pattern + r'\)', flags=re.S), r'\2 *\1'),
    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', flags=re.S), r'\2 *\1'),
    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', flags=re.S), r'\1 \2[]'),
    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', flags=re.S), r'dma_addr_t \1'),
    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', flags=re.S), r'__u32 \1'),
    (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
]
#
# Regexes here are guaranteed to have the end delimiter matching
# the start delimiter. Yet, right now, only one replace group
# is allowed.
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Transforms for function prototypes
#
function_xforms = [
    (KernRe(r"^static +"), ""),
    (KernRe(r"^extern +"), ""),
    (KernRe(r"^asmlinkage +"), ""),
    (KernRe(r"^inline +"), ""),
    (KernRe(r"^__inline__ +"), ""),
    (KernRe(r"^__inline +"), ""),
    (KernRe(r"^__always_inline +"), ""),
    (KernRe(r"^noinline +"), ""),
    (KernRe(r"^__FORTIFY_INLINE +"), ""),
    (KernRe(r"__init +"), ""),
    (KernRe(r"__init_or_module +"), ""),
    (KernRe(r"__exit +"), ""),
    (KernRe(r"__deprecated +"), ""),
    (KernRe(r"__flatten +"), ""),
    (KernRe(r"__meminit +"), ""),
    (KernRe(r"__must_check +"), ""),
    (KernRe(r"__weak +"), ""),
    (KernRe(r"__sched +"), ""),
    (KernRe(r"_noprof"), ""),
    (KernRe(r"__always_unused *"), ""),
    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
    (KernRe(r"__attribute_const__ +"), ""),
    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]

#
# Transforms for variable prototypes
#
var_xforms = [
    (KernRe(r"__read_mostly"), ""),
    (KernRe(r"__ro_after_init"), ""),
    (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
    (KernRe(r"(?://.*)$"), ""),
    (KernRe(r"(?:/\*.*\*/)"), ""),
    (KernRe(r";$"), ""),
]

#
# Ancillary functions
#

def apply_transforms(xforms, text):
    """
    Apply a set of transforms to a block of text.

    ``xforms`` is an iterable of ``(regex, replacement)`` pairs; each regex
    only needs a ``sub(replacement, text)`` method. The pairs are applied in
    order, each one on the result of the previous, and the final text is
    returned.
    """
    for search, subst in xforms:
        text = search.sub(subst, text)
    return text

multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    A little helper to get rid of excess white space: strips ``s`` and
    replaces each run of two or more whitespace characters with one space.
    """
    return multi_space.sub(' ', s.strip())

def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    Returns ``text`` without the private regions and without any comment,
    stripped of leading/trailing white space.
    """
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL = 0              #: Normal code.
    NAME = 1                #: Looking for function name.
    DECLARATION = 2         #: We have seen a declaration which might not be done.
    BODY = 3                #: The body of the comment.
    SPECIAL_SECTION = 4     #: Doc section ending with a blank line.
    PROTO = 5               #: Scanning prototype.
    DOCBLOCK = 6            #: Documentation block.
    INLINE_NAME = 7         #: Gathering doc outside main block.
    INLINE_TEXT = 8         #: Reading the body of inline docs.

    #: Names for each parser state.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    It accumulates the text of the section being read, the sections and
    parameter descriptions already parsed, and the warnings produced while
    parsing the entry.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry for file ``fname``. ``ln`` is used to set
        ``declaration_start_line`` (as ``ln + 1``).
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        #
        # Current section and its start line. They're set by begin_section(),
        # but dump_section() reads them, so give them sane defaults in case
        # it gets called first.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        self.anon_struct_union = False

        self.leading_space = None

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Non-warning messages are logged right away with ``config.log.info()``.
        Warnings are only stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section named ``title`` starting at ``line_no``. If
        ``dump`` is set, the current section is dumped first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name matches ``type_param`` (``@name``) are stored
        as parameter descriptions; the other ones are stored at
        ``self.sections``. If ``start_new`` is set, the accumulated contents
        are dropped and the current section returns to the default one.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        global python_warning
        if (not python_warning and
            sys.version_info.major == 3 and sys.version_info.minor < 7):

            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')
            python_warning = True

    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        While an entry is being processed, the message is delegated to it.
        Otherwise, it is logged directly via ``config.log``.
        """

        if self.entry:
            self.entry.emit_msg(ln, msg, warning=warning)
            return

        log_msg = f"{self.fname}:{ln} {msg}"

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dump section contents to arrays/hashes intended for that purpose.

        It does nothing if there's no current entry.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Store the entry into an entry array.

        The actual output and output filters will be handled elsewhere.

        ``dtype`` is the declaration type, ``name`` its identifier, and
        ``args`` are extra keyword arguments passed as-is to KdocItem.
        """

        item = KdocItem(name, self.fname, dtype,
                        self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def emit_unused_warnings(self):
        """
        When the parser fails to produce a valid entry, it places some
        warnings under `entry.warnings` that will be discarded when resetting
        the state.

        Ensure that those warnings are not lost.

        .. note::

              Because we are calling `config.warning()` here, those
              warnings are not filtered by the `-W` parameters: they will all
              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
              `-Wcontents-before-sections` are used.

              Allowing those warnings to be filtered is complex, because it
              would require storing them in a buffer and then filtering them
              during the output step of the code, depending on the
              selected symbols.
        """
        # NOTE(review): output_declaration() appends KdocItem objects to
        # self.entries, while self.entry is a KernelEntry - confirm that
        # this membership test filters the already-stored entries.
        if self.entry and self.entry not in self.entries:
            for log_msg in self.entry.warnings:
                self.config.warning(log_msg)

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        self.emit_unused_warnings()

        self.entry = KernelEntry(self.config, self.fname, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        ``param`` is the parameter (or member) name and ``dtype`` its type;
        ``org_arg`` is the original argument text, stored as the parameter
        type. A warning is emitted when the parameter has no description.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                if len(param) > 3:  # there is a name provided, use that
                    param = param[:-3]
                if not self.entry.parameterdescs.get(param):
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = multi_space.sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg


    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is the text with all arguments (or members), separated by
        ``splitter``. Each one is parsed and stored via push_parameter().
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.

                # Treat preprocessor directive as a typeless variable
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case. Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'           # Up to "(" and maybe "*"
                           r'([\w.]*?)'                  # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)')  # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                args = KernRe(r'\s*,\s*').split(arg)
                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
                #
                # args[0] has a string of "type a". If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(args[0]):
                    args[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in args[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in args:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check for errors inside sections, emitting a warning for each
        section that is neither a parameter from the parameter list nor
        one of the known section names.
        """
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.

        Nothing is checked unless ``config.wreturn`` is set.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    def split_struct_proto(self, proto):
        """
        Split apart a structure prototype; returns (struct|union, name,
        members) or ``None``.

        Both the plain ``struct name { ... }`` and the
        ``typedef struct { ... } name;`` forms are handled.
        """

        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))
        else:
            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
            if r.search(proto):
                return (r.group(1), r.group(3), r.group(2))
        return None

    def rewrite_struct_members(self, members):
        """
        Process ``struct``/``union`` members from the most deeply nested
        outward.

        Rewrite the members of a ``struct`` or ``union`` for easier formatting
        later on. Among other things, this function will turn a member like::

          struct { inner_members; } foo;

        into::

          struct foo; inner_members;

        Returns the rewritten members string.
        """

        #
        # The trick is in the ``^{`` below - it prevents a match of an outer
        # ``struct``/``union`` until the inner one has been munged
        # (removing the ``{`` in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t)  # Reconstruct the original formatting
                dtype, name, lbr, content, rbr, rest, semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Use dedicated names here: ``dtype`` still holds
                            # the struct/union keyword, needed by the next
                            # field name of the s_id loop.
                            #
                            fn_type, fn_name, fn_extra = r.group(1), r.group(2), r.group(3)
                            # Pointer-to-function
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{fn_type}{fn_name}{fn_extra}; "
                            else:
                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                for name in r.group(2).split(','):
                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{r.group(1)} {name}; "
                                    else:
                                        newmember += f"{r.group(1)} {s_id}.{name}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    def format_struct_decl(self, declaration):
        """
        Format the ``struct`` declaration into a standard form for inclusion
        in the resulting docs.

        Returns the declaration with one clause per line, indented with
        tabs according to the brace nesting level.
        """

        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if clause:
                if '}' in clause and level > 1:
                    level -= 1
                if not clause.startswith('#'):
                    declaration += "\t" * level
                declaration += "\t" + clause + "\n"
                if "{" in clause and "}" not in clause:
                    level += 1
        return declaration


    def dump_struct(self, ln, proto):
        """
        Store an entry for a ``struct`` or ``union``
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        members = apply_transforms(struct_xforms, members)

        nested = NestedMatch()
        for search, sub in struct_nested_prefixes:
            members = nested.sub(search, sub, members)
        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Store an ``enum`` inside self.entries array.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '@{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)

    def dump_var(self, ln, proto):
        """
        Store variables that are part of kAPI.

        The variable name comes either from a ``DEFINE_*(name)`` macro or
        from the end of the C declaration. The text after ``=``, if any,
        is stored as the default value.
        """
        VAR_ATTRIBS = [
            "extern",
            "const",
        ]
        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"

        #
        # Store the full prototype before modifying it
        #
        full_proto = proto
        declaration_name = None

        #
        # Handle macro definitions
        #
        macro_prefixes = [
            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
        ]

        for r in macro_prefixes:
            match = r.search(proto)
            if match:
                declaration_name = match.group(1)
                break

        #
        # Drop comments and macros to have a pure C prototype
        #
        if not declaration_name:
            proto = apply_transforms(var_xforms, proto)

        proto = proto.rstrip()

        #
        # Variable name is at the end of the declaration
        #

        default_val = None

        r = KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
        if r.match(proto):
            if not declaration_name:
                declaration_name = r.group(1)

            default_val = r.group(2)
        else:
            r = KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")

            if r.match(proto):
                default_val = r.group(1)
        if not declaration_name:
            self.emit_msg(ln,f"{proto}: can't parse variable")
            return

        if default_val:
            default_val = default_val.lstrip("=").strip()

        self.output_declaration("var", declaration_name,
                                full_proto=full_proto,
                                default_val=default_val,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Store a data declaration inside self.entries array.

        Dispatches to the dump_*() method that handles
        ``self.entry.decl_type``; an unknown type is reported as a message.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
        elif self.entry.decl_type == "var":
            self.dump_var(ln, prototype)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype):
        """
        Store a function or function macro inside self.entries array.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'

        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True
        else:
            #
            # Apply the initial transformations.
            #
            prototype = apply_transforms(function_xforms, prototype)

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
1145 # 1146 if not found: 1147 self.emit_msg(ln, 1148 f"cannot understand function prototype: '{prototype}'") 1149 return 1150 if self.entry.identifier != declaration_name: 1151 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " 1152 f"Prototype was for {declaration_name}() instead") 1153 return 1154 self.check_sections(ln, declaration_name, "function") 1155 self.check_return_section(ln, declaration_name, return_type) 1156 # 1157 # Store the result. 1158 # 1159 self.output_declaration(decl_type, declaration_name, 1160 typedef=('typedef' in return_type), 1161 functiontype=return_type, 1162 purpose=self.entry.declaration_purpose, 1163 func_macro=func_macro) 1164 1165 1166 def dump_typedef(self, ln, proto): 1167 """ 1168 Store a ``typedef`` inside self.entries array. 1169 """ 1170 # 1171 # We start by looking for function typedefs. 1172 # 1173 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1174 typedef_ident = r'\*?\s*(\w\S+)\s*' 1175 typedef_args = r'\s*\((.*)\);' 1176 1177 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1178 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1179 1180 # Parse function typedef prototypes 1181 for r in [typedef1, typedef2]: 1182 if not r.match(proto): 1183 continue 1184 1185 return_type = r.group(1).strip() 1186 declaration_name = r.group(2) 1187 args = r.group(3) 1188 1189 if self.entry.identifier != declaration_name: 1190 self.emit_msg(ln, 1191 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1192 return 1193 1194 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1195 1196 self.output_declaration('function', declaration_name, 1197 typedef=True, 1198 functiontype=return_type, 1199 purpose=self.entry.declaration_purpose) 1200 return 1201 # 1202 # Not a function, try to parse a simple typedef. 
1203 # 1204 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1205 if r.match(proto): 1206 declaration_name = r.group(1) 1207 1208 if self.entry.identifier != declaration_name: 1209 self.emit_msg(ln, 1210 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1211 return 1212 1213 self.output_declaration('typedef', declaration_name, 1214 purpose=self.entry.declaration_purpose) 1215 return 1216 1217 self.emit_msg(ln, "error: Cannot parse typedef!") 1218 1219 @staticmethod 1220 def process_export(function_set, line): 1221 """ 1222 process ``EXPORT_SYMBOL*`` tags 1223 1224 This method doesn't use any variable from the class, so declare it 1225 with a staticmethod decorator. 1226 """ 1227 1228 # We support documenting some exported symbols with different 1229 # names. A horrible hack. 1230 suffixes = [ '_noprof' ] 1231 1232 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1233 # multiple export lines would violate Kernel coding style. 1234 1235 if export_symbol.search(line): 1236 symbol = export_symbol.group(2) 1237 elif export_symbol_ns.search(line): 1238 symbol = export_symbol_ns.group(2) 1239 else: 1240 return False 1241 # 1242 # Found an export, trim out any special suffixes 1243 # 1244 for suffix in suffixes: 1245 # Be backward compatible with Python < 3.9 1246 if symbol.endswith(suffix): 1247 symbol = symbol[:-len(suffix)] 1248 function_set.add(symbol) 1249 return True 1250 1251 def process_normal(self, ln, line): 1252 """ 1253 STATE_NORMAL: looking for the ``/**`` to begin everything. 1254 """ 1255 1256 if not doc_start.match(line): 1257 return 1258 1259 # start a new entry 1260 self.reset_state(ln) 1261 1262 # next line is always the function name 1263 self.state = state.NAME 1264 1265 def process_name(self, ln, line): 1266 """ 1267 STATE_NAME: Looking for the "name - description" line 1268 """ 1269 # 1270 # Check for a DOC: block and handle them specially. 
1271 # 1272 if doc_block.search(line): 1273 1274 if not doc_block.group(1): 1275 self.entry.begin_section(ln, "Introduction") 1276 else: 1277 self.entry.begin_section(ln, doc_block.group(1)) 1278 1279 self.entry.identifier = self.entry.section 1280 self.state = state.DOCBLOCK 1281 # 1282 # Otherwise we're looking for a normal kerneldoc declaration line. 1283 # 1284 elif doc_decl.search(line): 1285 self.entry.identifier = doc_decl.group(1) 1286 1287 # Test for data declaration 1288 if doc_begin_data.search(line): 1289 self.entry.decl_type = doc_begin_data.group(1) 1290 self.entry.identifier = doc_begin_data.group(2) 1291 # 1292 # Look for a function description 1293 # 1294 elif doc_begin_func.search(line): 1295 self.entry.identifier = doc_begin_func.group(1) 1296 self.entry.decl_type = "function" 1297 # 1298 # We struck out. 1299 # 1300 else: 1301 self.emit_msg(ln, 1302 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1303 self.state = state.NORMAL 1304 return 1305 # 1306 # OK, set up for a new kerneldoc entry. 1307 # 1308 self.state = state.BODY 1309 self.entry.identifier = self.entry.identifier.strip(" ") 1310 # if there's no @param blocks need to set up default section here 1311 self.entry.begin_section(ln + 1) 1312 # 1313 # Find the description portion, which *should* be there but 1314 # isn't always. 
1315 # (We should be able to capture this from the previous parsing - someday) 1316 # 1317 r = KernRe("[-:](.*)") 1318 if r.search(line): 1319 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1320 self.state = state.DECLARATION 1321 else: 1322 self.entry.declaration_purpose = "" 1323 1324 if not self.entry.declaration_purpose and self.config.wshort_desc: 1325 self.emit_msg(ln, 1326 f"missing initial short description on line:\n{line}") 1327 1328 if not self.entry.identifier and self.entry.decl_type != "enum": 1329 self.emit_msg(ln, 1330 f"wrong kernel-doc identifier on line:\n{line}") 1331 self.state = state.NORMAL 1332 1333 if self.config.verbose: 1334 self.emit_msg(ln, 1335 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1336 warning=False) 1337 # 1338 # Failed to find an identifier. Emit a warning 1339 # 1340 else: 1341 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1342 1343 def is_new_section(self, ln, line): 1344 """ 1345 Helper function to determine if a new section is being started. 1346 """ 1347 if doc_sect.search(line): 1348 self.state = state.BODY 1349 # 1350 # Pick out the name of our new section, tweaking it if need be. 1351 # 1352 newsection = doc_sect.group(1) 1353 if newsection.lower() == 'description': 1354 newsection = 'Description' 1355 elif newsection.lower() == 'context': 1356 newsection = 'Context' 1357 self.state = state.SPECIAL_SECTION 1358 elif newsection.lower() in ["@return", "@returns", 1359 "return", "returns"]: 1360 newsection = "Return" 1361 self.state = state.SPECIAL_SECTION 1362 elif newsection[0] == '@': 1363 self.state = state.SPECIAL_SECTION 1364 # 1365 # Initialize the contents, and get the new section going. 
1366 # 1367 newcontents = doc_sect.group(2) 1368 if not newcontents: 1369 newcontents = "" 1370 self.dump_section() 1371 self.entry.begin_section(ln, newsection) 1372 self.entry.leading_space = None 1373 1374 self.entry.add_text(newcontents.lstrip()) 1375 return True 1376 return False 1377 1378 def is_comment_end(self, ln, line): 1379 """ 1380 Helper function to detect (and effect) the end of a kerneldoc comment. 1381 """ 1382 if doc_end.search(line): 1383 self.dump_section() 1384 1385 # Look for doc_com + <text> + doc_end: 1386 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1387 if r.match(line): 1388 self.emit_msg(ln, f"suspicious ending line: {line}") 1389 1390 self.entry.prototype = "" 1391 self.entry.new_start_line = ln + 1 1392 1393 self.state = state.PROTO 1394 return True 1395 return False 1396 1397 1398 def process_decl(self, ln, line): 1399 """ 1400 STATE_DECLARATION: We've seen the beginning of a declaration. 1401 """ 1402 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1403 return 1404 # 1405 # Look for anything with the " * " line beginning. 1406 # 1407 if doc_content.search(line): 1408 cont = doc_content.group(1) 1409 # 1410 # A blank line means that we have moved out of the declaration 1411 # part of the comment (without any "special section" parameter 1412 # descriptions). 1413 # 1414 if cont == "": 1415 self.state = state.BODY 1416 # 1417 # Otherwise we have more of the declaration section to soak up. 1418 # 1419 else: 1420 self.entry.declaration_purpose = \ 1421 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1422 else: 1423 # Unknown line, ignore 1424 self.emit_msg(ln, f"bad line: {line}") 1425 1426 1427 def process_special(self, ln, line): 1428 """ 1429 STATE_SPECIAL_SECTION: a section ending with a blank line. 1430 """ 1431 # 1432 # If we have hit a blank line (only the " * " marker), then this 1433 # section is done. 
1434 # 1435 if KernRe(r"\s*\*\s*$").match(line): 1436 self.entry.begin_section(ln, dump = True) 1437 self.state = state.BODY 1438 return 1439 # 1440 # Not a blank line, look for the other ways to end the section. 1441 # 1442 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1443 return 1444 # 1445 # OK, we should have a continuation of the text for this section. 1446 # 1447 if doc_content.search(line): 1448 cont = doc_content.group(1) 1449 # 1450 # If the lines of text after the first in a special section have 1451 # leading white space, we need to trim it out or Sphinx will get 1452 # confused. For the second line (the None case), see what we 1453 # find there and remember it. 1454 # 1455 if self.entry.leading_space is None: 1456 r = KernRe(r'^(\s+)') 1457 if r.match(cont): 1458 self.entry.leading_space = len(r.group(1)) 1459 else: 1460 self.entry.leading_space = 0 1461 # 1462 # Otherwise, before trimming any leading chars, be *sure* 1463 # that they are white space. We should maybe warn if this 1464 # isn't the case. 1465 # 1466 for i in range(0, self.entry.leading_space): 1467 if cont[i] != " ": 1468 self.entry.leading_space = i 1469 break 1470 # 1471 # Add the trimmed result to the section and we're done. 1472 # 1473 self.entry.add_text(cont[self.entry.leading_space:]) 1474 else: 1475 # Unknown line, ignore 1476 self.emit_msg(ln, f"bad line: {line}") 1477 1478 def process_body(self, ln, line): 1479 """ 1480 STATE_BODY: the bulk of a kerneldoc comment. 
1481 """ 1482 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1483 return 1484 1485 if doc_content.search(line): 1486 cont = doc_content.group(1) 1487 self.entry.add_text(cont) 1488 else: 1489 # Unknown line, ignore 1490 self.emit_msg(ln, f"bad line: {line}") 1491 1492 def process_inline_name(self, ln, line): 1493 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1494 1495 if doc_inline_sect.search(line): 1496 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1497 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1498 self.state = state.INLINE_TEXT 1499 elif doc_inline_end.search(line): 1500 self.dump_section() 1501 self.state = state.PROTO 1502 elif doc_content.search(line): 1503 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1504 self.state = state.PROTO 1505 # else ... ?? 1506 1507 def process_inline_text(self, ln, line): 1508 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1509 1510 if doc_inline_end.search(line): 1511 self.dump_section() 1512 self.state = state.PROTO 1513 elif doc_content.search(line): 1514 self.entry.add_text(doc_content.group(1)) 1515 # else ... ?? 1516 1517 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1518 """ 1519 Handle syscall definitions. 
1520 """ 1521 1522 is_void = False 1523 1524 # Strip newlines/CR's 1525 proto = re.sub(r'[\r\n]+', ' ', proto) 1526 1527 # Check if it's a SYSCALL_DEFINE0 1528 if 'SYSCALL_DEFINE0' in proto: 1529 is_void = True 1530 1531 # Replace SYSCALL_DEFINE with correct return type & function name 1532 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1533 1534 r = KernRe(r'long\s+(sys_.*?),') 1535 if r.search(proto): 1536 proto = KernRe(',').sub('(', proto, count=1) 1537 elif is_void: 1538 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1539 1540 # Now delete all of the odd-numbered commas in the proto 1541 # so that argument types & names don't have a comma between them 1542 count = 0 1543 length = len(proto) 1544 1545 if is_void: 1546 length = 0 # skip the loop if is_void 1547 1548 for ix in range(length): 1549 if proto[ix] == ',': 1550 count += 1 1551 if count % 2 == 1: 1552 proto = proto[:ix] + ' ' + proto[ix + 1:] 1553 1554 return proto 1555 1556 def tracepoint_munge(self, ln, proto): 1557 """ 1558 Handle tracepoint definitions. 
1559 """ 1560 1561 tracepointname = None 1562 tracepointargs = None 1563 1564 # Match tracepoint name based on different patterns 1565 r = KernRe(r'TRACE_EVENT\((.*?),') 1566 if r.search(proto): 1567 tracepointname = r.group(1) 1568 1569 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1570 if r.search(proto): 1571 tracepointname = r.group(1) 1572 1573 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1574 if r.search(proto): 1575 tracepointname = r.group(2) 1576 1577 if tracepointname: 1578 tracepointname = tracepointname.lstrip() 1579 1580 r = KernRe(r'TP_PROTO\((.*?)\)') 1581 if r.search(proto): 1582 tracepointargs = r.group(1) 1583 1584 if not tracepointname or not tracepointargs: 1585 self.emit_msg(ln, 1586 f"Unrecognized tracepoint format:\n{proto}\n") 1587 else: 1588 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1589 self.entry.identifier = f"trace_{self.entry.identifier}" 1590 1591 return proto 1592 1593 def process_proto_function(self, ln, line): 1594 """Ancillary routine to process a function prototype.""" 1595 1596 # strip C99-style comments to end of line 1597 line = KernRe(r"//.*$", re.S).sub('', line) 1598 # 1599 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1600 # 1601 if KernRe(r'\s*#\s*define').match(line): 1602 self.entry.prototype = line 1603 elif not line.startswith('#'): # skip other preprocessor stuff 1604 r = KernRe(r'([^\{]*)') 1605 if r.match(line): 1606 self.entry.prototype += r.group(1) + " " 1607 # 1608 # If we now have the whole prototype, clean it up and declare victory. 
1609 # 1610 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1611 # strip comments and surrounding spaces 1612 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1613 # 1614 # Handle self.entry.prototypes for function pointers like: 1615 # int (*pcs_config)(struct foo) 1616 # by turning it into 1617 # int pcs_config(struct foo) 1618 # 1619 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1620 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1621 # 1622 # Handle special declaration syntaxes 1623 # 1624 if 'SYSCALL_DEFINE' in self.entry.prototype: 1625 self.entry.prototype = self.syscall_munge(ln, 1626 self.entry.prototype) 1627 else: 1628 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1629 if r.search(self.entry.prototype): 1630 self.entry.prototype = self.tracepoint_munge(ln, 1631 self.entry.prototype) 1632 # 1633 # ... and we're done 1634 # 1635 self.dump_function(ln, self.entry.prototype) 1636 self.reset_state(ln) 1637 1638 def process_proto_type(self, ln, line): 1639 """ 1640 Ancillary routine to process a type. 1641 """ 1642 1643 # Strip C99-style comments and surrounding whitespace 1644 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1645 if not line: 1646 return # nothing to see here 1647 1648 # To distinguish preprocessor directive from regular declaration later. 1649 if line.startswith('#'): 1650 line += ";" 1651 # 1652 # Split the declaration on any of { } or ;, and accumulate pieces 1653 # until we hit a semicolon while not inside {brackets} 1654 # 1655 r = KernRe(r'(.*?)([{};])') 1656 for chunk in r.split(line): 1657 if chunk: # Ignore empty matches 1658 self.entry.prototype += chunk 1659 # 1660 # This cries out for a match statement ... someday after we can 1661 # drop Python 3.9 ... 
1662 # 1663 if chunk == '{': 1664 self.entry.brcount += 1 1665 elif chunk == '}': 1666 self.entry.brcount -= 1 1667 elif chunk == ';' and self.entry.brcount <= 0: 1668 self.dump_declaration(ln, self.entry.prototype) 1669 self.reset_state(ln) 1670 return 1671 # 1672 # We hit the end of the line while still in the declaration; put 1673 # in a space to represent the newline. 1674 # 1675 self.entry.prototype += ' ' 1676 1677 def process_proto(self, ln, line): 1678 """STATE_PROTO: reading a function/whatever prototype.""" 1679 1680 if doc_inline_oneline.search(line): 1681 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1682 self.entry.add_text(doc_inline_oneline.group(2)) 1683 self.dump_section() 1684 1685 elif doc_inline_start.search(line): 1686 self.state = state.INLINE_NAME 1687 1688 elif self.entry.decl_type == 'function': 1689 self.process_proto_function(ln, line) 1690 1691 else: 1692 self.process_proto_type(ln, line) 1693 1694 def process_docblock(self, ln, line): 1695 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1696 1697 if doc_end.search(line): 1698 self.dump_section() 1699 self.output_declaration("doc", self.entry.identifier) 1700 self.reset_state(ln) 1701 1702 elif doc_content.search(line): 1703 self.entry.add_text(doc_content.group(1)) 1704 1705 def parse_export(self): 1706 """ 1707 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1708 """ 1709 1710 export_table = set() 1711 1712 try: 1713 with open(self.fname, "r", encoding="utf8", 1714 errors="backslashreplace") as fp: 1715 1716 for line in fp: 1717 self.process_export(export_table, line) 1718 1719 except IOError: 1720 return None 1721 1722 return export_table 1723 1724 #: The state/action table telling us which function to invoke in each state. 
1725 state_actions = { 1726 state.NORMAL: process_normal, 1727 state.NAME: process_name, 1728 state.BODY: process_body, 1729 state.DECLARATION: process_decl, 1730 state.SPECIAL_SECTION: process_special, 1731 state.INLINE_NAME: process_inline_name, 1732 state.INLINE_TEXT: process_inline_text, 1733 state.PROTO: process_proto, 1734 state.DOCBLOCK: process_docblock, 1735 } 1736 1737 def parse_kdoc(self): 1738 """ 1739 Open and process each line of a C source file. 1740 The parsing is controlled via a state machine, and the line is passed 1741 to a different process function depending on the state. The process 1742 function may update the state as needed. 1743 1744 Besides parsing kernel-doc tags, it also parses export symbols. 1745 """ 1746 1747 prev = "" 1748 prev_ln = None 1749 export_table = set() 1750 1751 try: 1752 with open(self.fname, "r", encoding="utf8", 1753 errors="backslashreplace") as fp: 1754 for ln, line in enumerate(fp): 1755 1756 line = line.expandtabs().strip("\n") 1757 1758 # Group continuation lines on prototypes 1759 if self.state == state.PROTO: 1760 if line.endswith("\\"): 1761 prev += line.rstrip("\\") 1762 if not prev_ln: 1763 prev_ln = ln 1764 continue 1765 1766 if prev: 1767 ln = prev_ln 1768 line = prev + line 1769 prev = "" 1770 prev_ln = None 1771 1772 self.config.log.debug("%d %s: %s", 1773 ln, state.name[self.state], 1774 line) 1775 1776 # This is an optimization over the original script. 1777 # There, when export_file was used for the same file, 1778 # it was read twice. Here, we use the already-existing 1779 # loop to parse exported symbols as well. 1780 # 1781 if (self.state != state.NORMAL) or \ 1782 not self.process_export(export_table, line): 1783 # Hand this line to the appropriate state handler 1784 self.state_actions[self.state](self, ln, line) 1785 1786 self.emit_unused_warnings() 1787 1788 except OSError: 1789 self.config.log.error(f"Error: Cannot open file {self.fname}") 1790 1791 return export_table, self.entries 1792