#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702

"""
kdoc_parser
===========

Read a C language source or header FILE and extract embedded
documentation comments
"""

import sys
import re
from pprint import pformat

from kdoc.kdoc_re import NestedMatch, KernRe
from kdoc.kdoc_item import KdocItem

#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# Let's declare them in lowercase outside any class to make easier to
# convert from the python script.
#
# As those are evaluated at the beginning, no need to cache them
#

# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# The "*/" comment terminator, anywhere in the line.
doc_end = KernRe(r'\*/', cache=False)
# The "*" that prefixes a comment line, with any surrounding white space.
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same prefix, but consuming at most one space after the "*".
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Comment prefix followed by a (captured) word.
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
# Case-insensitive matcher for the supported section names.
known_sections = KernRe(known_section_names, flags = re.I)
# A section header line: "@param:", "@...:" or one of the known section
# names, followed by a single ":" and an optional (captured) rest of line.
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Any comment body line; captures the text after the "* " prefix.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Markers and contents of inline ("/** ... */" inside a declaration) comments.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL*() lines; group 2 holds the exported symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name", optionally with ".member" / "->member" parts and a trailing "...".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                # initial " * "
                        r"(?:\w+\s*\*\s*)?" +         # type (not captured)
                        r'(?:define\s+)?' +           # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +  # name and optional "(...)"
                        r'(?:[-:].*)?$',              # description (not captured)
                        cache = False)

#
# Here begins a long set of transformations to turn structure member prefixes
# and macro invocations into something we can parse and generate kdoc for.
#
# Pattern for a single macro argument: anything up to the next "," or ")".
struct_args_pattern = r'([^,)]+)'

# List of (KernRe, replacement) pairs; consumers call .sub() on each in order.
# Regex flags are always passed by keyword so that they cannot be bound to
# some other positional parameter of KernRe.
struct_xforms = [
    # Strip attributes
    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
    (KernRe(r'\s*__aligned\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__counted_by\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', flags=re.S), ' '),
    (KernRe(r'\s*__packed\s*', flags=re.S), ' '),
    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', flags=re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned_in_smp', flags=re.S), ' '),
    (KernRe(r'\s*____cacheline_aligned', flags=re.S), ' '),
    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
    #
    # Unwrap struct_group macros based on this definition:
    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
    # which has variants like: struct_group(NAME, MEMBERS...)
    # Only MEMBERS arguments require documentation.
    #
    # Parsing them happens on two steps:
    #
    # 1. drop struct group arguments that aren't at MEMBERS,
    #    storing them as STRUCT_GROUP(MEMBERS)
    #
    # 2. remove STRUCT_GROUP() ancillary macro.
    #
    # The original logic used to remove STRUCT_GROUP() using an
    # advanced regex:
    #
    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
    #
    # with two patterns that are incompatible with
    # Python re module, as it has:
    #
    #   - a recursive pattern: (?1)
    #   - an atomic grouping: (?>...)
    #
    # I tried a simpler version: but it didn't work either:
    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
    #
    # As it doesn't properly match the end parenthesis on some cases.
    #
    # So, a better solution was crafted: there's now a NestedMatch
    # class that ensures that delimiters after a search are properly
    # matched. So, the implementation to drop STRUCT_GROUP() will be
    # handled in separate.
    #
    (KernRe(r'\bstruct_group\s*\(([^,]*,)', flags=re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', flags=re.S), r'STRUCT_GROUP('),
    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', flags=re.S), r'struct \1 \2; STRUCT_GROUP('),
    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', flags=re.S), r'STRUCT_GROUP('),
    #
    # Replace macros
    #
    # TODO: use NestedMatch for FOO($1, $2, ...) matches
    #
    # it is better to also move those to the NestedMatch logic,
    # to ensure that parenthesis will be properly matched.
    #
    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', flags=re.S),
     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', flags=re.S),
     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            flags=re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
            flags=re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
            r',\s*' + struct_args_pattern + r'\)', flags=re.S), r'\2 *\1'),
    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', flags=re.S), r'\2 *\1'),
    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
            struct_args_pattern + r'\)', flags=re.S), r'\1 \2[]'),
    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', flags=re.S), r'dma_addr_t \1'),
    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', flags=re.S), r'__u32 \1'),
]
#
# Regexes here are guaranteed to have the end limiter matching
# the start delimiter. Yet, right now, only one replace group
# is allowed.
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Transforms for function prototypes
#
function_xforms = [
    (KernRe(r"^static +"), ""),
    (KernRe(r"^extern +"), ""),
    (KernRe(r"^asmlinkage +"), ""),
    (KernRe(r"^inline +"), ""),
    (KernRe(r"^__inline__ +"), ""),
    (KernRe(r"^__inline +"), ""),
    (KernRe(r"^__always_inline +"), ""),
    (KernRe(r"^noinline +"), ""),
    (KernRe(r"^__FORTIFY_INLINE +"), ""),
    (KernRe(r"__init +"), ""),
    (KernRe(r"__init_or_module +"), ""),
    (KernRe(r"__deprecated +"), ""),
    (KernRe(r"__flatten +"), ""),
    (KernRe(r"__meminit +"), ""),
    (KernRe(r"__must_check +"), ""),
    (KernRe(r"__weak +"), ""),
    (KernRe(r"__sched +"), ""),
    (KernRe(r"_noprof"), ""),
    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
    (KernRe(r"__attribute_const__ +"), ""),
    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
]

#
# Apply a set of transforms to a block of text.
#
def apply_transforms(xforms, text):
    """
    Run each (regex, replacement) pair of ``xforms`` over ``text``, in
    order, and return the resulting string.
    """
    for search, subst in xforms:
        text = search.sub(subst, text)
    return text

#
# A little helper to get rid of excess white space
#
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    Strip ``s`` and replace each run of two or more white space
    characters with a single space.
    """
    return multi_space.sub(' ', s.strip())

#
# Remove struct/enum members that have been marked "private".
#
def trim_private_members(text):
    """
    Return ``text`` without the regions marked by "/* private: */"
    comments and without any remaining C comments.
    """
    #
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    State machine enums
    """

    # Parser states
    NORMAL        = 0        # normal code
    NAME          = 1        # looking for function name
    DECLARATION   = 2        # We have seen a declaration which might not be done
    BODY          = 3        # the body of the comment
    SPECIAL_SECTION = 4      # doc section ending with a blank line
    PROTO         = 5        # scanning prototype
    DOCBLOCK      = 6        # documentation block
    INLINE_NAME   = 7        # gathering doc outside main block
    INLINE_TEXT   = 8        # reading the body of inline docs

    # Printable names, indexed by the state values above
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  # default section

class KernelEntry:
    """
    Data gathered for a single kernel-doc comment: the accumulated text,
    the sections and parameter descriptions found so far, and the
    warnings emitted while parsing it.
    """

    def __init__(self, config, fname, ln):
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # dump_section() reads these two; give them sane values so that
        # it also works when begin_section() has not been called yet.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Append one line of text to the current section."""
        self._contents.append(text)

    def contents(self):
        """Return the accumulated lines, each terminated by a newline."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages are logged right away; warnings are only
        queued at self.warnings.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    #
    # Begin a new section.
    #
    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Start section ``title`` at line ``line_no``, storing the contents
        of the previous section first when ``dump`` is true.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    # Section names

    section_context = "Context"
    section_return = "Return"

    undescribed = "-- undescribed --"

    def __init__(self, config, fname):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        global python_warning
        if (not python_warning and
            sys.version_info.major == 3 and sys.version_info.minor < 7):

            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')
            python_warning = True

    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message, through the current entry when there is one, or
        directly through the logger otherwise.
        """

        if self.entry:
            self.entry.emit_msg(ln, msg, warning=warning)
            return

        log_msg = f"{self.fname}:{ln} {msg}"

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Stores the entry into an entry array.

        The actual output and output filters will be handled elsewhere
        """

        item = KdocItem(name, self.fname, dtype,
                        self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        #
        # Flush the warnings out before we proceed further
        #
        if self.entry and self.entry not in self.entries:
            for log_msg in self.entry.warnings:
                self.config.log.warning(log_msg)

        self.entry = KernelEntry(self.config, self.fname, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                if len(param) > 3:  # there is a name provided, use that
                    param = param[:-3]
                if not self.entry.parameterdescs.get(param):
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg


    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'           # Up to "(" and maybe "*"
                           r'([\w.]*?)'                  # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)')  # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                args = KernRe(r'\s*,\s*').split(arg)
                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
                #
                # args[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(args[0]):
                    args[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in args[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in args:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check the documented sections, emitting a warning for each one
        that is neither a known section name nor a parameter/member of
        the declaration.
        """
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    #
    # Split apart a structure prototype; returns (struct|union, name, members) or None
    #
    def split_struct_proto(self, proto):
        """
        Split ``proto`` into a (struct|union, name, members) tuple,
        handling both the plain and the typedef forms. Returns None if
        neither form matches.
        """
        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))
        else:
            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
            if r.search(proto):
                # In the typedef form the name comes after the body
                return (r.group(1), r.group(3), r.group(2))
        return None
    #
    # Rewrite the members of a structure or union for easier formatting later on.
    # Among other things, this function will turn a member like:
    #
    #  struct { inner_members; } foo;
    #
    # into:
    #
    #  struct foo; inner_members;
    #
    def rewrite_struct_members(self, members):
        """
        Flatten nested struct/union declarations found at ``members``,
        returning the rewritten member list as a string.
        """
        #
        # Process struct/union members from the most deeply nested outward.  The
        # trick is in the ^{ below - it prevents a match of an outer struct/union
        # until the inner one has been munged (removing the "{" in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t)  # Reconstruct the original formatting
                dtype, name, lbr, content, rbr, rest, semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Keep these in their own variables: "dtype" still
                            # holds the struct/union keyword, which is needed
                            # for the next declarator of the s_id loop.
                            #
                            fn_type, fn_name, fn_extra = r.group(1), r.group(2), r.group(3)
                            # Pointer-to-function
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{fn_type}{fn_name}{fn_extra}; "
                            else:
                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                for name in r.group(2).split(','):
                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{r.group(1)} {name}; "
                                    else:
                                        newmember += f"{r.group(1)} {s_id}.{name}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    #
    # Format the struct declaration into a standard form for inclusion in the
    # resulting docs.
    #
    def format_struct_decl(self, declaration):
        """
        Return ``declaration`` with one clause per line, each line
        indented with tabs according to its brace nesting level.
        """
        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if clause:
                if '}' in clause and level > 1:
                    level -= 1
                if not clause.startswith('#'):
                    declaration += "\t" * level
                declaration += "\t" + clause + "\n"
                if "{" in clause and "}" not in clause:
                    level += 1
        return declaration


    def dump_struct(self, ln, proto):
        """
        Store an entry for a struct or union
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        members = apply_transforms(struct_xforms, members)

        nested = NestedMatch()
        for search, sub in struct_nested_prefixes:
            members = nested.sub(search, sub, members)
        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Stores an enum inside self.entries array.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '%{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Stores a data declaration inside self.entries array.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype):
        """
        Stores a function or function macro inside self.entries array.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'
        #
        # Apply the initial transformations.
        #
        prototype = apply_transforms(function_xforms, prototype)
        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.
        #
        self.output_declaration(decl_type, declaration_name,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)


    def dump_typedef(self, ln, proto):
        """
        Stores a typedef inside self.entries array.
        """
        #
        # We start by looking for function typedefs.
        #
        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.create_parameter_list(ln, 'function', args, ',', declaration_name)

            self.output_declaration('function', declaration_name,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return
        #
        # Not a function, try to parse a simple typedef.
        #
        r = KernRe(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_msg(ln, "error: Cannot parse typedef!")

    @staticmethod
    def process_export(function_set, line):
        """
        process EXPORT_SYMBOL* tags

        This method doesn't use any variable from the class, so declare it
        with a staticmethod decorator.

        Adds the exported symbol to ``function_set`` and returns True if
        ``line`` holds an export; returns False otherwise.
        """

        # We support documenting some exported symbols with different
        # names.  A horrible hack.
        suffixes = [ '_noprof' ]

        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
        # multiple export lines would violate Kernel coding style.

        if export_symbol.search(line):
            symbol = export_symbol.group(2)
        elif export_symbol_ns.search(line):
            symbol = export_symbol_ns.group(2)
        else:
            return False
        #
        # Found an export, trim out any special suffixes
        #
        for suffix in suffixes:
            # Be backward compatible with Python < 3.9
            if symbol.endswith(suffix):
                symbol = symbol[:-len(suffix)]
        function_set.add(symbol)
        return True

    def process_normal(self, ln, line):
        """
        STATE_NORMAL: looking for the /** to begin everything.
        """

        if not doc_start.match(line):
            return

        # start a new entry
        self.reset_state(ln)

        # next line is always the function name
        self.state = state.NAME

    def process_name(self, ln, line):
        """
        STATE_NAME: Looking for the "name - description" line
        """
        #
        # Check for a DOC: block and handle them specially.
1148 # 1149 if doc_block.search(line): 1150 1151 if not doc_block.group(1): 1152 self.entry.begin_section(ln, "Introduction") 1153 else: 1154 self.entry.begin_section(ln, doc_block.group(1)) 1155 1156 self.entry.identifier = self.entry.section 1157 self.state = state.DOCBLOCK 1158 # 1159 # Otherwise we're looking for a normal kerneldoc declaration line. 1160 # 1161 elif doc_decl.search(line): 1162 self.entry.identifier = doc_decl.group(1) 1163 1164 # Test for data declaration 1165 if doc_begin_data.search(line): 1166 self.entry.decl_type = doc_begin_data.group(1) 1167 self.entry.identifier = doc_begin_data.group(2) 1168 # 1169 # Look for a function description 1170 # 1171 elif doc_begin_func.search(line): 1172 self.entry.identifier = doc_begin_func.group(1) 1173 self.entry.decl_type = "function" 1174 # 1175 # We struck out. 1176 # 1177 else: 1178 self.emit_msg(ln, 1179 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") 1180 self.state = state.NORMAL 1181 return 1182 # 1183 # OK, set up for a new kerneldoc entry. 1184 # 1185 self.state = state.BODY 1186 self.entry.identifier = self.entry.identifier.strip(" ") 1187 # if there's no @param blocks need to set up default section here 1188 self.entry.begin_section(ln + 1) 1189 # 1190 # Find the description portion, which *should* be there but 1191 # isn't always. 
1192 # (We should be able to capture this from the previous parsing - someday) 1193 # 1194 r = KernRe("[-:](.*)") 1195 if r.search(line): 1196 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1197 self.state = state.DECLARATION 1198 else: 1199 self.entry.declaration_purpose = "" 1200 1201 if not self.entry.declaration_purpose and self.config.wshort_desc: 1202 self.emit_msg(ln, 1203 f"missing initial short description on line:\n{line}") 1204 1205 if not self.entry.identifier and self.entry.decl_type != "enum": 1206 self.emit_msg(ln, 1207 f"wrong kernel-doc identifier on line:\n{line}") 1208 self.state = state.NORMAL 1209 1210 if self.config.verbose: 1211 self.emit_msg(ln, 1212 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1213 warning=False) 1214 # 1215 # Failed to find an identifier. Emit a warning 1216 # 1217 else: 1218 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1219 1220 # 1221 # Helper function to determine if a new section is being started. 1222 # 1223 def is_new_section(self, ln, line): 1224 if doc_sect.search(line): 1225 self.state = state.BODY 1226 # 1227 # Pick out the name of our new section, tweaking it if need be. 1228 # 1229 newsection = doc_sect.group(1) 1230 if newsection.lower() == 'description': 1231 newsection = 'Description' 1232 elif newsection.lower() == 'context': 1233 newsection = 'Context' 1234 self.state = state.SPECIAL_SECTION 1235 elif newsection.lower() in ["@return", "@returns", 1236 "return", "returns"]: 1237 newsection = "Return" 1238 self.state = state.SPECIAL_SECTION 1239 elif newsection[0] == '@': 1240 self.state = state.SPECIAL_SECTION 1241 # 1242 # Initialize the contents, and get the new section going. 
1243 # 1244 newcontents = doc_sect.group(2) 1245 if not newcontents: 1246 newcontents = "" 1247 self.dump_section() 1248 self.entry.begin_section(ln, newsection) 1249 self.entry.leading_space = None 1250 1251 self.entry.add_text(newcontents.lstrip()) 1252 return True 1253 return False 1254 1255 # 1256 # Helper function to detect (and effect) the end of a kerneldoc comment. 1257 # 1258 def is_comment_end(self, ln, line): 1259 if doc_end.search(line): 1260 self.dump_section() 1261 1262 # Look for doc_com + <text> + doc_end: 1263 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1264 if r.match(line): 1265 self.emit_msg(ln, f"suspicious ending line: {line}") 1266 1267 self.entry.prototype = "" 1268 self.entry.new_start_line = ln + 1 1269 1270 self.state = state.PROTO 1271 return True 1272 return False 1273 1274 1275 def process_decl(self, ln, line): 1276 """ 1277 STATE_DECLARATION: We've seen the beginning of a declaration 1278 """ 1279 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1280 return 1281 # 1282 # Look for anything with the " * " line beginning. 1283 # 1284 if doc_content.search(line): 1285 cont = doc_content.group(1) 1286 # 1287 # A blank line means that we have moved out of the declaration 1288 # part of the comment (without any "special section" parameter 1289 # descriptions). 1290 # 1291 if cont == "": 1292 self.state = state.BODY 1293 # 1294 # Otherwise we have more of the declaration section to soak up. 1295 # 1296 else: 1297 self.entry.declaration_purpose = \ 1298 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1299 else: 1300 # Unknown line, ignore 1301 self.emit_msg(ln, f"bad line: {line}") 1302 1303 1304 def process_special(self, ln, line): 1305 """ 1306 STATE_SPECIAL_SECTION: a section ending with a blank line 1307 """ 1308 # 1309 # If we have hit a blank line (only the " * " marker), then this 1310 # section is done. 
1311 # 1312 if KernRe(r"\s*\*\s*$").match(line): 1313 self.entry.begin_section(ln, dump = True) 1314 self.state = state.BODY 1315 return 1316 # 1317 # Not a blank line, look for the other ways to end the section. 1318 # 1319 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1320 return 1321 # 1322 # OK, we should have a continuation of the text for this section. 1323 # 1324 if doc_content.search(line): 1325 cont = doc_content.group(1) 1326 # 1327 # If the lines of text after the first in a special section have 1328 # leading white space, we need to trim it out or Sphinx will get 1329 # confused. For the second line (the None case), see what we 1330 # find there and remember it. 1331 # 1332 if self.entry.leading_space is None: 1333 r = KernRe(r'^(\s+)') 1334 if r.match(cont): 1335 self.entry.leading_space = len(r.group(1)) 1336 else: 1337 self.entry.leading_space = 0 1338 # 1339 # Otherwise, before trimming any leading chars, be *sure* 1340 # that they are white space. We should maybe warn if this 1341 # isn't the case. 1342 # 1343 for i in range(0, self.entry.leading_space): 1344 if cont[i] != " ": 1345 self.entry.leading_space = i 1346 break 1347 # 1348 # Add the trimmed result to the section and we're done. 1349 # 1350 self.entry.add_text(cont[self.entry.leading_space:]) 1351 else: 1352 # Unknown line, ignore 1353 self.emit_msg(ln, f"bad line: {line}") 1354 1355 def process_body(self, ln, line): 1356 """ 1357 STATE_BODY: the bulk of a kerneldoc comment. 
1358 """ 1359 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1360 return 1361 1362 if doc_content.search(line): 1363 cont = doc_content.group(1) 1364 self.entry.add_text(cont) 1365 else: 1366 # Unknown line, ignore 1367 self.emit_msg(ln, f"bad line: {line}") 1368 1369 def process_inline_name(self, ln, line): 1370 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1371 1372 if doc_inline_sect.search(line): 1373 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1374 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1375 self.state = state.INLINE_TEXT 1376 elif doc_inline_end.search(line): 1377 self.dump_section() 1378 self.state = state.PROTO 1379 elif doc_content.search(line): 1380 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1381 self.state = state.PROTO 1382 # else ... ?? 1383 1384 def process_inline_text(self, ln, line): 1385 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1386 1387 if doc_inline_end.search(line): 1388 self.dump_section() 1389 self.state = state.PROTO 1390 elif doc_content.search(line): 1391 self.entry.add_text(doc_content.group(1)) 1392 # else ... ?? 
1393 1394 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1395 """ 1396 Handle syscall definitions 1397 """ 1398 1399 is_void = False 1400 1401 # Strip newlines/CR's 1402 proto = re.sub(r'[\r\n]+', ' ', proto) 1403 1404 # Check if it's a SYSCALL_DEFINE0 1405 if 'SYSCALL_DEFINE0' in proto: 1406 is_void = True 1407 1408 # Replace SYSCALL_DEFINE with correct return type & function name 1409 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1410 1411 r = KernRe(r'long\s+(sys_.*?),') 1412 if r.search(proto): 1413 proto = KernRe(',').sub('(', proto, count=1) 1414 elif is_void: 1415 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1416 1417 # Now delete all of the odd-numbered commas in the proto 1418 # so that argument types & names don't have a comma between them 1419 count = 0 1420 length = len(proto) 1421 1422 if is_void: 1423 length = 0 # skip the loop if is_void 1424 1425 for ix in range(length): 1426 if proto[ix] == ',': 1427 count += 1 1428 if count % 2 == 1: 1429 proto = proto[:ix] + ' ' + proto[ix + 1:] 1430 1431 return proto 1432 1433 def tracepoint_munge(self, ln, proto): 1434 """ 1435 Handle tracepoint definitions 1436 """ 1437 1438 tracepointname = None 1439 tracepointargs = None 1440 1441 # Match tracepoint name based on different patterns 1442 r = KernRe(r'TRACE_EVENT\((.*?),') 1443 if r.search(proto): 1444 tracepointname = r.group(1) 1445 1446 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1447 if r.search(proto): 1448 tracepointname = r.group(1) 1449 1450 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1451 if r.search(proto): 1452 tracepointname = r.group(2) 1453 1454 if tracepointname: 1455 tracepointname = tracepointname.lstrip() 1456 1457 r = KernRe(r'TP_PROTO\((.*?)\)') 1458 if r.search(proto): 1459 tracepointargs = r.group(1) 1460 1461 if not tracepointname or not tracepointargs: 1462 self.emit_msg(ln, 1463 f"Unrecognized tracepoint format:\n{proto}\n") 1464 else: 1465 proto = f"static inline void 
trace_{tracepointname}({tracepointargs})" 1466 self.entry.identifier = f"trace_{self.entry.identifier}" 1467 1468 return proto 1469 1470 def process_proto_function(self, ln, line): 1471 """Ancillary routine to process a function prototype""" 1472 1473 # strip C99-style comments to end of line 1474 line = KernRe(r"//.*$", re.S).sub('', line) 1475 # 1476 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1477 # 1478 if KernRe(r'\s*#\s*define').match(line): 1479 self.entry.prototype = line 1480 elif not line.startswith('#'): # skip other preprocessor stuff 1481 r = KernRe(r'([^\{]*)') 1482 if r.match(line): 1483 self.entry.prototype += r.group(1) + " " 1484 # 1485 # If we now have the whole prototype, clean it up and declare victory. 1486 # 1487 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1488 # strip comments and surrounding spaces 1489 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1490 # 1491 # Handle self.entry.prototypes for function pointers like: 1492 # int (*pcs_config)(struct foo) 1493 # by turning it into 1494 # int pcs_config(struct foo) 1495 # 1496 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1497 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1498 # 1499 # Handle special declaration syntaxes 1500 # 1501 if 'SYSCALL_DEFINE' in self.entry.prototype: 1502 self.entry.prototype = self.syscall_munge(ln, 1503 self.entry.prototype) 1504 else: 1505 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1506 if r.search(self.entry.prototype): 1507 self.entry.prototype = self.tracepoint_munge(ln, 1508 self.entry.prototype) 1509 # 1510 # ... 
and we're done 1511 # 1512 self.dump_function(ln, self.entry.prototype) 1513 self.reset_state(ln) 1514 1515 def process_proto_type(self, ln, line): 1516 """Ancillary routine to process a type""" 1517 1518 # Strip C99-style comments and surrounding whitespace 1519 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1520 if not line: 1521 return # nothing to see here 1522 1523 # To distinguish preprocessor directive from regular declaration later. 1524 if line.startswith('#'): 1525 line += ";" 1526 # 1527 # Split the declaration on any of { } or ;, and accumulate pieces 1528 # until we hit a semicolon while not inside {brackets} 1529 # 1530 r = KernRe(r'(.*?)([{};])') 1531 for chunk in r.split(line): 1532 if chunk: # Ignore empty matches 1533 self.entry.prototype += chunk 1534 # 1535 # This cries out for a match statement ... someday after we can 1536 # drop Python 3.9 ... 1537 # 1538 if chunk == '{': 1539 self.entry.brcount += 1 1540 elif chunk == '}': 1541 self.entry.brcount -= 1 1542 elif chunk == ';' and self.entry.brcount <= 0: 1543 self.dump_declaration(ln, self.entry.prototype) 1544 self.reset_state(ln) 1545 return 1546 # 1547 # We hit the end of the line while still in the declaration; put 1548 # in a space to represent the newline. 
1549 # 1550 self.entry.prototype += ' ' 1551 1552 def process_proto(self, ln, line): 1553 """STATE_PROTO: reading a function/whatever prototype.""" 1554 1555 if doc_inline_oneline.search(line): 1556 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1557 self.entry.add_text(doc_inline_oneline.group(2)) 1558 self.dump_section() 1559 1560 elif doc_inline_start.search(line): 1561 self.state = state.INLINE_NAME 1562 1563 elif self.entry.decl_type == 'function': 1564 self.process_proto_function(ln, line) 1565 1566 else: 1567 self.process_proto_type(ln, line) 1568 1569 def process_docblock(self, ln, line): 1570 """STATE_DOCBLOCK: within a DOC: block.""" 1571 1572 if doc_end.search(line): 1573 self.dump_section() 1574 self.output_declaration("doc", self.entry.identifier) 1575 self.reset_state(ln) 1576 1577 elif doc_content.search(line): 1578 self.entry.add_text(doc_content.group(1)) 1579 1580 def parse_export(self): 1581 """ 1582 Parses EXPORT_SYMBOL* macros from a single Kernel source file. 1583 """ 1584 1585 export_table = set() 1586 1587 try: 1588 with open(self.fname, "r", encoding="utf8", 1589 errors="backslashreplace") as fp: 1590 1591 for line in fp: 1592 self.process_export(export_table, line) 1593 1594 except IOError: 1595 return None 1596 1597 return export_table 1598 1599 # 1600 # The state/action table telling us which function to invoke in 1601 # each state. 1602 # 1603 state_actions = { 1604 state.NORMAL: process_normal, 1605 state.NAME: process_name, 1606 state.BODY: process_body, 1607 state.DECLARATION: process_decl, 1608 state.SPECIAL_SECTION: process_special, 1609 state.INLINE_NAME: process_inline_name, 1610 state.INLINE_TEXT: process_inline_text, 1611 state.PROTO: process_proto, 1612 state.DOCBLOCK: process_docblock, 1613 } 1614 1615 def parse_kdoc(self): 1616 """ 1617 Open and process each line of a C source file. 
1618 The parsing is controlled via a state machine, and the line is passed 1619 to a different process function depending on the state. The process 1620 function may update the state as needed. 1621 1622 Besides parsing kernel-doc tags, it also parses export symbols. 1623 """ 1624 1625 prev = "" 1626 prev_ln = None 1627 export_table = set() 1628 1629 try: 1630 with open(self.fname, "r", encoding="utf8", 1631 errors="backslashreplace") as fp: 1632 for ln, line in enumerate(fp): 1633 1634 line = line.expandtabs().strip("\n") 1635 1636 # Group continuation lines on prototypes 1637 if self.state == state.PROTO: 1638 if line.endswith("\\"): 1639 prev += line.rstrip("\\") 1640 if not prev_ln: 1641 prev_ln = ln 1642 continue 1643 1644 if prev: 1645 ln = prev_ln 1646 line = prev + line 1647 prev = "" 1648 prev_ln = None 1649 1650 self.config.log.debug("%d %s: %s", 1651 ln, state.name[self.state], 1652 line) 1653 1654 # This is an optimization over the original script. 1655 # There, when export_file was used for the same file, 1656 # it was read twice. Here, we use the already-existing 1657 # loop to parse exported symbols as well. 1658 # 1659 if (self.state != state.NORMAL) or \ 1660 not self.process_export(export_table, line): 1661 # Hand this line to the appropriate state handler 1662 self.state_actions[self.state](self, ln, line) 1663 1664 except OSError: 1665 self.config.log.error(f"Error: Cannot open file {self.fname}") 1666 1667 return export_table, self.entries 1668