#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702

"""
Classes and functions related to reading a C language source or header FILE
and extract embedded documentation comments from it.
"""

import sys
import re
from pprint import pformat

from kdoc.c_lex import CTokenizer, tokenizer_set_log
from kdoc.kdoc_re import KernRe
from kdoc.kdoc_item import KdocItem

#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# Let's declare them in lowercase outside any class to make it easier to
# convert from the Perl script.
#
# As those are evaluated at the beginning, no need to cache them
#

# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

doc_end = KernRe(r'\*/', cache=False)
doc_com = KernRe(r'\s*\*\s*', cache=False)
doc_com_body = KernRe(r'\s*\* ?', cache=False)
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
known_sections = KernRe(known_section_names, flags = re.I)
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * '
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Ancillary functions
#

multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    A little helper to get rid of excess white space.

    Strips leading/trailing white space from ``s`` and replaces every run
    of two or more white space characters with a single blank.
    """
    return multi_space.sub(' ', s.strip())

def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    The work is delegated to ``CTokenizer``: the text is tokenized and the
    string form of the tokenizer is returned.
    """

    tokens = CTokenizer(text)
    return str(tokens)

class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL = 0            #: Normal code.
    NAME = 1              #: Looking for function name.
    DECLARATION = 2       #: We have seen a declaration which might not be done.
    BODY = 3              #: The body of the comment.
    SPECIAL_SECTION = 4   #: Doc section ending with a blank line.
    PROTO = 5             #: Scanning prototype.
    DOCBLOCK = 6          #: Documentation block.
    INLINE_NAME = 7       #: Gathering doc outside main block.
    INLINE_TEXT = 8       #: Reading the body of inline docs.

    #: Names for each parser state.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` attribute, ``fname`` is the file
        name used on messages and ``ln`` is the line number where the
        kernel-doc comment was found.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.sections_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # Section being filled. Initialized here so that dump_section()
        # is safe even if begin_section() was never called.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages go straight to ``config.log.info()``.
        Warnings are only stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section.

        When ``dump`` is true, the section currently being filled is
        stored first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name matches ``type_param`` are stored as parameter
        descriptions; everything else goes to ``self.sections``. When
        ``start_new`` is true, the accumulated contents are cleared and the
        current section goes back to the default one.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.sections_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
    undescribed = "-- undescribed --"

    def __init__(self, config, fname, xforms, store_src=False):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config
        self.xforms = xforms
        self.store_src = store_src

        tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        global python_warning
        if not python_warning and sys.version_info < (3, 7):
            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')
            python_warning = True

    @staticmethod
    def _kind_label(decl_type):
        """
        Return the label used on warnings for an item of ``decl_type``:
        ``"function parameter"`` for functions, ``"<decl_type> member"``
        otherwise.
        """
        if decl_type == 'function':
            return f"{decl_type} parameter"
        return f"{decl_type} member"

    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        If there is a current entry, the message is handled by it.
        Otherwise, it is sent directly to ``config.log``.
        """

        if self.entry:
            self.entry.emit_msg(ln, msg, warning=warning)
            return

        log_msg = f"{self.fname}:{ln} {msg}"

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dump section contents to arrays/hashes intended for that purpose.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Store the entry into an entry array.

        The actual output and output filters will be handled elsewhere.
        """

        item = KdocItem(name, self.fname, dtype,
                        self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.sections_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def emit_unused_warnings(self):
        """
        When the parser fails to produce a valid entry, it places some
        warnings under `entry.warnings` that will be discarded when resetting
        the state.

        Ensure that those warnings are not lost.

        .. note::

              Because we are calling `config.warning()` here, those
              warnings are not filtered by the `-W` parameters: they will all
              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
              `-Wcontents-before-sections` are used.

              Allowing those warnings to be filtered is complex, because it
              would require storing them in a buffer and then filtering them
              during the output step of the code, depending on the
              selected symbols.
        """
        #
        # NOTE(review): self.entries is filled by output_declaration() with
        # KdocItem objects, while self.entry is a KernelEntry. Unless
        # KdocItem compares equal to a KernelEntry, this membership test is
        # presumably always true - confirm that this is the intent.
        #
        if self.entry and self.entry not in self.entries:
            for log_msg in self.entry.warnings:
                self.config.warning(log_msg)

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        self.emit_unused_warnings()

        self.entry = KernelEntry(self.config, self.fname, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        ``param`` is the parameter/member name, ``dtype`` its type (empty
        for the "anonymous type" cases) and ``org_arg`` the original
        declaration text, stored at ``parametertypes``.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                named_variadic = len(param) > 3
                if named_variadic:  # there is a name provided, use that
                    #
                    # If the user documented the parameter using the
                    # ``@name...:`` form, the description is stored in
                    # parameterdescs under the unstripped key. Migrate
                    # it to the stripped key so the user's text is not
                    # silently dropped during output, and so the
                    # excess-parameter check in check_sections() does
                    # not flag the unstripped key as orphaned.
                    #
                    orig = self.entry.parameterdescs.pop(param, None)
                    param = param[:-3]
                    if orig is not None and \
                       not self.entry.parameterdescs.get(param):
                        self.entry.parameterdescs[param] = orig
                if not self.entry.parameterdescs.get(param):
                    #
                    # For a named variadic (e.g. ``args...``), emit the
                    # standard "not described" warning before auto-filling
                    # so a missing or mistyped ``@<name>:`` doc tag does
                    # not go undetected. The bare ``...`` form has no
                    # natural name for the user to document and so always
                    # gets the auto-generated text.
                    #
                    if named_variadic and decl_type == 'function':
                        self.emit_msg(ln,
                                      f"function parameter '{param}' "
                                      f"not described in "
                                      f"'{declaration_name}'")
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                dname = self._kind_label(decl_type)
                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg


    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is the text with all arguments/members, which is split
        at ``splitter`` (``','`` for functions, ``';'`` for structs).
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Replace '[at_least ' with '[static '. This allows sphinx to parse
            # array parameter declarations like 'char A[at_least 4]', where
            # 'at_least' is #defined to 'static' by the kernel headers.
            arg = arg.replace('[at_least ', '[static ')

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                # Use a name of its own: don't shadow the "args" parameter
                decls = KernRe(r'\s*,\s*').split(arg)
                decls[0] = re.sub(r'(\*+)\s*', r' \1', decls[0])
                #
                # decls[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(decls[0]):
                    decls[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in decls[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in decls:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check for errors inside sections, emitting warnings when
        descriptions are found for parameters/members that don't exist.
        """
        dname = self._kind_label(decl_type)

        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

        #
        # Check that documented parameter names (from doc comments, including
        # inline ``/** @member: */`` tags) actually match real members in
        # the declaration. This catches mismatched or stale kernel-doc
        # member tags that don't correspond to any actual struct/union
        # member or function parameter.
        #
        for param_name, desc in self.entry.parameterdescs.items():
            # Skip auto-generated entries from push_parameter()
            if desc == self.undescribed:
                continue
            if desc in ("no arguments", "anonymous\n", "variable arguments"):
                continue
            if param_name.startswith("{unnamed_"):
                continue
            if param_name in self.entry.parameterlist:
                continue

            self.emit_msg(ln,
                          f"Excess {dname} '{param_name}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.

        Nothing is checked unless ``config.wreturn`` is set.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    def split_struct_proto(self, proto):
        """
        Split apart a structure prototype; returns (struct|union, name,
        members) or ``None``.
        """

        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        # Plain "struct name { ... }" form
        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))

        # "typedef struct { ... } name;" form: the name comes last
        r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
        if r.search(proto):
            return (r.group(1), r.group(3), r.group(2))
        return None

    def rewrite_struct_members(self, members):
        """
        Process ``struct``/``union`` members from the most deeply nested
        outward.

        Rewrite the members of a ``struct`` or ``union`` for easier formatting
        later on. Among other things, this function will turn a member like::

          struct { inner_members; } foo;

        into::

          struct foo; inner_members;
        """

        #
        # The trick is in the ``^{`` below - it prevents a match of an outer
        # ``struct``/``union`` until the inner one has been munged
        # (removing the ``{`` in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t)  # Reconstruct the original formatting
                dtype, _name, _lbr, content, _rbr, rest, _semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Keep those on their own names: "dtype" must
                            # still hold struct/union for the next s_id
                            # of a declaration like "struct {...} a, b;".
                            #
                            fn_type, fn_name, fn_extra = r.group(1), r.group(2), r.group(3)
                            # Pointer-to-function
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{fn_type}{fn_name}{fn_extra}; "
                            else:
                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                for member in r.group(2).split(','):
                                    member = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', member)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{r.group(1)} {member}; "
                                    else:
                                        newmember += f"{r.group(1)} {s_id}.{member}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    def format_struct_decl(self, declaration):
        """
        Format the ``struct`` declaration into a standard form for inclusion
        in the resulting docs.
        """

        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        lines = []
        level = 1
        for clause in declaration.split('\n'):
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if not clause:
                continue
            if '}' in clause and level > 1:
                level -= 1
            # Preprocessor lines get no nesting indentation
            indent = "" if clause.startswith('#') else "\t" * level
            lines.append(f"{indent}\t{clause}\n")
            if "{" in clause and "}" not in clause:
                level += 1
        return "".join(lines)


    def dump_struct(self, ln, proto, source):
        """
        Store an entry for a ``struct`` or ``union``
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        proto = trim_private_members(proto)
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = self.xforms.apply("struct", members)

        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                source=source,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto, source):
        """
        Store an ``enum`` inside self.entries array.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = trim_private_members(proto)
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = r.group(1)
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = r.group(2)
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            if not arg.strip():
                continue

            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '@{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                source=source,
                                purpose=self.entry.declaration_purpose)

    def dump_var(self, ln, proto, source):
        """
        Store variables that are part of kAPI.
        """
        VAR_ATTRIBS = [
            "extern",
            "const",
        ]
        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"

        #
        # Store the full prototype before modifying it
        #
        full_proto = proto
        declaration_name = None

        #
        # Handle macro definitions
        #
        macro_prefixes = [
            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
        ]

        for r in macro_prefixes:
            match = r.search(proto)
            if match:
                declaration_name = match.group(1)
                break

        #
        # Drop comments and macros to have a pure C prototype
        #
        if not declaration_name:
            proto = self.xforms.apply("var", proto)

        proto = proto.rstrip()

        #
        # Variable name is at the end of the declaration
        #

        default_val = None

        r = KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
        if r.match(proto):
            if not declaration_name:
                declaration_name = r.group(1)

            default_val = r.group(2)
        else:
            r = KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")

            if r.match(proto):
                default_val = r.group(1)
        if not declaration_name:
            self.emit_msg(ln,f"{proto}: can't parse variable")
            return

        if default_val:
            default_val = default_val.lstrip("=").strip()

        self.output_declaration("var", declaration_name,
                                source=source,
                                full_proto=full_proto,
                                default_val=default_val,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype, source):
        """
        Store a data declaration inside self.entries array.

        Dispatches to the right ``dump_*`` method according to
        ``self.entry.decl_type``.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype, source)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype, source)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype, source)
        elif self.entry.decl_type == "var":
            self.dump_var(ln, prototype, source)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype, source):
        """
        Store a function or function macro inside self.entries array.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'

        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True
        else:
            #
            # Apply the initial transformations.
            #
            prototype = self.xforms.apply("func", prototype)

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.
        #
        self.output_declaration(decl_type, declaration_name,
                                source=source,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)


    def dump_typedef(self, ln, proto, source):
        """
        Store a ``typedef`` inside self.entries array.
        """
        #
        # We start by looking for function typedefs.
        #
        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.create_parameter_list(ln, 'function', args, ',', declaration_name)

            self.output_declaration('function', declaration_name,
                                    source=source,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return
        #
        # Not a function, try to parse a simple typedef.
        #
        r = KernRe(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    source=source,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_msg(ln, "error: Cannot parse typedef!")

    @staticmethod
    def process_export(function_set, line):
        """
        process ``EXPORT_SYMBOL*`` tags

        This method doesn't use any variable from the class, so declare it
        with a staticmethod decorator.

        Adds the exported symbol found at ``line`` (if any) to
        ``function_set``. Returns ``True`` if an export was found,
        ``False`` otherwise.
        """

        # We support documenting some exported symbols with different
        # names.  A horrible hack.
        suffixes = [ '_noprof' ]

        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
        # multiple export lines would violate Kernel coding style.

        if export_symbol.search(line):
            symbol = export_symbol.group(2)
        elif export_symbol_ns.search(line):
            symbol = export_symbol_ns.group(2)
        else:
            return False
        #
        # Found an export, trim out any special suffixes
        #
        for suffix in suffixes:
            # Be backward compatible with Python < 3.9
            if symbol.endswith(suffix):
                symbol = symbol[:-len(suffix)]
        function_set.add(symbol)
        return True

    def process_normal(self, ln, line, source):
        """
        STATE_NORMAL: looking for the ``/**`` to begin everything.
        """

        if not doc_start.match(line):
            return

        # start a new entry
        self.reset_state(ln)

        # next line is always the function name
        self.state = state.NAME

    def process_name(self, ln, line, source):
        """
        STATE_NAME: Looking for the "name - description" line
        """
        #
        # Check for a DOC: block and handle them specially.
1193 # 1194 if doc_block.search(line): 1195 1196 if not doc_block.group(1): 1197 self.entry.begin_section(ln, "Introduction") 1198 else: 1199 self.entry.begin_section(ln, doc_block.group(1)) 1200 1201 self.entry.identifier = self.entry.section 1202 self.state = state.DOCBLOCK 1203 # 1204 # Otherwise we're looking for a normal kerneldoc declaration line. 1205 # 1206 elif doc_decl.search(line): 1207 self.entry.identifier = doc_decl.group(1) 1208 1209 # Test for data declaration 1210 if doc_begin_data.search(line): 1211 self.entry.decl_type = doc_begin_data.group(1) 1212 self.entry.identifier = doc_begin_data.group(2) 1213 # 1214 # Look for a function description 1215 # 1216 elif doc_begin_func.search(line): 1217 self.entry.identifier = doc_begin_func.group(1) 1218 self.entry.decl_type = "function" 1219 # 1220 # We struck out. 1221 # 1222 else: 1223 self.emit_msg(ln, 1224 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1225 self.state = state.NORMAL 1226 return 1227 # 1228 # OK, set up for a new kerneldoc entry. 1229 # 1230 self.state = state.BODY 1231 self.entry.identifier = self.entry.identifier.strip(" ") 1232 # if there's no @param blocks need to set up default section here 1233 self.entry.begin_section(ln + 1) 1234 # 1235 # Find the description portion, which *should* be there but 1236 # isn't always. 
1237 # (We should be able to capture this from the previous parsing - someday) 1238 # 1239 r = KernRe("[-:](.*)") 1240 if r.search(line): 1241 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1242 self.state = state.DECLARATION 1243 else: 1244 self.entry.declaration_purpose = "" 1245 1246 if not self.entry.declaration_purpose and self.config.wshort_desc: 1247 self.emit_msg(ln, 1248 f"missing initial short description on line:\n{line}") 1249 1250 if not self.entry.identifier and self.entry.decl_type != "enum": 1251 self.emit_msg(ln, 1252 f"wrong kernel-doc identifier on line:\n{line}") 1253 self.state = state.NORMAL 1254 1255 if self.config.verbose: 1256 self.emit_msg(ln, 1257 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1258 warning=False) 1259 # 1260 # Failed to find an identifier. Emit a warning 1261 # 1262 else: 1263 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1264 1265 def is_new_section(self, ln, line): 1266 """ 1267 Helper function to determine if a new section is being started. 1268 """ 1269 if doc_sect.search(line): 1270 self.state = state.BODY 1271 # 1272 # Pick out the name of our new section, tweaking it if need be. 1273 # 1274 newsection = doc_sect.group(1) 1275 if newsection.lower() == 'description': 1276 newsection = 'Description' 1277 elif newsection.lower() == 'context': 1278 newsection = 'Context' 1279 self.state = state.SPECIAL_SECTION 1280 elif newsection.lower() in ["@return", "@returns", 1281 "return", "returns"]: 1282 newsection = "Return" 1283 self.state = state.SPECIAL_SECTION 1284 elif newsection[0] == '@': 1285 self.state = state.SPECIAL_SECTION 1286 # 1287 # Initialize the contents, and get the new section going. 
1288 # 1289 newcontents = doc_sect.group(2) 1290 if not newcontents: 1291 newcontents = "" 1292 self.dump_section() 1293 self.entry.begin_section(ln, newsection) 1294 self.entry.leading_space = None 1295 1296 self.entry.add_text(newcontents.lstrip()) 1297 return True 1298 return False 1299 1300 def is_comment_end(self, ln, line): 1301 """ 1302 Helper function to detect (and effect) the end of a kerneldoc comment. 1303 """ 1304 if doc_end.search(line): 1305 self.dump_section() 1306 1307 # Look for doc_com + <text> + doc_end: 1308 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1309 if r.match(line): 1310 self.emit_msg(ln, f"suspicious ending line: {line}") 1311 1312 self.entry.prototype = "" 1313 self.entry.new_start_line = ln + 1 1314 1315 self.state = state.PROTO 1316 return True 1317 return False 1318 1319 1320 def process_decl(self, ln, line, source): 1321 """ 1322 STATE_DECLARATION: We've seen the beginning of a declaration. 1323 """ 1324 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1325 return 1326 # 1327 # Look for anything with the " * " line beginning. 1328 # 1329 if doc_content.search(line): 1330 cont = doc_content.group(1) 1331 # 1332 # A blank line means that we have moved out of the declaration 1333 # part of the comment (without any "special section" parameter 1334 # descriptions). 1335 # 1336 if cont == "": 1337 self.state = state.BODY 1338 # 1339 # Otherwise we have more of the declaration section to soak up. 1340 # 1341 else: 1342 self.entry.declaration_purpose = \ 1343 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1344 else: 1345 # Unknown line, ignore 1346 self.emit_msg(ln, f"bad line: {line}") 1347 1348 1349 def process_special(self, ln, line, source): 1350 """ 1351 STATE_SPECIAL_SECTION: a section ending with a blank line. 1352 """ 1353 # 1354 # If we have hit a blank line (only the " * " marker), then this 1355 # section is done. 
1356 # 1357 if KernRe(r"\s*\*\s*$").match(line): 1358 self.entry.begin_section(ln, dump = True) 1359 self.state = state.BODY 1360 return 1361 # 1362 # Not a blank line, look for the other ways to end the section. 1363 # 1364 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1365 return 1366 # 1367 # OK, we should have a continuation of the text for this section. 1368 # 1369 if doc_content.search(line): 1370 cont = doc_content.group(1) 1371 # 1372 # If the lines of text after the first in a special section have 1373 # leading white space, we need to trim it out or Sphinx will get 1374 # confused. For the second line (the None case), see what we 1375 # find there and remember it. 1376 # 1377 if self.entry.leading_space is None: 1378 r = KernRe(r'^(\s+)') 1379 if r.match(cont): 1380 self.entry.leading_space = len(r.group(1)) 1381 else: 1382 self.entry.leading_space = 0 1383 # 1384 # Otherwise, before trimming any leading chars, be *sure* 1385 # that they are white space. We should maybe warn if this 1386 # isn't the case. 1387 # 1388 for i in range(0, self.entry.leading_space): 1389 if cont[i] != " ": 1390 self.entry.leading_space = i 1391 break 1392 # 1393 # Add the trimmed result to the section and we're done. 1394 # 1395 self.entry.add_text(cont[self.entry.leading_space:]) 1396 else: 1397 # Unknown line, ignore 1398 self.emit_msg(ln, f"bad line: {line}") 1399 1400 def process_body(self, ln, line, source): 1401 """ 1402 STATE_BODY: the bulk of a kerneldoc comment. 
1403 """ 1404 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1405 return 1406 1407 if doc_content.search(line): 1408 cont = doc_content.group(1) 1409 self.entry.add_text(cont) 1410 else: 1411 # Unknown line, ignore 1412 self.emit_msg(ln, f"bad line: {line}") 1413 1414 def process_inline_name(self, ln, line, source): 1415 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1416 1417 if doc_inline_sect.search(line): 1418 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1419 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1420 self.state = state.INLINE_TEXT 1421 elif doc_inline_end.search(line): 1422 self.dump_section() 1423 self.state = state.PROTO 1424 elif doc_content.search(line): 1425 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1426 self.state = state.PROTO 1427 1428 # 1429 # Don't let it add partial comments at the code, as breaks the 1430 # logic meant to remove comments from prototypes. 1431 # 1432 self.process_proto_type(ln, "/**\n" + line, source) 1433 # else ... ?? 1434 1435 def process_inline_text(self, ln, line, source): 1436 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1437 1438 if doc_inline_end.search(line): 1439 self.dump_section() 1440 self.state = state.PROTO 1441 elif doc_content.search(line): 1442 self.entry.add_text(doc_content.group(1)) 1443 # else ... ?? 1444 1445 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1446 """ 1447 Handle syscall definitions. 
1448 """ 1449 1450 is_void = False 1451 1452 # Strip newlines/CR's 1453 proto = re.sub(r'[\r\n]+', ' ', proto) 1454 1455 # Check if it's a SYSCALL_DEFINE0 1456 if 'SYSCALL_DEFINE0' in proto: 1457 is_void = True 1458 1459 # Replace SYSCALL_DEFINE with correct return type & function name 1460 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1461 1462 r = KernRe(r'long\s+(sys_.*?),') 1463 if r.search(proto): 1464 proto = KernRe(',').sub('(', proto, count=1) 1465 elif is_void: 1466 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1467 1468 # Now delete all of the odd-numbered commas in the proto 1469 # so that argument types & names don't have a comma between them 1470 count = 0 1471 length = len(proto) 1472 1473 if is_void: 1474 length = 0 # skip the loop if is_void 1475 1476 for ix in range(length): 1477 if proto[ix] == ',': 1478 count += 1 1479 if count % 2 == 1: 1480 proto = proto[:ix] + ' ' + proto[ix + 1:] 1481 1482 return proto 1483 1484 def tracepoint_munge(self, ln, proto): 1485 """ 1486 Handle tracepoint definitions. 
1487 """ 1488 1489 tracepointname = None 1490 tracepointargs = None 1491 1492 # Match tracepoint name based on different patterns 1493 r = KernRe(r'TRACE_EVENT\((.*?),') 1494 if r.search(proto): 1495 tracepointname = r.group(1) 1496 1497 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1498 if r.search(proto): 1499 tracepointname = r.group(1) 1500 1501 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1502 if r.search(proto): 1503 tracepointname = r.group(2) 1504 1505 if tracepointname: 1506 tracepointname = tracepointname.lstrip() 1507 1508 r = KernRe(r'TP_PROTO\((.*?)\)') 1509 if r.search(proto): 1510 tracepointargs = r.group(1) 1511 1512 if not tracepointname or not tracepointargs: 1513 self.emit_msg(ln, 1514 f"Unrecognized tracepoint format:\n{proto}\n") 1515 else: 1516 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1517 self.entry.identifier = f"trace_{self.entry.identifier}" 1518 1519 return proto 1520 1521 def process_proto_function(self, ln, line, source): 1522 """Ancillary routine to process a function prototype.""" 1523 1524 # strip C99-style comments to end of line 1525 line = KernRe(r"//.*$", re.S).sub('', line) 1526 # 1527 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1528 # 1529 if KernRe(r'\s*#\s*define').match(line): 1530 self.entry.prototype = line 1531 elif not line.startswith('#'): # skip other preprocessor stuff 1532 r = KernRe(r'([^\{]*)') 1533 if r.match(line): 1534 self.entry.prototype += r.group(1) + " " 1535 # 1536 # If we now have the whole prototype, clean it up and declare victory. 
1537 # 1538 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1539 # strip comments and surrounding spaces 1540 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1541 # 1542 # Handle self.entry.prototypes for function pointers like: 1543 # int (*pcs_config)(struct foo) 1544 # by turning it into 1545 # int pcs_config(struct foo) 1546 # 1547 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1548 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1549 # 1550 # Handle special declaration syntaxes 1551 # 1552 if 'SYSCALL_DEFINE' in self.entry.prototype: 1553 self.entry.prototype = self.syscall_munge(ln, 1554 self.entry.prototype) 1555 else: 1556 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1557 if r.search(self.entry.prototype): 1558 self.entry.prototype = self.tracepoint_munge(ln, 1559 self.entry.prototype) 1560 # 1561 # ... and we're done 1562 # 1563 self.dump_function(ln, self.entry.prototype, source) 1564 self.reset_state(ln) 1565 1566 def process_proto_type(self, ln, line, source): 1567 """ 1568 Ancillary routine to process a type. 1569 """ 1570 1571 # Strip C99-style comments and surrounding whitespace 1572 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1573 if not line: 1574 return # nothing to see here 1575 1576 # To distinguish preprocessor directive from regular declaration later. 1577 if line.startswith('#'): 1578 line += ";" 1579 # 1580 # Split the declaration on any of { } or ;, and accumulate pieces 1581 # until we hit a semicolon while not inside {brackets} 1582 # 1583 r = KernRe(r'(.*?)([{};])') 1584 for chunk in r.split(line): 1585 if chunk: # Ignore empty matches 1586 self.entry.prototype += chunk 1587 # 1588 # This cries out for a match statement ... someday after we can 1589 # drop Python 3.9 ... 
1590 # 1591 if chunk == '{': 1592 self.entry.brcount += 1 1593 elif chunk == '}': 1594 self.entry.brcount -= 1 1595 elif chunk == ';' and self.entry.brcount <= 0: 1596 self.dump_declaration(ln, self.entry.prototype, source) 1597 self.reset_state(ln) 1598 return 1599 # 1600 # We hit the end of the line while still in the declaration; put 1601 # in a space to represent the newline. 1602 # 1603 self.entry.prototype += ' ' 1604 1605 def process_proto(self, ln, line, source): 1606 """STATE_PROTO: reading a function/whatever prototype.""" 1607 1608 if doc_inline_oneline.search(line): 1609 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1610 self.entry.add_text(doc_inline_oneline.group(2)) 1611 self.dump_section() 1612 1613 elif doc_inline_start.search(line): 1614 self.state = state.INLINE_NAME 1615 1616 elif self.entry.decl_type == 'function': 1617 self.process_proto_function(ln, line, source) 1618 1619 else: 1620 self.process_proto_type(ln, line, source) 1621 1622 def process_docblock(self, ln, line, source): 1623 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1624 1625 if doc_end.search(line): 1626 self.dump_section() 1627 self.output_declaration("doc", self.entry.identifier, 1628 source=source) 1629 self.reset_state(ln) 1630 1631 elif doc_content.search(line): 1632 self.entry.add_text(doc_content.group(1)) 1633 1634 def parse_export(self): 1635 """ 1636 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1637 """ 1638 1639 export_table = set() 1640 1641 try: 1642 with open(self.fname, "r", encoding="utf8", 1643 errors="backslashreplace") as fp: 1644 1645 for line in fp: 1646 self.process_export(export_table, line) 1647 1648 except IOError: 1649 return None 1650 1651 return export_table 1652 1653 #: The state/action table telling us which function to invoke in each state. 
1654 state_actions = { 1655 state.NORMAL: process_normal, 1656 state.NAME: process_name, 1657 state.BODY: process_body, 1658 state.DECLARATION: process_decl, 1659 state.SPECIAL_SECTION: process_special, 1660 state.INLINE_NAME: process_inline_name, 1661 state.INLINE_TEXT: process_inline_text, 1662 state.PROTO: process_proto, 1663 state.DOCBLOCK: process_docblock, 1664 } 1665 1666 def parse_kdoc(self): 1667 """ 1668 Open and process each line of a C source file. 1669 The parsing is controlled via a state machine, and the line is passed 1670 to a different process function depending on the state. The process 1671 function may update the state as needed. 1672 1673 Besides parsing kernel-doc tags, it also parses export symbols. 1674 """ 1675 1676 prev = "" 1677 prev_ln = None 1678 export_table = set() 1679 self.state = state.NORMAL 1680 source = "" 1681 1682 try: 1683 with open(self.fname, "r", encoding="utf8", 1684 errors="backslashreplace") as fp: 1685 for ln, line in enumerate(fp): 1686 1687 line = line.expandtabs().strip("\n") 1688 1689 # Group continuation lines on prototypes 1690 if self.state == state.PROTO: 1691 if line.endswith("\\"): 1692 prev += line.rstrip("\\") 1693 if not prev_ln: 1694 prev_ln = ln 1695 continue 1696 1697 if prev: 1698 ln = prev_ln 1699 line = prev + line 1700 prev = "" 1701 prev_ln = None 1702 1703 self.config.log.debug("%d %s: %s", 1704 ln, state.name[self.state], 1705 line) 1706 1707 if self.store_src: 1708 if source and self.state == state.NORMAL: 1709 source = "" 1710 elif self.state != state.NORMAL: 1711 source += line + "\n" 1712 1713 # This is an optimization over the original script. 1714 # There, when export_file was used for the same file, 1715 # it was read twice. Here, we use the already-existing 1716 # loop to parse exported symbols as well. 
1717 # 1718 if (self.state != state.NORMAL) or \ 1719 not self.process_export(export_table, line): 1720 prev_state = self.state 1721 # Hand this line to the appropriate state handler 1722 self.state_actions[self.state](self, ln, line, source) 1723 if prev_state == state.NORMAL and self.state != state.NORMAL: 1724 source += line + "\n" 1725 1726 self.emit_unused_warnings() 1727 1728 except OSError: 1729 self.config.log.error(f"Error: Cannot open file {self.fname}") 1730 1731 return export_table, self.entries 1732