1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.c_lex import CTokenizer, tokenizer_set_log 17from kdoc.kdoc_re import KernRe 18from kdoc.kdoc_item import KdocItem 19 20# 21# Regular expressions used to parse kernel-doc markups at KernelDoc class. 22# 23# Let's declare them in lowercase outside any class to make it easier to 24# convert from the Perl script. 25# 26# As those are evaluated at the beginning, no need to cache them 27# 28 29# Allow whitespace at end of comment start. 30doc_start = KernRe(r'^/\*\*\s*$', cache=False) 31 32doc_end = KernRe(r'\*/', cache=False) 33doc_com = KernRe(r'\s*\*\s*', cache=False) 34doc_com_body = KernRe(r'\s*\* ?', cache=False) 35doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 36 37# @params and a strictly limited set of supported section names 38# Specifically: 39# Match @word: 40# @...: 41# @{section-name}: 42# while trying to not match literal block starts like "example::" 43# 44known_section_names = 'description|context|returns?|notes?|examples?' 45known_sections = KernRe(known_section_names, flags = re.I) 46doc_sect = doc_com + \ 47 KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', 48 flags=re.I, cache=False) 49 50doc_content = doc_com_body + KernRe(r'(.*)', cache=False) 51doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) 52doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) 53doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) 54doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False) 55 56export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) 57export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) 58 59type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) 60 61# 62# Tests for the beginning of a kerneldoc block in its various forms. 63# 64doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) 65doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False) 66doc_begin_func = KernRe(str(doc_com) + # initial " * ' 67 r"(?:\w+\s*\*\s*)?" + # type (not captured) 68 r'(?:define\s+)?' + # possible "define" (not captured) 69 r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" 70 r'(?:[-:].*)?$', # description (not captured) 71 cache = False) 72 73# 74# Ancillary functions 75# 76 77multi_space = KernRe(r'\s\s+') 78def trim_whitespace(s): 79 """ 80 A little helper to get rid of excess white space. 81 """ 82 return multi_space.sub(' ', s.strip()) 83 84def trim_private_members(text): 85 """ 86 Remove ``struct``/``enum`` members that have been marked "private". 87 """ 88 89 tokens = CTokenizer(text) 90 return str(tokens) 91 92class state: 93 """ 94 States used by the parser's state machine. 95 """ 96 97 # Parser states 98 NORMAL = 0 #: Normal code. 99 NAME = 1 #: Looking for function name. 100 DECLARATION = 2 #: We have seen a declaration which might not be done. 101 BODY = 3 #: The body of the comment. 102 SPECIAL_SECTION = 4 #: Doc section ending with a blank line. 103 PROTO = 5 #: Scanning prototype. 104 DOCBLOCK = 6 #: Documentation block. 105 INLINE_NAME = 7 #: Gathering doc outside main block. 106 INLINE_TEXT = 8 #: Reading the body of inline docs. 107 108 #: Names for each parser state. 109 name = [ 110 "NORMAL", 111 "NAME", 112 "DECLARATION", 113 "BODY", 114 "SPECIAL_SECTION", 115 "PROTO", 116 "DOCBLOCK", 117 "INLINE_NAME", 118 "INLINE_TEXT", 119 ] 120 121 122SECTION_DEFAULT = "Description" #: Default section. 123 124class KernelEntry: 125 """ 126 Encapsulates a Kernel documentation entry. 127 """ 128 129 def __init__(self, config, fname, ln): 130 self.config = config 131 self.fname = fname 132 133 self._contents = [] 134 self.prototype = "" 135 136 self.warnings = [] 137 138 self.parameterlist = [] 139 self.parameterdescs = {} 140 self.parametertypes = {} 141 self.parameterdesc_start_lines = {} 142 143 self.section_start_lines = {} 144 self.sections = {} 145 146 self.anon_struct_union = False 147 148 self.leading_space = None 149 150 self.fname = fname 151 152 # State flags 153 self.brcount = 0 154 self.declaration_start_line = ln + 1 155 156 # 157 # Management of section contents 158 # 159 def add_text(self, text): 160 """Add a new text to the entry contents list.""" 161 self._contents.append(text) 162 163 def contents(self): 164 """Returns a string with all content texts that were added.""" 165 return '\n'.join(self._contents) + '\n' 166 167 # TODO: rename to emit_message after removal of kernel-doc.pl 168 def emit_msg(self, ln, msg, *, warning=True): 169 """Emit a message.""" 170 171 log_msg = f"{self.fname}:{ln} {msg}" 172 173 if not warning: 174 self.config.log.info(log_msg) 175 return 176 177 # Delegate warning output to output logic, as this way it 178 # will report warnings/info only for symbols that are output 179 180 self.warnings.append(log_msg) 181 return 182 183 def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): 184 """ 185 Begin a new section. 186 """ 187 if dump: 188 self.dump_section(start_new = True) 189 self.section = title 190 self.new_start_line = line_no 191 192 def dump_section(self, start_new=True): 193 """ 194 Dumps section contents to arrays/hashes intended for that purpose. 195 """ 196 # 197 # If we have accumulated no contents in the default ("description") 198 # section, don't bother. 199 # 200 if self.section == SECTION_DEFAULT and not self._contents: 201 return 202 name = self.section 203 contents = self.contents() 204 205 if type_param.match(name): 206 name = type_param.group(1) 207 208 self.parameterdescs[name] = contents 209 self.parameterdesc_start_lines[name] = self.new_start_line 210 211 self.new_start_line = 0 212 213 else: 214 if name in self.sections and self.sections[name] != "": 215 # Only warn on user-specified duplicate section names 216 if name != SECTION_DEFAULT: 217 self.emit_msg(self.new_start_line, 218 f"duplicate section name '{name}'") 219 # Treat as a new paragraph - add a blank line 220 self.sections[name] += '\n' + contents 221 else: 222 self.sections[name] = contents 223 self.section_start_lines[name] = self.new_start_line 224 self.new_start_line = 0 225 226# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) 227 228 if start_new: 229 self.section = SECTION_DEFAULT 230 self._contents = [] 231 232python_warning = False 233 234class KernelDoc: 235 """ 236 Read a C language source or header FILE and extract embedded 237 documentation comments. 238 """ 239 240 #: Name of context section. 241 section_context = "Context" 242 243 #: Name of return section. 244 section_return = "Return" 245 246 #: String to write when a parameter is not described. 247 undescribed = "-- undescribed --" 248 249 def __init__(self, config, fname, xforms): 250 """Initialize internal variables""" 251 252 self.fname = fname 253 self.config = config 254 self.xforms = xforms 255 256 tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") 257 258 # Initial state for the state machines 259 self.state = state.NORMAL 260 261 # Store entry currently being processed 262 self.entry = None 263 264 # Place all potential outputs into an array 265 self.entries = [] 266 267 # 268 # We need Python 3.7 for its "dicts remember the insertion 269 # order" guarantee 270 # 271 global python_warning 272 if (not python_warning and 273 sys.version_info.major == 3 and sys.version_info.minor < 7): 274 275 self.emit_msg(0, 276 'Python 3.7 or later is required for correct results') 277 python_warning = True 278 279 def emit_msg(self, ln, msg, *, warning=True): 280 """Emit a message""" 281 282 if self.entry: 283 self.entry.emit_msg(ln, msg, warning=warning) 284 return 285 286 log_msg = f"{self.fname}:{ln} {msg}" 287 288 if warning: 289 self.config.log.warning(log_msg) 290 else: 291 self.config.log.info(log_msg) 292 293 def dump_section(self, start_new=True): 294 """ 295 Dump section contents to arrays/hashes intended for that purpose. 296 """ 297 298 if self.entry: 299 self.entry.dump_section(start_new) 300 301 # TODO: rename it to store_declaration after removal of kernel-doc.pl 302 def output_declaration(self, dtype, name, **args): 303 """ 304 Store the entry into an entry array. 305 306 The actual output and output filters will be handled elsewhere. 307 """ 308 309 item = KdocItem(name, self.fname, dtype, 310 self.entry.declaration_start_line, **args) 311 item.warnings = self.entry.warnings 312 313 # Drop empty sections 314 # TODO: improve empty sections logic to emit warnings 315 sections = self.entry.sections 316 for section in ["Description", "Return"]: 317 if section in sections and not sections[section].rstrip(): 318 del sections[section] 319 item.set_sections(sections, self.entry.section_start_lines) 320 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 321 self.entry.parametertypes, 322 self.entry.parameterdesc_start_lines) 323 self.entries.append(item) 324 325 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 326 327 def emit_unused_warnings(self): 328 """ 329 When the parser fails to produce a valid entry, it places some 330 warnings under `entry.warnings` that will be discarded when resetting 331 the state. 332 333 Ensure that those warnings are not lost. 334 335 .. note:: 336 337 Because we are calling `config.warning()` here, those 338 warnings are not filtered by the `-W` parameters: they will all 339 be produced even when `-Wreturn`, `-Wshort-desc`, and/or 340 `-Wcontents-before-sections` are used. 341 342 Allowing those warnings to be filtered is complex, because it 343 would require storing them in a buffer and then filtering them 344 during the output step of the code, depending on the 345 selected symbols. 346 """ 347 if self.entry and self.entry not in self.entries: 348 for log_msg in self.entry.warnings: 349 self.config.warning(log_msg) 350 351 def reset_state(self, ln): 352 """ 353 Ancillary routine to create a new entry. It initializes all 354 variables used by the state machine. 355 """ 356 357 self.emit_unused_warnings() 358 359 self.entry = KernelEntry(self.config, self.fname, ln) 360 361 # State flags 362 self.state = state.NORMAL 363 364 def push_parameter(self, ln, decl_type, param, dtype, 365 org_arg, declaration_name): 366 """ 367 Store parameters and their descriptions at self.entry. 368 """ 369 370 if self.entry.anon_struct_union and dtype == "" and param == "}": 371 return # Ignore the ending }; from anonymous struct/union 372 373 self.entry.anon_struct_union = False 374 375 param = KernRe(r'[\[\)].*').sub('', param, count=1) 376 377 # 378 # Look at various "anonymous type" cases. 379 # 380 if dtype == '': 381 if param.endswith("..."): 382 if len(param) > 3: # there is a name provided, use that 383 param = param[:-3] 384 if not self.entry.parameterdescs.get(param): 385 self.entry.parameterdescs[param] = "variable arguments" 386 387 elif (not param) or param == "void": 388 param = "void" 389 self.entry.parameterdescs[param] = "no arguments" 390 391 elif param in ["struct", "union"]: 392 # Handle unnamed (anonymous) union or struct 393 dtype = param 394 param = "{unnamed_" + param + "}" 395 self.entry.parameterdescs[param] = "anonymous\n" 396 self.entry.anon_struct_union = True 397 398 # Warn if parameter has no description 399 # (but ignore ones starting with # as these are not parameters 400 # but inline preprocessor statements) 401 if param not in self.entry.parameterdescs and not param.startswith("#"): 402 self.entry.parameterdescs[param] = self.undescribed 403 404 if "." not in param: 405 if decl_type == 'function': 406 dname = f"{decl_type} parameter" 407 else: 408 dname = f"{decl_type} member" 409 410 self.emit_msg(ln, 411 f"{dname} '{param}' not described in '{declaration_name}'") 412 413 # Strip spaces from param so that it is one continuous string on 414 # parameterlist. This fixes a problem where check_sections() 415 # cannot find a parameter like "addr[6 + 2]" because it actually 416 # appears as "addr[6", "+", "2]" on the parameter list. 417 # However, it's better to maintain the param string unchanged for 418 # output, so just weaken the string compare in check_sections() 419 # to ignore "[blah" in a parameter string. 420 421 self.entry.parameterlist.append(param) 422 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 423 self.entry.parametertypes[param] = org_arg 424 425 426 def create_parameter_list(self, ln, decl_type, args, 427 splitter, declaration_name): 428 """ 429 Creates a list of parameters, storing them at self.entry. 430 """ 431 432 # temporarily replace all commas inside function pointer definition 433 arg_expr = KernRe(r'(\([^\),]+),') 434 while arg_expr.search(args): 435 args = arg_expr.sub(r"\1#", args) 436 437 for arg in args.split(splitter): 438 # Ignore argument attributes 439 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 440 441 # Strip leading/trailing spaces 442 arg = arg.strip() 443 arg = KernRe(r'\s+').sub(' ', arg, count=1) 444 445 if arg.startswith('#'): 446 # Treat preprocessor directive as a typeless variable just to fill 447 # corresponding data structures "correctly". Catch it later in 448 # output_* subs. 449 450 # Treat preprocessor directive as a typeless variable 451 self.push_parameter(ln, decl_type, arg, "", 452 "", declaration_name) 453 # 454 # The pointer-to-function case. 455 # 456 elif KernRe(r'\(.+\)\s*\(').search(arg): 457 arg = arg.replace('#', ',') 458 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" 459 r'([\w\[\].]*)' # Capture the name and possible [array] 460 r'\s*\)') # Make sure the trailing ")" is there 461 if r.match(arg): 462 param = r.group(1) 463 else: 464 self.emit_msg(ln, f"Invalid param: {arg}") 465 param = arg 466 dtype = arg.replace(param, '') 467 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 468 # 469 # The array-of-pointers case. Dig the parameter name out from the middle 470 # of the declaration. 471 # 472 elif KernRe(r'\(.+\)\s*\[').search(arg): 473 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" 474 r'([\w.]*?)' # The actual pointer name 475 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] 476 if r.match(arg): 477 param = r.group(1) 478 else: 479 self.emit_msg(ln, f"Invalid param: {arg}") 480 param = arg 481 dtype = arg.replace(param, '') 482 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 483 elif arg: 484 # 485 # Clean up extraneous spaces and split the string at commas; the first 486 # element of the resulting list will also include the type information. 487 # 488 arg = KernRe(r'\s*:\s*').sub(":", arg) 489 arg = KernRe(r'\s*\[').sub('[', arg) 490 args = KernRe(r'\s*,\s*').split(arg) 491 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 492 # 493 # args[0] has a string of "type a". If "a" includes an [array] 494 # declaration, we want to not be fooled by any white space inside 495 # the brackets, so detect and handle that case specially. 496 # 497 r = KernRe(r'^([^[\]]*\s+)(.*)$') 498 if r.match(args[0]): 499 args[0] = r.group(2) 500 dtype = r.group(1) 501 else: 502 # No space in args[0]; this seems wrong but preserves previous behavior 503 dtype = '' 504 505 bitfield_re = KernRe(r'(.*?):(\w+)') 506 for param in args: 507 # 508 # For pointers, shift the star(s) from the variable name to the 509 # type declaration. 510 # 511 r = KernRe(r'^(\*+)\s*(.*)') 512 if r.match(param): 513 self.push_parameter(ln, decl_type, r.group(2), 514 f"{dtype} {r.group(1)}", 515 arg, declaration_name) 516 # 517 # Perform a similar shift for bitfields. 518 # 519 elif bitfield_re.search(param): 520 if dtype != "": # Skip unnamed bit-fields 521 self.push_parameter(ln, decl_type, bitfield_re.group(1), 522 f"{dtype}:{bitfield_re.group(2)}", 523 arg, declaration_name) 524 else: 525 self.push_parameter(ln, decl_type, param, dtype, 526 arg, declaration_name) 527 528 def check_sections(self, ln, decl_name, decl_type): 529 """ 530 Check for errors inside sections, emitting warnings if not found 531 parameters are described. 532 """ 533 for section in self.entry.sections: 534 if section not in self.entry.parameterlist and \ 535 not known_sections.search(section): 536 if decl_type == 'function': 537 dname = f"{decl_type} parameter" 538 else: 539 dname = f"{decl_type} member" 540 self.emit_msg(ln, 541 f"Excess {dname} '{section}' description in '{decl_name}'") 542 543 def check_return_section(self, ln, declaration_name, return_type): 544 """ 545 If the function doesn't return void, warns about the lack of a 546 return description. 547 """ 548 549 if not self.config.wreturn: 550 return 551 552 # Ignore an empty return type (It's a macro) 553 # Ignore functions with a "void" return type (but not "void *") 554 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 555 return 556 557 if not self.entry.sections.get("Return", None): 558 self.emit_msg(ln, 559 f"No description found for return value of '{declaration_name}'") 560 561 def split_struct_proto(self, proto): 562 """ 563 Split apart a structure prototype; returns (struct|union, name, 564 members) or ``None``. 565 """ 566 567 type_pattern = r'(struct|union)' 568 qualifiers = [ 569 "__attribute__", 570 "__packed", 571 "__aligned", 572 "____cacheline_aligned_in_smp", 573 "____cacheline_aligned", 574 ] 575 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 576 577 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 578 if r.search(proto): 579 return (r.group(1), r.group(2), r.group(3)) 580 else: 581 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 582 if r.search(proto): 583 return (r.group(1), r.group(3), r.group(2)) 584 return None 585 586 def rewrite_struct_members(self, members): 587 """ 588 Process ``struct``/``union`` members from the most deeply nested 589 outward. 590 591 Rewrite the members of a ``struct`` or ``union`` for easier formatting 592 later on. Among other things, this function will turn a member like:: 593 594 struct { inner_members; } foo; 595 596 into:: 597 598 struct foo; inner_members; 599 """ 600 601 # 602 # The trick is in the ``^{`` below - it prevents a match of an outer 603 # ``struct``/``union`` until the inner one has been munged 604 # (removing the ``{`` in the process). 605 # 606 struct_members = KernRe(r'(struct|union)' # 0: declaration type 607 r'([^\{\};]+)' # 1: possible name 608 r'(\{)' 609 r'([^\{\}]*)' # 3: Contents of declaration 610 r'(\})' 611 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration 612 tuples = struct_members.findall(members) 613 while tuples: 614 for t in tuples: 615 newmember = "" 616 oldmember = "".join(t) # Reconstruct the original formatting 617 dtype, name, lbr, content, rbr, rest, semi = t 618 # 619 # Pass through each field name, normalizing the form and formatting. 620 # 621 for s_id in rest.split(','): 622 s_id = s_id.strip() 623 newmember += f"{dtype} {s_id}; " 624 # 625 # Remove bitfield/array/pointer info, getting the bare name. 626 # 627 s_id = KernRe(r'[:\[].*').sub('', s_id) 628 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 629 # 630 # Pass through the members of this inner structure/union. 631 # 632 for arg in content.split(';'): 633 arg = arg.strip() 634 # 635 # Look for (type)(*name)(args) - pointer to function 636 # 637 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') 638 if r.match(arg): 639 dtype, name, extra = r.group(1), r.group(2), r.group(3) 640 # Pointer-to-function 641 if not s_id: 642 # Anonymous struct/union 643 newmember += f"{dtype}{name}{extra}; " 644 else: 645 newmember += f"{dtype}{s_id}.{name}{extra}; " 646 # 647 # Otherwise a non-function member. 648 # 649 else: 650 # 651 # Remove bitmap and array portions and spaces around commas 652 # 653 arg = KernRe(r':\s*\d+\s*').sub('', arg) 654 arg = KernRe(r'\[.*\]').sub('', arg) 655 arg = KernRe(r'\s*,\s*').sub(',', arg) 656 # 657 # Look for a normal decl - "type name[,name...]" 658 # 659 r = KernRe(r'(.*)\s+([\S+,]+)') 660 if r.search(arg): 661 for name in r.group(2).split(','): 662 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) 663 if not s_id: 664 # Anonymous struct/union 665 newmember += f"{r.group(1)} {name}; " 666 else: 667 newmember += f"{r.group(1)} {s_id}.{name}; " 668 else: 669 newmember += f"{arg}; " 670 # 671 # At the end of the s_id loop, replace the original declaration with 672 # the munged version. 673 # 674 members = members.replace(oldmember, newmember) 675 # 676 # End of the tuple loop - search again and see if there are outer members 677 # that now turn up. 678 # 679 tuples = struct_members.findall(members) 680 return members 681 682 def format_struct_decl(self, declaration): 683 """ 684 Format the ``struct`` declaration into a standard form for inclusion 685 in the resulting docs. 686 """ 687 688 # 689 # Insert newlines, get rid of extra spaces. 690 # 691 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 692 declaration = KernRe(r'\}\s+;').sub('};', declaration) 693 # 694 # Format inline enums with each member on its own line. 695 # 696 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 697 while r.search(declaration): 698 declaration = r.sub(r'\1,\n\2', declaration) 699 # 700 # Now go through and supply the right number of tabs 701 # for each line. 702 # 703 def_args = declaration.split('\n') 704 level = 1 705 declaration = "" 706 for clause in def_args: 707 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) 708 if clause: 709 if '}' in clause and level > 1: 710 level -= 1 711 if not clause.startswith('#'): 712 declaration += "\t" * level 713 declaration += "\t" + clause + "\n" 714 if "{" in clause and "}" not in clause: 715 level += 1 716 return declaration 717 718 719 def dump_struct(self, ln, proto): 720 """ 721 Store an entry for a ``struct`` or ``union`` 722 """ 723 # 724 # Do the basic parse to get the pieces of the declaration. 725 # 726 proto = trim_private_members(proto) 727 struct_parts = self.split_struct_proto(proto) 728 if not struct_parts: 729 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 730 return 731 decl_type, declaration_name, members = struct_parts 732 733 if self.entry.identifier != declaration_name: 734 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " 735 f"Prototype was for {decl_type} {declaration_name} instead\n") 736 return 737 # 738 # Go through the list of members applying all of our transformations. 739 # 740 members = self.xforms.apply("struct", members) 741 742 # 743 # Deal with embedded struct and union members, and drop enums entirely. 744 # 745 declaration = members 746 members = self.rewrite_struct_members(members) 747 members = re.sub(r'(\{[^\{\}]*\})', '', members) 748 # 749 # Output the result and we are done. 750 # 751 self.create_parameter_list(ln, decl_type, members, ';', 752 declaration_name) 753 self.check_sections(ln, declaration_name, decl_type) 754 self.output_declaration(decl_type, declaration_name, 755 definition=self.format_struct_decl(declaration), 756 purpose=self.entry.declaration_purpose) 757 758 def dump_enum(self, ln, proto): 759 """ 760 Store an ``enum`` inside self.entries array. 761 """ 762 # 763 # Strip preprocessor directives. Note that this depends on the 764 # trailing semicolon we added in process_proto_type(). 765 # 766 proto = trim_private_members(proto) 767 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 768 # 769 # Parse out the name and members of the enum. Typedef form first. 770 # 771 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 772 if r.search(proto): 773 declaration_name = r.group(2) 774 members = r.group(1) 775 # 776 # Failing that, look for a straight enum 777 # 778 else: 779 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 780 if r.match(proto): 781 declaration_name = r.group(1) 782 members = r.group(2) 783 # 784 # OK, this isn't going to work. 785 # 786 else: 787 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 788 return 789 # 790 # Make sure we found what we were expecting. 791 # 792 if self.entry.identifier != declaration_name: 793 if self.entry.identifier == "": 794 self.emit_msg(ln, 795 f"{proto}: wrong kernel-doc identifier on prototype") 796 else: 797 self.emit_msg(ln, 798 f"expecting prototype for enum {self.entry.identifier}. " 799 f"Prototype was for enum {declaration_name} instead") 800 return 801 802 if not declaration_name: 803 declaration_name = "(anonymous)" 804 # 805 # Parse out the name of each enum member, and verify that we 806 # have a description for it. 807 # 808 member_set = set() 809 members = KernRe(r'\([^;)]*\)').sub('', members) 810 for arg in members.split(','): 811 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 812 if not arg.strip(): 813 continue 814 815 self.entry.parameterlist.append(arg) 816 if arg not in self.entry.parameterdescs: 817 self.entry.parameterdescs[arg] = self.undescribed 818 self.emit_msg(ln, 819 f"Enum value '{arg}' not described in enum '{declaration_name}'") 820 member_set.add(arg) 821 # 822 # Ensure that every described member actually exists in the enum. 823 # 824 for k in self.entry.parameterdescs: 825 if k not in member_set: 826 self.emit_msg(ln, 827 f"Excess enum value '@{k}' description in '{declaration_name}'") 828 829 self.output_declaration('enum', declaration_name, 830 purpose=self.entry.declaration_purpose) 831 832 def dump_var(self, ln, proto): 833 """ 834 Store variables that are part of kAPI. 835 """ 836 VAR_ATTRIBS = [ 837 "extern", 838 "const", 839 ] 840 OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" 841 842 # 843 # Store the full prototype before modifying it 844 # 845 full_proto = proto 846 declaration_name = None 847 848 # 849 # Handle macro definitions 850 # 851 macro_prefixes = [ 852 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"), 853 ] 854 855 for r in macro_prefixes: 856 match = r.search(proto) 857 if match: 858 declaration_name = match.group(1) 859 break 860 861 # 862 # Drop comments and macros to have a pure C prototype 863 # 864 if not declaration_name: 865 proto = self.xforms.apply("var", proto) 866 867 proto = proto.rstrip() 868 869 # 870 # Variable name is at the end of the declaration 871 # 872 873 default_val = None 874 875 r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") 876 if r.match(proto): 877 if not declaration_name: 878 declaration_name = r.group(1) 879 880 default_val = r.group(2) 881 else: 882 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") 883 884 if r.match(proto): 885 default_val = r.group(1) 886 if not declaration_name: 887 self.emit_msg(ln,f"{proto}: can't parse variable") 888 return 889 890 if default_val: 891 default_val = default_val.lstrip("=").strip() 892 893 self.output_declaration("var", declaration_name, 894 full_proto=full_proto, 895 default_val=default_val, 896 purpose=self.entry.declaration_purpose) 897 898 def dump_declaration(self, ln, prototype): 899 """ 900 Store a data declaration inside self.entries array. 901 """ 902 903 if self.entry.decl_type == "enum": 904 self.dump_enum(ln, prototype) 905 elif self.entry.decl_type == "typedef": 906 self.dump_typedef(ln, prototype) 907 elif self.entry.decl_type in ["union", "struct"]: 908 self.dump_struct(ln, prototype) 909 elif self.entry.decl_type == "var": 910 self.dump_var(ln, prototype) 911 else: 912 # This would be a bug 913 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 914 915 def dump_function(self, ln, prototype): 916 """ 917 Store a function or function macro inside self.entries array. 918 """ 919 920 found = func_macro = False 921 return_type = '' 922 decl_type = 'function' 923 924 # 925 # If we have a macro, remove the "#define" at the front. 926 # 927 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 928 if new_proto != prototype: 929 prototype = new_proto 930 # 931 # Dispense with the simple "#define A B" case here; the key 932 # is the space after the name of the symbol being defined. 933 # NOTE that the seemingly misnamed "func_macro" indicates a 934 # macro *without* arguments. 935 # 936 r = KernRe(r'^(\w+)\s+') 937 if r.search(prototype): 938 return_type = '' 939 declaration_name = r.group(1) 940 func_macro = True 941 found = True 942 else: 943 # 944 # Apply the initial transformations. 945 # 946 prototype = self.xforms.apply("func", prototype) 947 948 # Yes, this truly is vile. We are looking for: 949 # 1. Return type (may be nothing if we're looking at a macro) 950 # 2. Function name 951 # 3. Function parameters. 952 # 953 # All the while we have to watch out for function pointer parameters 954 # (which IIRC is what the two sections are for), C types (these 955 # regexps don't even start to express all the possibilities), and 956 # so on. 957 # 958 # If you mess with these regexps, it's a good idea to check that 959 # the following functions' documentation still comes out right: 960 # - parport_register_device (function pointer parameters) 961 # - atomic_set (macro) 962 # - pci_match_device, __copy_to_user (long return type) 963 964 name = r'\w+' 965 type1 = r'(?:[\w\s]+)?' 966 type2 = r'(?:[\w\s]+\*+)+' 967 # 968 # Attempt to match first on (args) with no internal parentheses; this 969 # lets us easily filter out __acquires() and other post-args stuff. If 970 # that fails, just grab the rest of the line to the last closing 971 # parenthesis. 972 # 973 proto_args = r'\(([^\(]*|.*)\)' 974 # 975 # (Except for the simple macro case) attempt to split up the prototype 976 # in the various ways we understand. 977 # 978 if not found: 979 patterns = [ 980 rf'^()({name})\s*{proto_args}', 981 rf'^({type1})\s+({name})\s*{proto_args}', 982 rf'^({type2})\s*({name})\s*{proto_args}', 983 ] 984 985 for p in patterns: 986 r = KernRe(p) 987 if r.match(prototype): 988 return_type = r.group(1) 989 declaration_name = r.group(2) 990 args = r.group(3) 991 self.create_parameter_list(ln, decl_type, args, ',', 992 declaration_name) 993 found = True 994 break 995 # 996 # Parsing done; make sure that things are as we expect. 997 # 998 if not found: 999 self.emit_msg(ln, 1000 f"cannot understand function prototype: '{prototype}'") 1001 return 1002 if self.entry.identifier != declaration_name: 1003 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " 1004 f"Prototype was for {declaration_name}() instead") 1005 return 1006 self.check_sections(ln, declaration_name, "function") 1007 self.check_return_section(ln, declaration_name, return_type) 1008 # 1009 # Store the result. 1010 # 1011 self.output_declaration(decl_type, declaration_name, 1012 typedef=('typedef' in return_type), 1013 functiontype=return_type, 1014 purpose=self.entry.declaration_purpose, 1015 func_macro=func_macro) 1016 1017 1018 def dump_typedef(self, ln, proto): 1019 """ 1020 Store a ``typedef`` inside self.entries array. 1021 """ 1022 # 1023 # We start by looking for function typedefs. 1024 # 1025 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1026 typedef_ident = r'\*?\s*(\w\S+)\s*' 1027 typedef_args = r'\s*\((.*)\);' 1028 1029 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1030 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1031 1032 # Parse function typedef prototypes 1033 for r in [typedef1, typedef2]: 1034 if not r.match(proto): 1035 continue 1036 1037 return_type = r.group(1).strip() 1038 declaration_name = r.group(2) 1039 args = r.group(3) 1040 1041 if self.entry.identifier != declaration_name: 1042 self.emit_msg(ln, 1043 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1044 return 1045 1046 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1047 1048 self.output_declaration('function', declaration_name, 1049 typedef=True, 1050 functiontype=return_type, 1051 purpose=self.entry.declaration_purpose) 1052 return 1053 # 1054 # Not a function, try to parse a simple typedef. 1055 # 1056 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1057 if r.match(proto): 1058 declaration_name = r.group(1) 1059 1060 if self.entry.identifier != declaration_name: 1061 self.emit_msg(ln, 1062 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1063 return 1064 1065 self.output_declaration('typedef', declaration_name, 1066 purpose=self.entry.declaration_purpose) 1067 return 1068 1069 self.emit_msg(ln, "error: Cannot parse typedef!") 1070 1071 @staticmethod 1072 def process_export(function_set, line): 1073 """ 1074 process ``EXPORT_SYMBOL*`` tags 1075 1076 This method doesn't use any variable from the class, so declare it 1077 with a staticmethod decorator. 1078 """ 1079 1080 # We support documenting some exported symbols with different 1081 # names. A horrible hack. 1082 suffixes = [ '_noprof' ] 1083 1084 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1085 # multiple export lines would violate Kernel coding style. 1086 1087 if export_symbol.search(line): 1088 symbol = export_symbol.group(2) 1089 elif export_symbol_ns.search(line): 1090 symbol = export_symbol_ns.group(2) 1091 else: 1092 return False 1093 # 1094 # Found an export, trim out any special suffixes 1095 # 1096 for suffix in suffixes: 1097 # Be backward compatible with Python < 3.9 1098 if symbol.endswith(suffix): 1099 symbol = symbol[:-len(suffix)] 1100 function_set.add(symbol) 1101 return True 1102 1103 def process_normal(self, ln, line): 1104 """ 1105 STATE_NORMAL: looking for the ``/**`` to begin everything. 1106 """ 1107 1108 if not doc_start.match(line): 1109 return 1110 1111 # start a new entry 1112 self.reset_state(ln) 1113 1114 # next line is always the function name 1115 self.state = state.NAME 1116 1117 def process_name(self, ln, line): 1118 """ 1119 STATE_NAME: Looking for the "name - description" line 1120 """ 1121 # 1122 # Check for a DOC: block and handle them specially. 1123 # 1124 if doc_block.search(line): 1125 1126 if not doc_block.group(1): 1127 self.entry.begin_section(ln, "Introduction") 1128 else: 1129 self.entry.begin_section(ln, doc_block.group(1)) 1130 1131 self.entry.identifier = self.entry.section 1132 self.state = state.DOCBLOCK 1133 # 1134 # Otherwise we're looking for a normal kerneldoc declaration line. 1135 # 1136 elif doc_decl.search(line): 1137 self.entry.identifier = doc_decl.group(1) 1138 1139 # Test for data declaration 1140 if doc_begin_data.search(line): 1141 self.entry.decl_type = doc_begin_data.group(1) 1142 self.entry.identifier = doc_begin_data.group(2) 1143 # 1144 # Look for a function description 1145 # 1146 elif doc_begin_func.search(line): 1147 self.entry.identifier = doc_begin_func.group(1) 1148 self.entry.decl_type = "function" 1149 # 1150 # We struck out. 1151 # 1152 else: 1153 self.emit_msg(ln, 1154 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1155 self.state = state.NORMAL 1156 return 1157 # 1158 # OK, set up for a new kerneldoc entry. 1159 # 1160 self.state = state.BODY 1161 self.entry.identifier = self.entry.identifier.strip(" ") 1162 # if there's no @param blocks need to set up default section here 1163 self.entry.begin_section(ln + 1) 1164 # 1165 # Find the description portion, which *should* be there but 1166 # isn't always. 1167 # (We should be able to capture this from the previous parsing - someday) 1168 # 1169 r = KernRe("[-:](.*)") 1170 if r.search(line): 1171 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1172 self.state = state.DECLARATION 1173 else: 1174 self.entry.declaration_purpose = "" 1175 1176 if not self.entry.declaration_purpose and self.config.wshort_desc: 1177 self.emit_msg(ln, 1178 f"missing initial short description on line:\n{line}") 1179 1180 if not self.entry.identifier and self.entry.decl_type != "enum": 1181 self.emit_msg(ln, 1182 f"wrong kernel-doc identifier on line:\n{line}") 1183 self.state = state.NORMAL 1184 1185 if self.config.verbose: 1186 self.emit_msg(ln, 1187 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1188 warning=False) 1189 # 1190 # Failed to find an identifier. Emit a warning 1191 # 1192 else: 1193 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1194 1195 def is_new_section(self, ln, line): 1196 """ 1197 Helper function to determine if a new section is being started. 1198 """ 1199 if doc_sect.search(line): 1200 self.state = state.BODY 1201 # 1202 # Pick out the name of our new section, tweaking it if need be. 1203 # 1204 newsection = doc_sect.group(1) 1205 if newsection.lower() == 'description': 1206 newsection = 'Description' 1207 elif newsection.lower() == 'context': 1208 newsection = 'Context' 1209 self.state = state.SPECIAL_SECTION 1210 elif newsection.lower() in ["@return", "@returns", 1211 "return", "returns"]: 1212 newsection = "Return" 1213 self.state = state.SPECIAL_SECTION 1214 elif newsection[0] == '@': 1215 self.state = state.SPECIAL_SECTION 1216 # 1217 # Initialize the contents, and get the new section going. 1218 # 1219 newcontents = doc_sect.group(2) 1220 if not newcontents: 1221 newcontents = "" 1222 self.dump_section() 1223 self.entry.begin_section(ln, newsection) 1224 self.entry.leading_space = None 1225 1226 self.entry.add_text(newcontents.lstrip()) 1227 return True 1228 return False 1229 1230 def is_comment_end(self, ln, line): 1231 """ 1232 Helper function to detect (and effect) the end of a kerneldoc comment. 1233 """ 1234 if doc_end.search(line): 1235 self.dump_section() 1236 1237 # Look for doc_com + <text> + doc_end: 1238 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1239 if r.match(line): 1240 self.emit_msg(ln, f"suspicious ending line: {line}") 1241 1242 self.entry.prototype = "" 1243 self.entry.new_start_line = ln + 1 1244 1245 self.state = state.PROTO 1246 return True 1247 return False 1248 1249 1250 def process_decl(self, ln, line): 1251 """ 1252 STATE_DECLARATION: We've seen the beginning of a declaration. 1253 """ 1254 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1255 return 1256 # 1257 # Look for anything with the " * " line beginning. 1258 # 1259 if doc_content.search(line): 1260 cont = doc_content.group(1) 1261 # 1262 # A blank line means that we have moved out of the declaration 1263 # part of the comment (without any "special section" parameter 1264 # descriptions). 1265 # 1266 if cont == "": 1267 self.state = state.BODY 1268 # 1269 # Otherwise we have more of the declaration section to soak up. 1270 # 1271 else: 1272 self.entry.declaration_purpose = \ 1273 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1274 else: 1275 # Unknown line, ignore 1276 self.emit_msg(ln, f"bad line: {line}") 1277 1278 1279 def process_special(self, ln, line): 1280 """ 1281 STATE_SPECIAL_SECTION: a section ending with a blank line. 1282 """ 1283 # 1284 # If we have hit a blank line (only the " * " marker), then this 1285 # section is done. 1286 # 1287 if KernRe(r"\s*\*\s*$").match(line): 1288 self.entry.begin_section(ln, dump = True) 1289 self.state = state.BODY 1290 return 1291 # 1292 # Not a blank line, look for the other ways to end the section. 1293 # 1294 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1295 return 1296 # 1297 # OK, we should have a continuation of the text for this section. 1298 # 1299 if doc_content.search(line): 1300 cont = doc_content.group(1) 1301 # 1302 # If the lines of text after the first in a special section have 1303 # leading white space, we need to trim it out or Sphinx will get 1304 # confused. For the second line (the None case), see what we 1305 # find there and remember it. 1306 # 1307 if self.entry.leading_space is None: 1308 r = KernRe(r'^(\s+)') 1309 if r.match(cont): 1310 self.entry.leading_space = len(r.group(1)) 1311 else: 1312 self.entry.leading_space = 0 1313 # 1314 # Otherwise, before trimming any leading chars, be *sure* 1315 # that they are white space. We should maybe warn if this 1316 # isn't the case. 1317 # 1318 for i in range(0, self.entry.leading_space): 1319 if cont[i] != " ": 1320 self.entry.leading_space = i 1321 break 1322 # 1323 # Add the trimmed result to the section and we're done. 1324 # 1325 self.entry.add_text(cont[self.entry.leading_space:]) 1326 else: 1327 # Unknown line, ignore 1328 self.emit_msg(ln, f"bad line: {line}") 1329 1330 def process_body(self, ln, line): 1331 """ 1332 STATE_BODY: the bulk of a kerneldoc comment. 1333 """ 1334 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1335 return 1336 1337 if doc_content.search(line): 1338 cont = doc_content.group(1) 1339 self.entry.add_text(cont) 1340 else: 1341 # Unknown line, ignore 1342 self.emit_msg(ln, f"bad line: {line}") 1343 1344 def process_inline_name(self, ln, line): 1345 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1346 1347 if doc_inline_sect.search(line): 1348 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1349 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1350 self.state = state.INLINE_TEXT 1351 elif doc_inline_end.search(line): 1352 self.dump_section() 1353 self.state = state.PROTO 1354 elif doc_content.search(line): 1355 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1356 self.state = state.PROTO 1357 1358 # 1359 # Don't let it add partial comments at the code, as breaks the 1360 # logic meant to remove comments from prototypes. 1361 # 1362 self.process_proto_type(ln, "/**\n" + line) 1363 # else ... ?? 1364 1365 def process_inline_text(self, ln, line): 1366 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1367 1368 if doc_inline_end.search(line): 1369 self.dump_section() 1370 self.state = state.PROTO 1371 elif doc_content.search(line): 1372 self.entry.add_text(doc_content.group(1)) 1373 # else ... ?? 1374 1375 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1376 """ 1377 Handle syscall definitions. 1378 """ 1379 1380 is_void = False 1381 1382 # Strip newlines/CR's 1383 proto = re.sub(r'[\r\n]+', ' ', proto) 1384 1385 # Check if it's a SYSCALL_DEFINE0 1386 if 'SYSCALL_DEFINE0' in proto: 1387 is_void = True 1388 1389 # Replace SYSCALL_DEFINE with correct return type & function name 1390 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1391 1392 r = KernRe(r'long\s+(sys_.*?),') 1393 if r.search(proto): 1394 proto = KernRe(',').sub('(', proto, count=1) 1395 elif is_void: 1396 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1397 1398 # Now delete all of the odd-numbered commas in the proto 1399 # so that argument types & names don't have a comma between them 1400 count = 0 1401 length = len(proto) 1402 1403 if is_void: 1404 length = 0 # skip the loop if is_void 1405 1406 for ix in range(length): 1407 if proto[ix] == ',': 1408 count += 1 1409 if count % 2 == 1: 1410 proto = proto[:ix] + ' ' + proto[ix + 1:] 1411 1412 return proto 1413 1414 def tracepoint_munge(self, ln, proto): 1415 """ 1416 Handle tracepoint definitions. 1417 """ 1418 1419 tracepointname = None 1420 tracepointargs = None 1421 1422 # Match tracepoint name based on different patterns 1423 r = KernRe(r'TRACE_EVENT\((.*?),') 1424 if r.search(proto): 1425 tracepointname = r.group(1) 1426 1427 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1428 if r.search(proto): 1429 tracepointname = r.group(1) 1430 1431 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1432 if r.search(proto): 1433 tracepointname = r.group(2) 1434 1435 if tracepointname: 1436 tracepointname = tracepointname.lstrip() 1437 1438 r = KernRe(r'TP_PROTO\((.*?)\)') 1439 if r.search(proto): 1440 tracepointargs = r.group(1) 1441 1442 if not tracepointname or not tracepointargs: 1443 self.emit_msg(ln, 1444 f"Unrecognized tracepoint format:\n{proto}\n") 1445 else: 1446 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1447 self.entry.identifier = f"trace_{self.entry.identifier}" 1448 1449 return proto 1450 1451 def process_proto_function(self, ln, line): 1452 """Ancillary routine to process a function prototype.""" 1453 1454 # strip C99-style comments to end of line 1455 line = KernRe(r"//.*$", re.S).sub('', line) 1456 # 1457 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1458 # 1459 if KernRe(r'\s*#\s*define').match(line): 1460 self.entry.prototype = line 1461 elif not line.startswith('#'): # skip other preprocessor stuff 1462 r = KernRe(r'([^\{]*)') 1463 if r.match(line): 1464 self.entry.prototype += r.group(1) + " " 1465 # 1466 # If we now have the whole prototype, clean it up and declare victory. 1467 # 1468 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1469 # strip comments and surrounding spaces 1470 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1471 # 1472 # Handle self.entry.prototypes for function pointers like: 1473 # int (*pcs_config)(struct foo) 1474 # by turning it into 1475 # int pcs_config(struct foo) 1476 # 1477 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1478 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1479 # 1480 # Handle special declaration syntaxes 1481 # 1482 if 'SYSCALL_DEFINE' in self.entry.prototype: 1483 self.entry.prototype = self.syscall_munge(ln, 1484 self.entry.prototype) 1485 else: 1486 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1487 if r.search(self.entry.prototype): 1488 self.entry.prototype = self.tracepoint_munge(ln, 1489 self.entry.prototype) 1490 # 1491 # ... and we're done 1492 # 1493 self.dump_function(ln, self.entry.prototype) 1494 self.reset_state(ln) 1495 1496 def process_proto_type(self, ln, line): 1497 """ 1498 Ancillary routine to process a type. 1499 """ 1500 1501 # Strip C99-style comments and surrounding whitespace 1502 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1503 if not line: 1504 return # nothing to see here 1505 1506 # To distinguish preprocessor directive from regular declaration later. 1507 if line.startswith('#'): 1508 line += ";" 1509 # 1510 # Split the declaration on any of { } or ;, and accumulate pieces 1511 # until we hit a semicolon while not inside {brackets} 1512 # 1513 r = KernRe(r'(.*?)([{};])') 1514 for chunk in r.split(line): 1515 if chunk: # Ignore empty matches 1516 self.entry.prototype += chunk 1517 # 1518 # This cries out for a match statement ... someday after we can 1519 # drop Python 3.9 ... 1520 # 1521 if chunk == '{': 1522 self.entry.brcount += 1 1523 elif chunk == '}': 1524 self.entry.brcount -= 1 1525 elif chunk == ';' and self.entry.brcount <= 0: 1526 self.dump_declaration(ln, self.entry.prototype) 1527 self.reset_state(ln) 1528 return 1529 # 1530 # We hit the end of the line while still in the declaration; put 1531 # in a space to represent the newline. 1532 # 1533 self.entry.prototype += ' ' 1534 1535 def process_proto(self, ln, line): 1536 """STATE_PROTO: reading a function/whatever prototype.""" 1537 1538 if doc_inline_oneline.search(line): 1539 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1540 self.entry.add_text(doc_inline_oneline.group(2)) 1541 self.dump_section() 1542 1543 elif doc_inline_start.search(line): 1544 self.state = state.INLINE_NAME 1545 1546 elif self.entry.decl_type == 'function': 1547 self.process_proto_function(ln, line) 1548 1549 else: 1550 self.process_proto_type(ln, line) 1551 1552 def process_docblock(self, ln, line): 1553 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1554 1555 if doc_end.search(line): 1556 self.dump_section() 1557 self.output_declaration("doc", self.entry.identifier) 1558 self.reset_state(ln) 1559 1560 elif doc_content.search(line): 1561 self.entry.add_text(doc_content.group(1)) 1562 1563 def parse_export(self): 1564 """ 1565 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1566 """ 1567 1568 export_table = set() 1569 1570 try: 1571 with open(self.fname, "r", encoding="utf8", 1572 errors="backslashreplace") as fp: 1573 1574 for line in fp: 1575 self.process_export(export_table, line) 1576 1577 except IOError: 1578 return None 1579 1580 return export_table 1581 1582 #: The state/action table telling us which function to invoke in each state. 1583 state_actions = { 1584 state.NORMAL: process_normal, 1585 state.NAME: process_name, 1586 state.BODY: process_body, 1587 state.DECLARATION: process_decl, 1588 state.SPECIAL_SECTION: process_special, 1589 state.INLINE_NAME: process_inline_name, 1590 state.INLINE_TEXT: process_inline_text, 1591 state.PROTO: process_proto, 1592 state.DOCBLOCK: process_docblock, 1593 } 1594 1595 def parse_kdoc(self): 1596 """ 1597 Open and process each line of a C source file. 1598 The parsing is controlled via a state machine, and the line is passed 1599 to a different process function depending on the state. The process 1600 function may update the state as needed. 1601 1602 Besides parsing kernel-doc tags, it also parses export symbols. 1603 """ 1604 1605 prev = "" 1606 prev_ln = None 1607 export_table = set() 1608 1609 try: 1610 with open(self.fname, "r", encoding="utf8", 1611 errors="backslashreplace") as fp: 1612 for ln, line in enumerate(fp): 1613 1614 line = line.expandtabs().strip("\n") 1615 1616 # Group continuation lines on prototypes 1617 if self.state == state.PROTO: 1618 if line.endswith("\\"): 1619 prev += line.rstrip("\\") 1620 if not prev_ln: 1621 prev_ln = ln 1622 continue 1623 1624 if prev: 1625 ln = prev_ln 1626 line = prev + line 1627 prev = "" 1628 prev_ln = None 1629 1630 self.config.log.debug("%d %s: %s", 1631 ln, state.name[self.state], 1632 line) 1633 1634 # This is an optimization over the original script. 1635 # There, when export_file was used for the same file, 1636 # it was read twice. Here, we use the already-existing 1637 # loop to parse exported symbols as well. 1638 # 1639 if (self.state != state.NORMAL) or \ 1640 not self.process_export(export_table, line): 1641 # Hand this line to the appropriate state handler 1642 self.state_actions[self.state](self, ln, line) 1643 1644 self.emit_unused_warnings() 1645 1646 except OSError: 1647 self.config.log.error(f"Error: Cannot open file {self.fname}") 1648 1649 return export_table, self.entries 1650