#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702

"""
Classes and functions related to reading a C language source or header FILE
and extract embedded documentation comments from it.
"""

import sys
import re
from pprint import pformat

from kdoc.kdoc_re import NestedMatch, KernRe
from kdoc.kdoc_item import KdocItem

#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# Let's declare them in lowercase outside any class to make it easier to
# convert from the Perl script.
#
# As those are evaluated at the beginning, no need to cache them
#

# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

doc_end = KernRe(r'\*/', cache=False)
doc_com = KernRe(r'\s*\*\s*', cache=False)
doc_com_body = KernRe(r'\s*\* ?', cache=False)
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
known_sections = KernRe(known_section_names, flags=re.I)
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache=False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache=False)

#
# Ancillary functions
#

multi_space = KernRe(r'\s\s+')


def trim_whitespace(s):
    """
    A little helper to get rid of excess white space.

    Leading/trailing white space is stripped and every run of two or more
    white space characters is replaced by a single space.
    """
    return multi_space.sub(' ', s.strip())


def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    Members are private from a ``/* private: */`` comment up to the next
    ``/* public: */`` comment, or up to the end of ``text`` when there is
    no such comment. All remaining C comments are dropped as well and the
    result is stripped.
    """
    #
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()


class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL          = 0        #: Normal code.
    NAME            = 1        #: Looking for function name.
    DECLARATION     = 2        #: We have seen a declaration which might not be done.
    BODY            = 3        #: The body of the comment.
    SPECIAL_SECTION = 4        #: Doc section ending with a blank line.
    PROTO           = 5        #: Scanning prototype.
    DOCBLOCK        = 6        #: Documentation block.
    INLINE_NAME     = 7        #: Gathering doc outside main block.
    INLINE_TEXT     = 8        #: Reading the body of inline docs.

    #: Names for each parser state, indexed by the state value.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.


class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    An entry accumulates the text, sections, parameters and warnings that
    belong to a single kernel-doc comment while it is being parsed.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        :param config: configuration object; its ``log`` attribute is used
                       to emit non-warning messages.
        :param fname:  name of the file being parsed, used on messages.
        :param ln:     line number where the entry was found; the
                       declaration is recorded as starting at ``ln + 1``.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # Section currently being filled. Those are (re)set by
        # begin_section(); initialize them here too, so that dump_section()
        # doesn't fail with AttributeError if it happens to run first.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """
        Returns a string with all content texts that were added, one per
        line, always terminated by a newline.
        """
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages are logged right away. Warnings are only
        stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title=SECTION_DEFAULT, dump=False):
        """
        Begin a new section.

        :param line_no: line where the section starts.
        :param title:   section name.
        :param dump:    when true, store the section that was being
                        filled before starting the new one.
        """
        if dump:
            self.dump_section(start_new=True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name matches ``type_param`` (``@name``) are stored
        as parameter descriptions; everything else goes to
        ``self.sections``. When ``start_new`` is true, the entry goes back
        to an empty default section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []


python_warning = False


class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
    undescribed = "-- undescribed --"

    def __init__(self, config, fname, xforms):
        """
        Initialize internal variables.

        :param config: configuration object (provides ``log``, among
                       other things).
        :param fname:  name of the file to be parsed.
        :param xforms: object whose ``apply(kind, text)`` method applies
                       the source transformations for a kind of
                       declaration.
        """

        self.fname = fname
        self.config = config
        self.xforms = xforms

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        global python_warning
        if (not python_warning and
            sys.version_info.major == 3 and sys.version_info.minor < 7):

            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')
            python_warning = True

    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        If there is an entry being processed, the message is delegated to
        it. Otherwise, it is logged directly.
        """

        if self.entry:
            self.entry.emit_msg(ln, msg, warning=warning)
            return

        log_msg = f"{self.fname}:{ln} {msg}"

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dump section contents to arrays/hashes intended for that purpose.

        This is a no-op when there is no entry being processed.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Store the entry into an entry array.

        The actual output and output filters will be handled elsewhere.

        :param dtype: declaration type (``function``, ``struct``, ...).
        :param name:  name of the declared symbol.
        :param args:  extra keyword arguments, passed as-is to ``KdocItem``.
        """

        item = KdocItem(name, self.fname, dtype,
                        self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def emit_unused_warnings(self):
        """
        When the parser fails to produce a valid entry, it places some
        warnings under `entry.warnings` that will be discarded when resetting
        the state.

        Ensure that those warnings are not lost.

        .. note::

              Because we are calling `config.warning()` here, those
              warnings are not filtered by the `-W` parameters: they will all
              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
              `-Wcontents-before-sections` are used.

              Allowing those warnings to be filtered is complex, because it
              would require storing them in a buffer and then filtering them
              during the output step of the code, depending on the
              selected symbols.
        """
        if self.entry and self.entry not in self.entries:
            for log_msg in self.entry.warnings:
                self.config.warning(log_msg)

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.

        Warnings pending on the previous entry are emitted first.
        """

        self.emit_unused_warnings()

        self.entry = KernelEntry(self.config, self.fname, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.

        :param ln:               line number, used on warnings.
        :param decl_type:        type of the declaration owning the parameter.
        :param param:            parameter (or member) name.
        :param dtype:            parameter type; empty for typeless ones.
        :param org_arg:          original argument text, stored as the
                                 parameter type after squeezing spaces.
        :param declaration_name: name of the declaration, used on warnings.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                if len(param) > 3:  # there is a name provided, use that
                    param = param[:-3]
                if not self.entry.parameterdescs.get(param):
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = multi_space.sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg

    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        :param ln:               line number, used on warnings.
        :param decl_type:        type of the declaration being parsed.
        :param args:             string with all arguments/members.
        :param splitter:         separator between arguments (``,`` for
                                 functions, ``;`` for struct members).
        :param declaration_name: name of the declaration, used on warnings.
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)')  # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                #
                # Use a name of its own here: "args" is the string whose
                # split is being iterated by the outer loop.
                #
                decls = KernRe(r'\s*,\s*').split(arg)
                decls[0] = re.sub(r'(\*+)\s*', r' \1', decls[0])
                #
                # decls[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(decls[0]):
                    decls[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in decls[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in decls:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check the documented sections against the parameter list, emitting
        a warning for each section that is neither a known section name
        nor the name of a parameter/member of the declaration.
        """
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.

        Nothing is checked unless ``config.wreturn`` is set.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    def split_struct_proto(self, proto):
        """
        Split apart a structure prototype; returns (struct|union, name,
        members) or ``None``.

        Both the plain ``struct name { ... }`` and the
        ``typedef struct { ... } name;`` forms are handled.
        """

        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))

        # On the typedef form, the name comes after the members
        r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
        if r.search(proto):
            return (r.group(1), r.group(3), r.group(2))
        return None

    def rewrite_struct_members(self, members):
        """
        Process ``struct``/``union`` members from the most deeply nested
        outward.

        Rewrite the members of a ``struct`` or ``union`` for easier formatting
        later on. Among other things, this function will turn a member like::

          struct { inner_members; } foo;

        into::

          struct foo; inner_members;
        """

        #
        # The trick is in the ``^{`` below - it prevents a match of an outer
        # ``struct``/``union`` until the inner one has been munged
        # (removing the ``{`` in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t)  # Reconstruct the original formatting
                dtype, _name, _lbr, content, _rbr, rest, _semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Pointer-to-function. Keep those on variables
                            # of their own: "dtype" must remain the
                            # struct/union keyword, as it is used again for
                            # the next declarator of a "struct {...} a, b;".
                            #
                            ftype, fname, extra = r.group(1), r.group(2), r.group(3)
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{ftype}{fname}{extra}; "
                            else:
                                newmember += f"{ftype}{s_id}.{fname}{extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                for mname in r.group(2).split(','):
                                    mname = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', mname)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{r.group(1)} {mname}; "
                                    else:
                                        newmember += f"{r.group(1)} {s_id}.{mname}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    def format_struct_decl(self, declaration):
        """
        Format the ``struct`` declaration into a standard form for inclusion
        in the resulting docs.
        """

        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if clause:
                if '}' in clause and level > 1:
                    level -= 1
                if not clause.startswith('#'):
                    declaration += "\t" * level
                declaration += "\t" + clause + "\n"
                if "{" in clause and "}" not in clause:
                    level += 1
        return declaration

    def dump_struct(self, ln, proto):
        """
        Store an entry for a ``struct`` or ``union``

        :param ln:    line number, used on warnings.
        :param proto: prototype text of the ``struct``/``union``.
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        members = self.xforms.apply("struct", members)

        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Store an ``enum`` inside self.entries array.

        :param ln:    line number, used on warnings.
        :param proto: prototype text of the ``enum``.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            if not arg.strip():
                continue

            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '@{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)

    def dump_var(self, ln, proto):
        """
        Store variables that are part of kAPI.

        :param ln:    line number, used on warnings.
        :param proto: prototype text of the variable.
        """
        VAR_ATTRIBS = [
            "extern",
            "const",
        ]
        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" + "|".join(VAR_ATTRIBS) + r")\b\s*)*"

        #
        # Store the full prototype before modifying it
        #
        full_proto = proto
        declaration_name = None

        #
        # Handle macro definitions
        #
        macro_prefixes = [
            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
        ]

        for r in macro_prefixes:
            match = r.search(proto)
            if match:
                declaration_name = match.group(1)
                break

        #
        # Drop comments and macros to have a pure C prototype
        #
        if not declaration_name:
            proto = self.xforms.apply("var", proto)

        proto = proto.rstrip()

        #
        # Variable name is at the end of the declaration
        #

        default_val = None

        r = KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
        if r.match(proto):
            if not declaration_name:
                declaration_name = r.group(1)

            default_val = r.group(2)
        else:
            r = KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")

            if r.match(proto):
                default_val = r.group(1)
        if not declaration_name:
            self.emit_msg(ln, f"{proto}: can't parse variable")
            return

        if default_val:
            default_val = default_val.lstrip("=").strip()

        self.output_declaration("var", declaration_name,
                                full_proto=full_proto,
                                default_val=default_val,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Store a data declaration inside self.entries array.

        The work is dispatched to the ``dump_*`` method matching
        ``self.entry.decl_type``.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
        elif self.entry.decl_type == "var":
            self.dump_var(ln, prototype)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype):
        """
        Store a function or function macro inside self.entries array.

        :param ln:        line number, used on warnings.
        :param prototype: prototype text of the function or macro.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'

        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True
        else:
            #
            # Apply the initial transformations.
            #
            prototype = self.xforms.apply("func", prototype)

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.
        #
        self.output_declaration(decl_type, declaration_name,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)

    def dump_typedef(self, ln, proto):
        """
        Store a ``typedef`` inside self.entries array.

        Function typedefs are stored as functions; everything else is
        stored as a plain ``typedef``.
        """
        #
        # We start by looking for function typedefs.
        #
        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.create_parameter_list(ln, 'function', args, ',', declaration_name)

            self.output_declaration('function', declaration_name,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return
        #
        # Not a function, try to parse a simple typedef.
        #
        r = KernRe(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_msg(ln, "error: Cannot parse typedef!")

    @staticmethod
    def process_export(function_set, line):
        """
        process ``EXPORT_SYMBOL*`` tags

        This method doesn't use any variable from the class, so declare it
        with a staticmethod decorator.

        :param function_set: set where the exported symbol name is added.
        :param line:         source line to be checked.

        Returns ``True`` if an export was found on ``line``, ``False``
        otherwise.
        """

        # We support documenting some exported symbols with different
        # names.  A horrible hack.
        suffixes = ['_noprof']

        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
        # multiple export lines would violate Kernel coding style.

        if export_symbol.search(line):
            symbol = export_symbol.group(2)
        elif export_symbol_ns.search(line):
            symbol = export_symbol_ns.group(2)
        else:
            return False
        #
        # Found an export, trim out any special suffixes
        #
        for suffix in suffixes:
            # Be backward compatible with Python < 3.9
            if symbol.endswith(suffix):
                symbol = symbol[:-len(suffix)]
        function_set.add(symbol)
        return True

    def process_normal(self, ln, line):
        """
        STATE_NORMAL: looking for the ``/**`` to begin everything.
        """

        if not doc_start.match(line):
            return

        # start a new entry
        self.reset_state(ln)

        # next line is always the function name
        self.state = state.NAME

    def process_name(self, ln, line):
        """
        STATE_NAME: Looking for the "name - description" line
        """
        #
        # Check for a DOC: block and handle them specially.
        #
        if doc_block.search(line):

            if not doc_block.group(1):
                self.entry.begin_section(ln, "Introduction")
            else:
                self.entry.begin_section(ln, doc_block.group(1))

            self.entry.identifier = self.entry.section
            self.state = state.DOCBLOCK
        #
        # Otherwise we're looking for a normal kerneldoc declaration line.
        #
        elif doc_decl.search(line):
            self.entry.identifier = doc_decl.group(1)

            # Test for data declaration
            if doc_begin_data.search(line):
                self.entry.decl_type = doc_begin_data.group(1)
                self.entry.identifier = doc_begin_data.group(2)
            #
            # Look for a function description
            #
            elif doc_begin_func.search(line):
                self.entry.identifier = doc_begin_func.group(1)
                self.entry.decl_type = "function"
            #
            # We struck out.
            #
            else:
                self.emit_msg(ln,
                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
                self.state = state.NORMAL
                return
            #
            # OK, set up for a new kerneldoc entry.
            #
            self.state = state.BODY
            self.entry.identifier = self.entry.identifier.strip(" ")
            # if there's no @param blocks need to set up default section here
            self.entry.begin_section(ln + 1)
            #
            # Find the description portion, which *should* be there but
            # isn't always.
1169 # (We should be able to capture this from the previous parsing - someday) 1170 # 1171 r = KernRe("[-:](.*)") 1172 if r.search(line): 1173 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1174 self.state = state.DECLARATION 1175 else: 1176 self.entry.declaration_purpose = "" 1177 1178 if not self.entry.declaration_purpose and self.config.wshort_desc: 1179 self.emit_msg(ln, 1180 f"missing initial short description on line:\n{line}") 1181 1182 if not self.entry.identifier and self.entry.decl_type != "enum": 1183 self.emit_msg(ln, 1184 f"wrong kernel-doc identifier on line:\n{line}") 1185 self.state = state.NORMAL 1186 1187 if self.config.verbose: 1188 self.emit_msg(ln, 1189 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1190 warning=False) 1191 # 1192 # Failed to find an identifier. Emit a warning 1193 # 1194 else: 1195 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1196 1197 def is_new_section(self, ln, line): 1198 """ 1199 Helper function to determine if a new section is being started. 1200 """ 1201 if doc_sect.search(line): 1202 self.state = state.BODY 1203 # 1204 # Pick out the name of our new section, tweaking it if need be. 1205 # 1206 newsection = doc_sect.group(1) 1207 if newsection.lower() == 'description': 1208 newsection = 'Description' 1209 elif newsection.lower() == 'context': 1210 newsection = 'Context' 1211 self.state = state.SPECIAL_SECTION 1212 elif newsection.lower() in ["@return", "@returns", 1213 "return", "returns"]: 1214 newsection = "Return" 1215 self.state = state.SPECIAL_SECTION 1216 elif newsection[0] == '@': 1217 self.state = state.SPECIAL_SECTION 1218 # 1219 # Initialize the contents, and get the new section going. 
1220 # 1221 newcontents = doc_sect.group(2) 1222 if not newcontents: 1223 newcontents = "" 1224 self.dump_section() 1225 self.entry.begin_section(ln, newsection) 1226 self.entry.leading_space = None 1227 1228 self.entry.add_text(newcontents.lstrip()) 1229 return True 1230 return False 1231 1232 def is_comment_end(self, ln, line): 1233 """ 1234 Helper function to detect (and effect) the end of a kerneldoc comment. 1235 """ 1236 if doc_end.search(line): 1237 self.dump_section() 1238 1239 # Look for doc_com + <text> + doc_end: 1240 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1241 if r.match(line): 1242 self.emit_msg(ln, f"suspicious ending line: {line}") 1243 1244 self.entry.prototype = "" 1245 self.entry.new_start_line = ln + 1 1246 1247 self.state = state.PROTO 1248 return True 1249 return False 1250 1251 1252 def process_decl(self, ln, line): 1253 """ 1254 STATE_DECLARATION: We've seen the beginning of a declaration. 1255 """ 1256 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1257 return 1258 # 1259 # Look for anything with the " * " line beginning. 1260 # 1261 if doc_content.search(line): 1262 cont = doc_content.group(1) 1263 # 1264 # A blank line means that we have moved out of the declaration 1265 # part of the comment (without any "special section" parameter 1266 # descriptions). 1267 # 1268 if cont == "": 1269 self.state = state.BODY 1270 # 1271 # Otherwise we have more of the declaration section to soak up. 1272 # 1273 else: 1274 self.entry.declaration_purpose = \ 1275 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1276 else: 1277 # Unknown line, ignore 1278 self.emit_msg(ln, f"bad line: {line}") 1279 1280 1281 def process_special(self, ln, line): 1282 """ 1283 STATE_SPECIAL_SECTION: a section ending with a blank line. 1284 """ 1285 # 1286 # If we have hit a blank line (only the " * " marker), then this 1287 # section is done. 
1288 # 1289 if KernRe(r"\s*\*\s*$").match(line): 1290 self.entry.begin_section(ln, dump = True) 1291 self.state = state.BODY 1292 return 1293 # 1294 # Not a blank line, look for the other ways to end the section. 1295 # 1296 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1297 return 1298 # 1299 # OK, we should have a continuation of the text for this section. 1300 # 1301 if doc_content.search(line): 1302 cont = doc_content.group(1) 1303 # 1304 # If the lines of text after the first in a special section have 1305 # leading white space, we need to trim it out or Sphinx will get 1306 # confused. For the second line (the None case), see what we 1307 # find there and remember it. 1308 # 1309 if self.entry.leading_space is None: 1310 r = KernRe(r'^(\s+)') 1311 if r.match(cont): 1312 self.entry.leading_space = len(r.group(1)) 1313 else: 1314 self.entry.leading_space = 0 1315 # 1316 # Otherwise, before trimming any leading chars, be *sure* 1317 # that they are white space. We should maybe warn if this 1318 # isn't the case. 1319 # 1320 for i in range(0, self.entry.leading_space): 1321 if cont[i] != " ": 1322 self.entry.leading_space = i 1323 break 1324 # 1325 # Add the trimmed result to the section and we're done. 1326 # 1327 self.entry.add_text(cont[self.entry.leading_space:]) 1328 else: 1329 # Unknown line, ignore 1330 self.emit_msg(ln, f"bad line: {line}") 1331 1332 def process_body(self, ln, line): 1333 """ 1334 STATE_BODY: the bulk of a kerneldoc comment. 
1335 """ 1336 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1337 return 1338 1339 if doc_content.search(line): 1340 cont = doc_content.group(1) 1341 self.entry.add_text(cont) 1342 else: 1343 # Unknown line, ignore 1344 self.emit_msg(ln, f"bad line: {line}") 1345 1346 def process_inline_name(self, ln, line): 1347 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1348 1349 if doc_inline_sect.search(line): 1350 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1351 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1352 self.state = state.INLINE_TEXT 1353 elif doc_inline_end.search(line): 1354 self.dump_section() 1355 self.state = state.PROTO 1356 elif doc_content.search(line): 1357 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1358 self.state = state.PROTO 1359 1360 # 1361 # Don't let it add partial comments at the code, as breaks the 1362 # logic meant to remove comments from prototypes. 1363 # 1364 self.process_proto_type(ln, "/**\n" + line) 1365 # else ... ?? 1366 1367 def process_inline_text(self, ln, line): 1368 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1369 1370 if doc_inline_end.search(line): 1371 self.dump_section() 1372 self.state = state.PROTO 1373 elif doc_content.search(line): 1374 self.entry.add_text(doc_content.group(1)) 1375 # else ... ?? 1376 1377 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1378 """ 1379 Handle syscall definitions. 
1380 """ 1381 1382 is_void = False 1383 1384 # Strip newlines/CR's 1385 proto = re.sub(r'[\r\n]+', ' ', proto) 1386 1387 # Check if it's a SYSCALL_DEFINE0 1388 if 'SYSCALL_DEFINE0' in proto: 1389 is_void = True 1390 1391 # Replace SYSCALL_DEFINE with correct return type & function name 1392 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1393 1394 r = KernRe(r'long\s+(sys_.*?),') 1395 if r.search(proto): 1396 proto = KernRe(',').sub('(', proto, count=1) 1397 elif is_void: 1398 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1399 1400 # Now delete all of the odd-numbered commas in the proto 1401 # so that argument types & names don't have a comma between them 1402 count = 0 1403 length = len(proto) 1404 1405 if is_void: 1406 length = 0 # skip the loop if is_void 1407 1408 for ix in range(length): 1409 if proto[ix] == ',': 1410 count += 1 1411 if count % 2 == 1: 1412 proto = proto[:ix] + ' ' + proto[ix + 1:] 1413 1414 return proto 1415 1416 def tracepoint_munge(self, ln, proto): 1417 """ 1418 Handle tracepoint definitions. 
1419 """ 1420 1421 tracepointname = None 1422 tracepointargs = None 1423 1424 # Match tracepoint name based on different patterns 1425 r = KernRe(r'TRACE_EVENT\((.*?),') 1426 if r.search(proto): 1427 tracepointname = r.group(1) 1428 1429 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1430 if r.search(proto): 1431 tracepointname = r.group(1) 1432 1433 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1434 if r.search(proto): 1435 tracepointname = r.group(2) 1436 1437 if tracepointname: 1438 tracepointname = tracepointname.lstrip() 1439 1440 r = KernRe(r'TP_PROTO\((.*?)\)') 1441 if r.search(proto): 1442 tracepointargs = r.group(1) 1443 1444 if not tracepointname or not tracepointargs: 1445 self.emit_msg(ln, 1446 f"Unrecognized tracepoint format:\n{proto}\n") 1447 else: 1448 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1449 self.entry.identifier = f"trace_{self.entry.identifier}" 1450 1451 return proto 1452 1453 def process_proto_function(self, ln, line): 1454 """Ancillary routine to process a function prototype.""" 1455 1456 # strip C99-style comments to end of line 1457 line = KernRe(r"//.*$", re.S).sub('', line) 1458 # 1459 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1460 # 1461 if KernRe(r'\s*#\s*define').match(line): 1462 self.entry.prototype = line 1463 elif not line.startswith('#'): # skip other preprocessor stuff 1464 r = KernRe(r'([^\{]*)') 1465 if r.match(line): 1466 self.entry.prototype += r.group(1) + " " 1467 # 1468 # If we now have the whole prototype, clean it up and declare victory. 
1469 # 1470 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1471 # strip comments and surrounding spaces 1472 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1473 # 1474 # Handle self.entry.prototypes for function pointers like: 1475 # int (*pcs_config)(struct foo) 1476 # by turning it into 1477 # int pcs_config(struct foo) 1478 # 1479 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1480 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1481 # 1482 # Handle special declaration syntaxes 1483 # 1484 if 'SYSCALL_DEFINE' in self.entry.prototype: 1485 self.entry.prototype = self.syscall_munge(ln, 1486 self.entry.prototype) 1487 else: 1488 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1489 if r.search(self.entry.prototype): 1490 self.entry.prototype = self.tracepoint_munge(ln, 1491 self.entry.prototype) 1492 # 1493 # ... and we're done 1494 # 1495 self.dump_function(ln, self.entry.prototype) 1496 self.reset_state(ln) 1497 1498 def process_proto_type(self, ln, line): 1499 """ 1500 Ancillary routine to process a type. 1501 """ 1502 1503 # Strip C99-style comments and surrounding whitespace 1504 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1505 if not line: 1506 return # nothing to see here 1507 1508 # To distinguish preprocessor directive from regular declaration later. 1509 if line.startswith('#'): 1510 line += ";" 1511 # 1512 # Split the declaration on any of { } or ;, and accumulate pieces 1513 # until we hit a semicolon while not inside {brackets} 1514 # 1515 r = KernRe(r'(.*?)([{};])') 1516 for chunk in r.split(line): 1517 if chunk: # Ignore empty matches 1518 self.entry.prototype += chunk 1519 # 1520 # This cries out for a match statement ... someday after we can 1521 # drop Python 3.9 ... 
1522 # 1523 if chunk == '{': 1524 self.entry.brcount += 1 1525 elif chunk == '}': 1526 self.entry.brcount -= 1 1527 elif chunk == ';' and self.entry.brcount <= 0: 1528 self.dump_declaration(ln, self.entry.prototype) 1529 self.reset_state(ln) 1530 return 1531 # 1532 # We hit the end of the line while still in the declaration; put 1533 # in a space to represent the newline. 1534 # 1535 self.entry.prototype += ' ' 1536 1537 def process_proto(self, ln, line): 1538 """STATE_PROTO: reading a function/whatever prototype.""" 1539 1540 if doc_inline_oneline.search(line): 1541 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1542 self.entry.add_text(doc_inline_oneline.group(2)) 1543 self.dump_section() 1544 1545 elif doc_inline_start.search(line): 1546 self.state = state.INLINE_NAME 1547 1548 elif self.entry.decl_type == 'function': 1549 self.process_proto_function(ln, line) 1550 1551 else: 1552 self.process_proto_type(ln, line) 1553 1554 def process_docblock(self, ln, line): 1555 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1556 1557 if doc_end.search(line): 1558 self.dump_section() 1559 self.output_declaration("doc", self.entry.identifier) 1560 self.reset_state(ln) 1561 1562 elif doc_content.search(line): 1563 self.entry.add_text(doc_content.group(1)) 1564 1565 def parse_export(self): 1566 """ 1567 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1568 """ 1569 1570 export_table = set() 1571 1572 try: 1573 with open(self.fname, "r", encoding="utf8", 1574 errors="backslashreplace") as fp: 1575 1576 for line in fp: 1577 self.process_export(export_table, line) 1578 1579 except IOError: 1580 return None 1581 1582 return export_table 1583 1584 #: The state/action table telling us which function to invoke in each state. 
1585 state_actions = { 1586 state.NORMAL: process_normal, 1587 state.NAME: process_name, 1588 state.BODY: process_body, 1589 state.DECLARATION: process_decl, 1590 state.SPECIAL_SECTION: process_special, 1591 state.INLINE_NAME: process_inline_name, 1592 state.INLINE_TEXT: process_inline_text, 1593 state.PROTO: process_proto, 1594 state.DOCBLOCK: process_docblock, 1595 } 1596 1597 def parse_kdoc(self): 1598 """ 1599 Open and process each line of a C source file. 1600 The parsing is controlled via a state machine, and the line is passed 1601 to a different process function depending on the state. The process 1602 function may update the state as needed. 1603 1604 Besides parsing kernel-doc tags, it also parses export symbols. 1605 """ 1606 1607 prev = "" 1608 prev_ln = None 1609 export_table = set() 1610 1611 try: 1612 with open(self.fname, "r", encoding="utf8", 1613 errors="backslashreplace") as fp: 1614 for ln, line in enumerate(fp): 1615 1616 line = line.expandtabs().strip("\n") 1617 1618 # Group continuation lines on prototypes 1619 if self.state == state.PROTO: 1620 if line.endswith("\\"): 1621 prev += line.rstrip("\\") 1622 if not prev_ln: 1623 prev_ln = ln 1624 continue 1625 1626 if prev: 1627 ln = prev_ln 1628 line = prev + line 1629 prev = "" 1630 prev_ln = None 1631 1632 self.config.log.debug("%d %s: %s", 1633 ln, state.name[self.state], 1634 line) 1635 1636 # This is an optimization over the original script. 1637 # There, when export_file was used for the same file, 1638 # it was read twice. Here, we use the already-existing 1639 # loop to parse exported symbols as well. 1640 # 1641 if (self.state != state.NORMAL) or \ 1642 not self.process_export(export_table, line): 1643 # Hand this line to the appropriate state handler 1644 self.state_actions[self.state](self, ln, line) 1645 1646 self.emit_unused_warnings() 1647 1648 except OSError: 1649 self.config.log.error(f"Error: Cannot open file {self.fname}") 1650 1651 return export_table, self.entries 1652