1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.kdoc_re import NestedMatch, KernRe 17from kdoc.kdoc_item import KdocItem 18 19# 20# Regular expressions used to parse kernel-doc markups at KernelDoc class. 21# 22# Let's declare them in lowercase outside any class to make it easier to 23# convert from the Perl script. 24# 25# As those are evaluated at the beginning, no need to cache them 26# 27 28# Allow whitespace at end of comment start. 29doc_start = KernRe(r'^/\*\*\s*$', cache=False) 30 31doc_end = KernRe(r'\*/', cache=False) 32doc_com = KernRe(r'\s*\*\s*', cache=False) 33doc_com_body = KernRe(r'\s*\* ?', cache=False) 34doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 35 36# @params and a strictly limited set of supported section names 37# Specifically: 38# Match @word: 39# @...: 40# @{section-name}: 41# while trying to not match literal block starts like "example::" 42# 43known_section_names = 'description|context|returns?|notes?|examples?' 
# Case-insensitive matcher for the supported section names.
known_sections = KernRe(known_section_names, flags = re.I)
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * '
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Regexes here are guaranteed to have the end delimiter matching
# the start delimiter. Yet, right now, only one replace group
# is allowed.
#
struct_nested_prefixes = [
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Ancillary functions
#

multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    A little helper to get rid of excess white space.

    Leading/trailing white space is stripped and every run of two or more
    white space characters is replaced by a single blank.
    """
    return multi_space.sub(' ', s.strip())

def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    Returns the text with private regions and all C comments removed.
    """
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL          = 0        #: Normal code.
    NAME            = 1        #: Looking for function name.
    DECLARATION     = 2        #: We have seen a declaration which might not be done.
    BODY            = 3        #: The body of the comment.
    SPECIAL_SECTION = 4        #: Doc section ending with a blank line.
    PROTO           = 5        #: Scanning prototype.
    DOCBLOCK        = 6        #: Documentation block.
    INLINE_NAME     = 7        #: Gathering doc outside main block.
    INLINE_TEXT     = 8        #: Reading the body of inline docs.

    #: Names for each parser state.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    It accumulates the text of the section being parsed, the finished
    sections, the parameter descriptions and the warnings produced while
    parsing a single kernel-doc comment.
    """

    def __init__(self, config, fname, ln):
        """
        Initialize an empty entry for file ``fname``; ``ln`` is the line
        number where the kernel-doc comment was found.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # Current section bookkeeping. Initialized here so that
        # dump_section() is safe even if begin_section() was never called.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages are logged right away; warnings are stored
        at ``self.warnings`` to be reported later.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section named ``title`` starting at ``line_no``.

        If ``dump`` is set, the section currently being gathered is stored
        first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections named like ``@param`` are stored as parameter descriptions;
        everything else goes to ``self.sections``. When ``start_new`` is
        true, the entry is reset to an empty default section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
    undescribed = "-- undescribed --"

    def __init__(self, config, fname, xforms):
        """Initialize internal variables"""

        self.fname = fname
        self.config = config
        self.xforms = xforms

        # Initial state for the state machines
        self.state = state.NORMAL

        # Store entry currently being processed
        self.entry = None

        # Place all potential outputs into an array
        self.entries = []

        #
        # We need Python 3.7 for its "dicts remember the insertion
        # order" guarantee
        #
        global python_warning
        if (not python_warning and
            sys.version_info.major == 3 and sys.version_info.minor < 7):

            self.emit_msg(0,
                          'Python 3.7 or later is required for correct results')
            python_warning = True

    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        While an entry is being processed the message is delegated to it;
        otherwise it is sent straight to the logger.
        """

        if self.entry:
            self.entry.emit_msg(ln, msg, warning=warning)
            return

        log_msg = f"{self.fname}:{ln} {msg}"

        if warning:
            self.config.log.warning(log_msg)
        else:
            self.config.log.info(log_msg)

    def dump_section(self, start_new=True):
        """
        Dump section contents to arrays/hashes intended for that purpose.
        """

        if self.entry:
            self.entry.dump_section(start_new)

    # TODO: rename it to store_declaration after removal of kernel-doc.pl
    def output_declaration(self, dtype, name, **args):
        """
        Store the entry into an entry array.

        The actual output and output filters will be handled elsewhere.
        """

        item = KdocItem(name, self.fname, dtype,
                        self.entry.declaration_start_line, **args)
        item.warnings = self.entry.warnings

        # Drop empty sections
        # TODO: improve empty sections logic to emit warnings
        sections = self.entry.sections
        for section in ["Description", "Return"]:
            if section in sections and not sections[section].rstrip():
                del sections[section]
        item.set_sections(sections, self.entry.section_start_lines)
        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                        self.entry.parametertypes,
                        self.entry.parameterdesc_start_lines)
        self.entries.append(item)

        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))

    def emit_unused_warnings(self):
        """
        When the parser fails to produce a valid entry, it places some
        warnings under `entry.warnings` that will be discarded when resetting
        the state.

        Ensure that those warnings are not lost.

        .. note::

              Because we are calling `config.warning()` here, those
              warnings are not filtered by the `-W` parameters: they will all
              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
              `-Wcontents-before-sections` are used.

              Allowing those warnings to be filtered is complex, because it
              would require storing them in a buffer and then filtering them
              during the output step of the code, depending on the
              selected symbols.
        """
        if self.entry and self.entry not in self.entries:
            for log_msg in self.entry.warnings:
                self.config.warning(log_msg)

    def reset_state(self, ln):
        """
        Ancillary routine to create a new entry. It initializes all
        variables used by the state machine.
        """

        self.emit_unused_warnings()

        self.entry = KernelEntry(self.config, self.fname, ln)

        # State flags
        self.state = state.NORMAL

    def push_parameter(self, ln, decl_type, param, dtype,
                       org_arg, declaration_name):
        """
        Store parameters and their descriptions at self.entry.
        """

        if self.entry.anon_struct_union and dtype == "" and param == "}":
            return  # Ignore the ending }; from anonymous struct/union

        self.entry.anon_struct_union = False

        param = KernRe(r'[\[\)].*').sub('', param, count=1)

        #
        # Look at various "anonymous type" cases.
        #
        if dtype == '':
            if param.endswith("..."):
                if len(param) > 3: # there is a name provided, use that
                    param = param[:-3]
                if not self.entry.parameterdescs.get(param):
                    self.entry.parameterdescs[param] = "variable arguments"

            elif (not param) or param == "void":
                param = "void"
                self.entry.parameterdescs[param] = "no arguments"

            elif param in ["struct", "union"]:
                # Handle unnamed (anonymous) union or struct
                dtype = param
                param = "{unnamed_" + param + "}"
                self.entry.parameterdescs[param] = "anonymous\n"
                self.entry.anon_struct_union = True

        # Warn if parameter has no description
        # (but ignore ones starting with # as these are not parameters
        # but inline preprocessor statements)
        if param not in self.entry.parameterdescs and not param.startswith("#"):
            self.entry.parameterdescs[param] = self.undescribed

            if "." not in param:
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"

                self.emit_msg(ln,
                              f"{dname} '{param}' not described in '{declaration_name}'")

        # Strip spaces from param so that it is one continuous string on
        # parameterlist. This fixes a problem where check_sections()
        # cannot find a parameter like "addr[6 + 2]" because it actually
        # appears as "addr[6", "+", "2]" on the parameter list.
        # However, it's better to maintain the param string unchanged for
        # output, so just weaken the string compare in check_sections()
        # to ignore "[blah" in a parameter string.

        self.entry.parameterlist.append(param)
        org_arg = multi_space.sub(' ', org_arg)
        self.entry.parametertypes[param] = org_arg


    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is the raw argument/member string, which is split at
        ``splitter`` ("," for functions, ";" for struct/union members).
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case. Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                names = KernRe(r'\s*,\s*').split(arg)
                names[0] = re.sub(r'(\*+)\s*', r' \1', names[0])
                #
                # names[0] has a string of "type a". If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(names[0]):
                    names[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in names[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in names:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)

    def check_sections(self, ln, decl_name, decl_type):
        """
        Check for errors inside sections, emitting a warning for every
        section that is neither a known section name nor the name of a
        parameter/member found at the declaration.
        """
        for section in self.entry.sections:
            if section not in self.entry.parameterlist and \
               not known_sections.search(section):
                if decl_type == 'function':
                    dname = f"{decl_type} parameter"
                else:
                    dname = f"{decl_type} member"
                self.emit_msg(ln,
                              f"Excess {dname} '{section}' description in '{decl_name}'")

    def check_return_section(self, ln, declaration_name, return_type):
        """
        If the function doesn't return void, warns about the lack of a
        return description.
        """

        if not self.config.wreturn:
            return

        # Ignore an empty return type (It's a macro)
        # Ignore functions with a "void" return type (but not "void *")
        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
            return

        if not self.entry.sections.get("Return", None):
            self.emit_msg(ln,
                          f"No description found for return value of '{declaration_name}'")

    def split_struct_proto(self, proto):
        """
        Split apart a structure prototype; returns (struct|union, name,
        members) or ``None``.
        """

        type_pattern = r'(struct|union)'
        qualifiers = [
            "__attribute__",
            "__packed",
            "__aligned",
            "____cacheline_aligned_in_smp",
            "____cacheline_aligned",
        ]
        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"

        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
        if r.search(proto):
            return (r.group(1), r.group(2), r.group(3))

        # Typedef form: the name comes after the body.
        r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
        if r.search(proto):
            return (r.group(1), r.group(3), r.group(2))
        return None

    def rewrite_struct_members(self, members):
        """
        Process ``struct``/``union`` members from the most deeply nested
        outward.

        Rewrite the members of a ``struct`` or ``union`` for easier formatting
        later on. Among other things, this function will turn a member like::

          struct { inner_members; } foo;

        into::

          struct foo; inner_members;
        """

        #
        # The trick is in the ``^{`` below - it prevents a match of an outer
        # ``struct``/``union`` until the inner one has been munged
        # (removing the ``{`` in the process).
        #
        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
                                r'([^\{\};]+)'      # 1: possible name
                                r'(\{)'
                                r'([^\{\}]*)'       # 3: Contents of declaration
                                r'(\})'
                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
        tuples = struct_members.findall(members)
        while tuples:
            for t in tuples:
                newmember = ""
                oldmember = "".join(t) # Reconstruct the original formatting
                dtype, _name, _lbr, content, _rbr, rest, _semi = t
                #
                # Pass through each field name, normalizing the form and formatting.
                #
                for s_id in rest.split(','):
                    s_id = s_id.strip()
                    newmember += f"{dtype} {s_id}; "
                    #
                    # Remove bitfield/array/pointer info, getting the bare name.
                    #
                    s_id = KernRe(r'[:\[].*').sub('', s_id)
                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
                    #
                    # Pass through the members of this inner structure/union.
                    #
                    for arg in content.split(';'):
                        arg = arg.strip()
                        #
                        # Look for (type)(*name)(args) - pointer to function
                        #
                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                        if r.match(arg):
                            #
                            # Use dedicated names here: clobbering "dtype"
                            # would corrupt the "struct foo;" line emitted
                            # for the next identifier of "rest".
                            #
                            fn_type, fn_name, fn_extra = r.group(1), r.group(2), r.group(3)
                            # Pointer-to-function
                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{fn_type}{fn_name}{fn_extra}; "
                            else:
                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_extra}; "
                        #
                        # Otherwise a non-function member.
                        #
                        else:
                            #
                            # Remove bitmap and array portions and spaces around commas
                            #
                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
                            arg = KernRe(r'\[.*\]').sub('', arg)
                            arg = KernRe(r'\s*,\s*').sub(',', arg)
                            #
                            # Look for a normal decl - "type name[,name...]"
                            #
                            r = KernRe(r'(.*)\s+([\S+,]+)')
                            if r.search(arg):
                                for member in r.group(2).split(','):
                                    member = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', member)
                                    if not s_id:
                                        # Anonymous struct/union
                                        newmember += f"{r.group(1)} {member}; "
                                    else:
                                        newmember += f"{r.group(1)} {s_id}.{member}; "
                            else:
                                newmember += f"{arg}; "
                #
                # At the end of the s_id loop, replace the original declaration with
                # the munged version.
                #
                members = members.replace(oldmember, newmember)
            #
            # End of the tuple loop - search again and see if there are outer members
            # that now turn up.
            #
            tuples = struct_members.findall(members)
        return members

    def format_struct_decl(self, declaration):
        """
        Format the ``struct`` declaration into a standard form for inclusion
        in the resulting docs.
        """

        #
        # Insert newlines, get rid of extra spaces.
        #
        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
        declaration = KernRe(r'\}\s+;').sub('};', declaration)
        #
        # Format inline enums with each member on its own line.
        #
        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
        while r.search(declaration):
            declaration = r.sub(r'\1,\n\2', declaration)
        #
        # Now go through and supply the right number of tabs
        # for each line.
        #
        def_args = declaration.split('\n')
        level = 1
        declaration = ""
        for clause in def_args:
            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
            if clause:
                if '}' in clause and level > 1:
                    level -= 1
                if not clause.startswith('#'):
                    declaration += "\t" * level
                declaration += "\t" + clause + "\n"
                if "{" in clause and "}" not in clause:
                    level += 1
        return declaration


    def dump_struct(self, ln, proto):
        """
        Store an entry for a ``struct`` or ``union``
        """
        #
        # Do the basic parse to get the pieces of the declaration.
        #
        struct_parts = self.split_struct_proto(proto)
        if not struct_parts:
            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
            return
        decl_type, declaration_name, members = struct_parts

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
                          f"Prototype was for {decl_type} {declaration_name} instead\n")
            return
        #
        # Go through the list of members applying all of our transformations.
        #
        members = trim_private_members(members)
        members = self.xforms.apply("struct", members)

        nested = NestedMatch()
        for search, sub in struct_nested_prefixes:
            members = nested.sub(search, sub, members)
        #
        # Deal with embedded struct and union members, and drop enums entirely.
        #
        declaration = members
        members = self.rewrite_struct_members(members)
        members = re.sub(r'(\{[^\{\}]*\})', '', members)
        #
        # Output the result and we are done.
        #
        self.create_parameter_list(ln, decl_type, members, ';',
                                   declaration_name)
        self.check_sections(ln, declaration_name, decl_type)
        self.output_declaration(decl_type, declaration_name,
                                definition=self.format_struct_decl(declaration),
                                purpose=self.entry.declaration_purpose)

    def dump_enum(self, ln, proto):
        """
        Store an ``enum`` inside self.entries array.
        """
        #
        # Strip preprocessor directives.  Note that this depends on the
        # trailing semicolon we added in process_proto_type().
        #
        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
        #
        # Parse out the name and members of the enum.  Typedef form first.
        #
        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
        if r.search(proto):
            declaration_name = r.group(2)
            members = trim_private_members(r.group(1))
        #
        # Failing that, look for a straight enum
        #
        else:
            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
            if r.match(proto):
                declaration_name = r.group(1)
                members = trim_private_members(r.group(2))
            #
            # OK, this isn't going to work.
            #
            else:
                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
                return
        #
        # Make sure we found what we were expecting.
        #
        if self.entry.identifier != declaration_name:
            if self.entry.identifier == "":
                self.emit_msg(ln,
                              f"{proto}: wrong kernel-doc identifier on prototype")
            else:
                self.emit_msg(ln,
                              f"expecting prototype for enum {self.entry.identifier}. "
                              f"Prototype was for enum {declaration_name} instead")
            return

        if not declaration_name:
            declaration_name = "(anonymous)"
        #
        # Parse out the name of each enum member, and verify that we
        # have a description for it.
        #
        member_set = set()
        members = KernRe(r'\([^;)]*\)').sub('', members)
        for arg in members.split(','):
            if not arg:
                continue
            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
            self.entry.parameterlist.append(arg)
            if arg not in self.entry.parameterdescs:
                self.entry.parameterdescs[arg] = self.undescribed
                self.emit_msg(ln,
                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
            member_set.add(arg)
        #
        # Ensure that every described member actually exists in the enum.
        #
        for k in self.entry.parameterdescs:
            if k not in member_set:
                self.emit_msg(ln,
                              f"Excess enum value '@{k}' description in '{declaration_name}'")

        self.output_declaration('enum', declaration_name,
                                purpose=self.entry.declaration_purpose)

    def dump_var(self, ln, proto):
        """
        Store variables that are part of kAPI.
        """
        VAR_ATTRIBS = [
            "extern",
            "const",
        ]
        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"

        #
        # Store the full prototype before modifying it
        #
        full_proto = proto
        declaration_name = None

        #
        # Handle macro definitions
        #
        macro_prefixes = [
            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
        ]

        for r in macro_prefixes:
            match = r.search(proto)
            if match:
                declaration_name = match.group(1)
                break

        #
        # Drop comments and macros to have a pure C prototype
        #
        if not declaration_name:
            proto = self.xforms.apply("var", proto)

        proto = proto.rstrip()

        #
        # Variable name is at the end of the declaration
        #

        default_val = None

        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
        if r.match(proto):
            if not declaration_name:
                declaration_name = r.group(1)

            default_val = r.group(2)
        else:
            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")

            if r.match(proto):
                default_val = r.group(1)
        if not declaration_name:
            self.emit_msg(ln,f"{proto}: can't parse variable")
            return

        if default_val:
            default_val = default_val.lstrip("=").strip()

        self.output_declaration("var", declaration_name,
                                full_proto=full_proto,
                                default_val=default_val,
                                purpose=self.entry.declaration_purpose)

    def dump_declaration(self, ln, prototype):
        """
        Store a data declaration inside self.entries array.

        Dispatches to the proper ``dump_*`` method according to
        ``self.entry.decl_type``.
        """

        if self.entry.decl_type == "enum":
            self.dump_enum(ln, prototype)
        elif self.entry.decl_type == "typedef":
            self.dump_typedef(ln, prototype)
        elif self.entry.decl_type in ["union", "struct"]:
            self.dump_struct(ln, prototype)
        elif self.entry.decl_type == "var":
            self.dump_var(ln, prototype)
        else:
            # This would be a bug
            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')

    def dump_function(self, ln, prototype):
        """
        Store a function or function macro inside self.entries array.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'

        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True
        else:
            #
            # Apply the initial transformations.
            #
            prototype = self.xforms.apply("func", prototype)

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.
        #
        self.output_declaration(decl_type, declaration_name,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)


    def dump_typedef(self, ln, proto):
        """
        Store a ``typedef`` inside self.entries array.
        """
        #
        # We start by looking for function typedefs.
        #
        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
        typedef_ident = r'\*?\s*(\w\S+)\s*'
        typedef_args = r'\s*\((.*)\);'

        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)

        # Parse function typedef prototypes
        for r in [typedef1, typedef2]:
            if not r.match(proto):
                continue

            return_type = r.group(1).strip()
            declaration_name = r.group(2)
            args = r.group(3)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.create_parameter_list(ln, 'function', args, ',', declaration_name)

            self.output_declaration('function', declaration_name,
                                    typedef=True,
                                    functiontype=return_type,
                                    purpose=self.entry.declaration_purpose)
            return
        #
        # Not a function, try to parse a simple typedef.
        #
        r = KernRe(r'typedef.*\s+(\w+)\s*;')
        if r.match(proto):
            declaration_name = r.group(1)

            if self.entry.identifier != declaration_name:
                self.emit_msg(ln,
                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
                return

            self.output_declaration('typedef', declaration_name,
                                    purpose=self.entry.declaration_purpose)
            return

        self.emit_msg(ln, "error: Cannot parse typedef!")

    @staticmethod
    def process_export(function_set, line):
        """
        process ``EXPORT_SYMBOL*`` tags

        This method doesn't use any variable from the class, so declare it
        with a staticmethod decorator.

        Adds the exported symbol found at ``line`` (if any) to
        ``function_set``. Returns ``True`` when an export was found,
        ``False`` otherwise.
        """

        # We support documenting some exported symbols with different
        # names.  A horrible hack.
        suffixes = [ '_noprof' ]

        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
        # multiple export lines would violate Kernel coding style.

        if export_symbol.search(line):
            symbol = export_symbol.group(2)
        elif export_symbol_ns.search(line):
            symbol = export_symbol_ns.group(2)
        else:
            return False
        #
        # Found an export, trim out any special suffixes
        #
        for suffix in suffixes:
            # Be backward compatible with Python < 3.9
            if symbol.endswith(suffix):
                symbol = symbol[:-len(suffix)]
        function_set.add(symbol)
        return True

    def process_normal(self, ln, line):
        """
        STATE_NORMAL: looking for the ``/**`` to begin everything.
        """

        if not doc_start.match(line):
            return

        # start a new entry
        self.reset_state(ln)

        # next line is always the function name
        self.state = state.NAME

    def process_name(self, ln, line):
        """
        STATE_NAME: Looking for the "name - description" line
        """
        #
        # Check for a DOC: block and handle them specially.
        #
        if doc_block.search(line):

            if not doc_block.group(1):
                self.entry.begin_section(ln, "Introduction")
            else:
                self.entry.begin_section(ln, doc_block.group(1))

            self.entry.identifier = self.entry.section
            self.state = state.DOCBLOCK
        #
        # Otherwise we're looking for a normal kerneldoc declaration line.
        #
        elif doc_decl.search(line):
            self.entry.identifier = doc_decl.group(1)

            # Test for data declaration
            if doc_begin_data.search(line):
                self.entry.decl_type = doc_begin_data.group(1)
                self.entry.identifier = doc_begin_data.group(2)
            #
            # Look for a function description
            #
            elif doc_begin_func.search(line):
                self.entry.identifier = doc_begin_func.group(1)
                self.entry.decl_type = "function"
            #
            # We struck out.
            #
            else:
                self.emit_msg(ln,
                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
                self.state = state.NORMAL
                return
            #
            # OK, set up for a new kerneldoc entry.
            #
            self.state = state.BODY
            self.entry.identifier = self.entry.identifier.strip(" ")
            # if there's no @param blocks need to set up default section here
            self.entry.begin_section(ln + 1)
            #
            # Find the description portion, which *should* be there but
            # isn't always.
1180 # (We should be able to capture this from the previous parsing - someday) 1181 # 1182 r = KernRe("[-:](.*)") 1183 if r.search(line): 1184 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1185 self.state = state.DECLARATION 1186 else: 1187 self.entry.declaration_purpose = "" 1188 1189 if not self.entry.declaration_purpose and self.config.wshort_desc: 1190 self.emit_msg(ln, 1191 f"missing initial short description on line:\n{line}") 1192 1193 if not self.entry.identifier and self.entry.decl_type != "enum": 1194 self.emit_msg(ln, 1195 f"wrong kernel-doc identifier on line:\n{line}") 1196 self.state = state.NORMAL 1197 1198 if self.config.verbose: 1199 self.emit_msg(ln, 1200 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1201 warning=False) 1202 # 1203 # Failed to find an identifier. Emit a warning 1204 # 1205 else: 1206 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1207 1208 def is_new_section(self, ln, line): 1209 """ 1210 Helper function to determine if a new section is being started. 1211 """ 1212 if doc_sect.search(line): 1213 self.state = state.BODY 1214 # 1215 # Pick out the name of our new section, tweaking it if need be. 1216 # 1217 newsection = doc_sect.group(1) 1218 if newsection.lower() == 'description': 1219 newsection = 'Description' 1220 elif newsection.lower() == 'context': 1221 newsection = 'Context' 1222 self.state = state.SPECIAL_SECTION 1223 elif newsection.lower() in ["@return", "@returns", 1224 "return", "returns"]: 1225 newsection = "Return" 1226 self.state = state.SPECIAL_SECTION 1227 elif newsection[0] == '@': 1228 self.state = state.SPECIAL_SECTION 1229 # 1230 # Initialize the contents, and get the new section going. 
1231 # 1232 newcontents = doc_sect.group(2) 1233 if not newcontents: 1234 newcontents = "" 1235 self.dump_section() 1236 self.entry.begin_section(ln, newsection) 1237 self.entry.leading_space = None 1238 1239 self.entry.add_text(newcontents.lstrip()) 1240 return True 1241 return False 1242 1243 def is_comment_end(self, ln, line): 1244 """ 1245 Helper function to detect (and effect) the end of a kerneldoc comment. 1246 """ 1247 if doc_end.search(line): 1248 self.dump_section() 1249 1250 # Look for doc_com + <text> + doc_end: 1251 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1252 if r.match(line): 1253 self.emit_msg(ln, f"suspicious ending line: {line}") 1254 1255 self.entry.prototype = "" 1256 self.entry.new_start_line = ln + 1 1257 1258 self.state = state.PROTO 1259 return True 1260 return False 1261 1262 1263 def process_decl(self, ln, line): 1264 """ 1265 STATE_DECLARATION: We've seen the beginning of a declaration. 1266 """ 1267 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1268 return 1269 # 1270 # Look for anything with the " * " line beginning. 1271 # 1272 if doc_content.search(line): 1273 cont = doc_content.group(1) 1274 # 1275 # A blank line means that we have moved out of the declaration 1276 # part of the comment (without any "special section" parameter 1277 # descriptions). 1278 # 1279 if cont == "": 1280 self.state = state.BODY 1281 # 1282 # Otherwise we have more of the declaration section to soak up. 1283 # 1284 else: 1285 self.entry.declaration_purpose = \ 1286 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1287 else: 1288 # Unknown line, ignore 1289 self.emit_msg(ln, f"bad line: {line}") 1290 1291 1292 def process_special(self, ln, line): 1293 """ 1294 STATE_SPECIAL_SECTION: a section ending with a blank line. 1295 """ 1296 # 1297 # If we have hit a blank line (only the " * " marker), then this 1298 # section is done. 
1299 # 1300 if KernRe(r"\s*\*\s*$").match(line): 1301 self.entry.begin_section(ln, dump = True) 1302 self.state = state.BODY 1303 return 1304 # 1305 # Not a blank line, look for the other ways to end the section. 1306 # 1307 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1308 return 1309 # 1310 # OK, we should have a continuation of the text for this section. 1311 # 1312 if doc_content.search(line): 1313 cont = doc_content.group(1) 1314 # 1315 # If the lines of text after the first in a special section have 1316 # leading white space, we need to trim it out or Sphinx will get 1317 # confused. For the second line (the None case), see what we 1318 # find there and remember it. 1319 # 1320 if self.entry.leading_space is None: 1321 r = KernRe(r'^(\s+)') 1322 if r.match(cont): 1323 self.entry.leading_space = len(r.group(1)) 1324 else: 1325 self.entry.leading_space = 0 1326 # 1327 # Otherwise, before trimming any leading chars, be *sure* 1328 # that they are white space. We should maybe warn if this 1329 # isn't the case. 1330 # 1331 for i in range(0, self.entry.leading_space): 1332 if cont[i] != " ": 1333 self.entry.leading_space = i 1334 break 1335 # 1336 # Add the trimmed result to the section and we're done. 1337 # 1338 self.entry.add_text(cont[self.entry.leading_space:]) 1339 else: 1340 # Unknown line, ignore 1341 self.emit_msg(ln, f"bad line: {line}") 1342 1343 def process_body(self, ln, line): 1344 """ 1345 STATE_BODY: the bulk of a kerneldoc comment. 
1346 """ 1347 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1348 return 1349 1350 if doc_content.search(line): 1351 cont = doc_content.group(1) 1352 self.entry.add_text(cont) 1353 else: 1354 # Unknown line, ignore 1355 self.emit_msg(ln, f"bad line: {line}") 1356 1357 def process_inline_name(self, ln, line): 1358 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1359 1360 if doc_inline_sect.search(line): 1361 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1362 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1363 self.state = state.INLINE_TEXT 1364 elif doc_inline_end.search(line): 1365 self.dump_section() 1366 self.state = state.PROTO 1367 elif doc_content.search(line): 1368 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1369 self.state = state.PROTO 1370 # else ... ?? 1371 1372 def process_inline_text(self, ln, line): 1373 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1374 1375 if doc_inline_end.search(line): 1376 self.dump_section() 1377 self.state = state.PROTO 1378 elif doc_content.search(line): 1379 self.entry.add_text(doc_content.group(1)) 1380 # else ... ?? 1381 1382 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1383 """ 1384 Handle syscall definitions. 
1385 """ 1386 1387 is_void = False 1388 1389 # Strip newlines/CR's 1390 proto = re.sub(r'[\r\n]+', ' ', proto) 1391 1392 # Check if it's a SYSCALL_DEFINE0 1393 if 'SYSCALL_DEFINE0' in proto: 1394 is_void = True 1395 1396 # Replace SYSCALL_DEFINE with correct return type & function name 1397 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1398 1399 r = KernRe(r'long\s+(sys_.*?),') 1400 if r.search(proto): 1401 proto = KernRe(',').sub('(', proto, count=1) 1402 elif is_void: 1403 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1404 1405 # Now delete all of the odd-numbered commas in the proto 1406 # so that argument types & names don't have a comma between them 1407 count = 0 1408 length = len(proto) 1409 1410 if is_void: 1411 length = 0 # skip the loop if is_void 1412 1413 for ix in range(length): 1414 if proto[ix] == ',': 1415 count += 1 1416 if count % 2 == 1: 1417 proto = proto[:ix] + ' ' + proto[ix + 1:] 1418 1419 return proto 1420 1421 def tracepoint_munge(self, ln, proto): 1422 """ 1423 Handle tracepoint definitions. 
1424 """ 1425 1426 tracepointname = None 1427 tracepointargs = None 1428 1429 # Match tracepoint name based on different patterns 1430 r = KernRe(r'TRACE_EVENT\((.*?),') 1431 if r.search(proto): 1432 tracepointname = r.group(1) 1433 1434 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1435 if r.search(proto): 1436 tracepointname = r.group(1) 1437 1438 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1439 if r.search(proto): 1440 tracepointname = r.group(2) 1441 1442 if tracepointname: 1443 tracepointname = tracepointname.lstrip() 1444 1445 r = KernRe(r'TP_PROTO\((.*?)\)') 1446 if r.search(proto): 1447 tracepointargs = r.group(1) 1448 1449 if not tracepointname or not tracepointargs: 1450 self.emit_msg(ln, 1451 f"Unrecognized tracepoint format:\n{proto}\n") 1452 else: 1453 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1454 self.entry.identifier = f"trace_{self.entry.identifier}" 1455 1456 return proto 1457 1458 def process_proto_function(self, ln, line): 1459 """Ancillary routine to process a function prototype.""" 1460 1461 # strip C99-style comments to end of line 1462 line = KernRe(r"//.*$", re.S).sub('', line) 1463 # 1464 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1465 # 1466 if KernRe(r'\s*#\s*define').match(line): 1467 self.entry.prototype = line 1468 elif not line.startswith('#'): # skip other preprocessor stuff 1469 r = KernRe(r'([^\{]*)') 1470 if r.match(line): 1471 self.entry.prototype += r.group(1) + " " 1472 # 1473 # If we now have the whole prototype, clean it up and declare victory. 
1474 # 1475 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1476 # strip comments and surrounding spaces 1477 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1478 # 1479 # Handle self.entry.prototypes for function pointers like: 1480 # int (*pcs_config)(struct foo) 1481 # by turning it into 1482 # int pcs_config(struct foo) 1483 # 1484 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1485 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1486 # 1487 # Handle special declaration syntaxes 1488 # 1489 if 'SYSCALL_DEFINE' in self.entry.prototype: 1490 self.entry.prototype = self.syscall_munge(ln, 1491 self.entry.prototype) 1492 else: 1493 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1494 if r.search(self.entry.prototype): 1495 self.entry.prototype = self.tracepoint_munge(ln, 1496 self.entry.prototype) 1497 # 1498 # ... and we're done 1499 # 1500 self.dump_function(ln, self.entry.prototype) 1501 self.reset_state(ln) 1502 1503 def process_proto_type(self, ln, line): 1504 """ 1505 Ancillary routine to process a type. 1506 """ 1507 1508 # Strip C99-style comments and surrounding whitespace 1509 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1510 if not line: 1511 return # nothing to see here 1512 1513 # To distinguish preprocessor directive from regular declaration later. 1514 if line.startswith('#'): 1515 line += ";" 1516 # 1517 # Split the declaration on any of { } or ;, and accumulate pieces 1518 # until we hit a semicolon while not inside {brackets} 1519 # 1520 r = KernRe(r'(.*?)([{};])') 1521 for chunk in r.split(line): 1522 if chunk: # Ignore empty matches 1523 self.entry.prototype += chunk 1524 # 1525 # This cries out for a match statement ... someday after we can 1526 # drop Python 3.9 ... 
1527 # 1528 if chunk == '{': 1529 self.entry.brcount += 1 1530 elif chunk == '}': 1531 self.entry.brcount -= 1 1532 elif chunk == ';' and self.entry.brcount <= 0: 1533 self.dump_declaration(ln, self.entry.prototype) 1534 self.reset_state(ln) 1535 return 1536 # 1537 # We hit the end of the line while still in the declaration; put 1538 # in a space to represent the newline. 1539 # 1540 self.entry.prototype += ' ' 1541 1542 def process_proto(self, ln, line): 1543 """STATE_PROTO: reading a function/whatever prototype.""" 1544 1545 if doc_inline_oneline.search(line): 1546 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1547 self.entry.add_text(doc_inline_oneline.group(2)) 1548 self.dump_section() 1549 1550 elif doc_inline_start.search(line): 1551 self.state = state.INLINE_NAME 1552 1553 elif self.entry.decl_type == 'function': 1554 self.process_proto_function(ln, line) 1555 1556 else: 1557 self.process_proto_type(ln, line) 1558 1559 def process_docblock(self, ln, line): 1560 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1561 1562 if doc_end.search(line): 1563 self.dump_section() 1564 self.output_declaration("doc", self.entry.identifier) 1565 self.reset_state(ln) 1566 1567 elif doc_content.search(line): 1568 self.entry.add_text(doc_content.group(1)) 1569 1570 def parse_export(self): 1571 """ 1572 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1573 """ 1574 1575 export_table = set() 1576 1577 try: 1578 with open(self.fname, "r", encoding="utf8", 1579 errors="backslashreplace") as fp: 1580 1581 for line in fp: 1582 self.process_export(export_table, line) 1583 1584 except IOError: 1585 return None 1586 1587 return export_table 1588 1589 #: The state/action table telling us which function to invoke in each state. 
1590 state_actions = { 1591 state.NORMAL: process_normal, 1592 state.NAME: process_name, 1593 state.BODY: process_body, 1594 state.DECLARATION: process_decl, 1595 state.SPECIAL_SECTION: process_special, 1596 state.INLINE_NAME: process_inline_name, 1597 state.INLINE_TEXT: process_inline_text, 1598 state.PROTO: process_proto, 1599 state.DOCBLOCK: process_docblock, 1600 } 1601 1602 def parse_kdoc(self): 1603 """ 1604 Open and process each line of a C source file. 1605 The parsing is controlled via a state machine, and the line is passed 1606 to a different process function depending on the state. The process 1607 function may update the state as needed. 1608 1609 Besides parsing kernel-doc tags, it also parses export symbols. 1610 """ 1611 1612 prev = "" 1613 prev_ln = None 1614 export_table = set() 1615 1616 try: 1617 with open(self.fname, "r", encoding="utf8", 1618 errors="backslashreplace") as fp: 1619 for ln, line in enumerate(fp): 1620 1621 line = line.expandtabs().strip("\n") 1622 1623 # Group continuation lines on prototypes 1624 if self.state == state.PROTO: 1625 if line.endswith("\\"): 1626 prev += line.rstrip("\\") 1627 if not prev_ln: 1628 prev_ln = ln 1629 continue 1630 1631 if prev: 1632 ln = prev_ln 1633 line = prev + line 1634 prev = "" 1635 prev_ln = None 1636 1637 self.config.log.debug("%d %s: %s", 1638 ln, state.name[self.state], 1639 line) 1640 1641 # This is an optimization over the original script. 1642 # There, when export_file was used for the same file, 1643 # it was read twice. Here, we use the already-existing 1644 # loop to parse exported symbols as well. 1645 # 1646 if (self.state != state.NORMAL) or \ 1647 not self.process_export(export_table, line): 1648 # Hand this line to the appropriate state handler 1649 self.state_actions[self.state](self, ln, line) 1650 1651 self.emit_unused_warnings() 1652 1653 except OSError: 1654 self.config.log.error(f"Error: Cannot open file {self.fname}") 1655 1656 return export_table, self.entries 1657