1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. 4# 5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 6 7""" 8Classes and functions related to reading a C language source or header FILE 9and extract embedded documentation comments from it. 10""" 11 12import sys 13import re 14from pprint import pformat 15 16from kdoc.kdoc_re import NestedMatch, KernRe 17from kdoc.kdoc_item import KdocItem 18 19# 20# Regular expressions used to parse kernel-doc markups at KernelDoc class. 21# 22# Let's declare them in lowercase outside any class to make it easier to 23# convert from the Perl script. 24# 25# As those are evaluated at the beginning, no need to cache them 26# 27 28# Allow whitespace at end of comment start. 29doc_start = KernRe(r'^/\*\*\s*$', cache=False) 30 31doc_end = KernRe(r'\*/', cache=False) 32doc_com = KernRe(r'\s*\*\s*', cache=False) 33doc_com_body = KernRe(r'\s*\* ?', cache=False) 34doc_decl = doc_com + KernRe(r'(\w+)', cache=False) 35 36# @params and a strictly limited set of supported section names 37# Specifically: 38# Match @word: 39# @...: 40# @{section-name}: 41# while trying to not match literal block starts like "example::" 42# 43known_section_names = 'description|context|returns?|notes?|examples?' 
# Case-insensitive matcher for the supported section names.
known_sections = KernRe(known_section_names, flags = re.I)
# Section header line: "@name:", "@...:" or one of the known section names
# followed by ":" (but not by "::").
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Any comment body line; the text after the leader is captured.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Markers used by inline ("/** ... */") comments.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL*() lines; group 2 is the exported symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name" references, including "a.b", "a->b" and a trailing "...".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)

#
# Regexes here are guaranteed to have the end delimiter matching
# the start delimiter. Yet, right now, only one replace group
# is allowed.
#
struct_nested_prefixes = [
    (re.compile(r"__cond_acquires\s*\("), ""),
    (re.compile(r"__cond_releases\s*\("), ""),
    (re.compile(r"__acquires\s*\("), ""),
    (re.compile(r"__releases\s*\("), ""),
    (re.compile(r"__must_hold\s*\("), ""),
    (re.compile(r"__must_not_hold\s*\("), ""),
    (re.compile(r"__must_hold_shared\s*\("), ""),
    (re.compile(r"__cond_acquires_shared\s*\("), ""),
    (re.compile(r"__acquires_shared\s*\("), ""),
    (re.compile(r"__releases_shared\s*\("), ""),
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]

#
# Ancillary functions
#

multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    A little helper to get rid of excess white space.

    Leading and trailing blanks are stripped and every run of two or more
    whitespace characters is replaced by a single space.
    """
    return multi_space.sub(' ', s.strip())

def trim_private_members(text):
    """
    Remove ``struct``/``enum`` members that have been marked "private".

    Returns the text without the private regions and without any remaining
    C comments, stripped of leading/trailing whitespace.
    """
    # First look for a "public:" block that ends a private region, then
    # handle the "private until the end" case.
    #
    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
    #
    # We needed the comments to do the above, but now we can take them out.
    #
    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()

class state:
    """
    States used by the parser's state machine.
    """

    # Parser states
    NORMAL        = 0        #: Normal code.
    NAME          = 1        #: Looking for function name.
    DECLARATION   = 2        #: We have seen a declaration which might not be done.
    BODY          = 3        #: The body of the comment.
    SPECIAL_SECTION = 4      #: Doc section ending with a blank line.
    PROTO         = 5        #: Scanning prototype.
    DOCBLOCK      = 6        #: Documentation block.
    INLINE_NAME   = 7        #: Gathering doc outside main block.
    INLINE_TEXT   = 8        #: Reading the body of inline docs.

    #: Names for each parser state.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]


SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    It accumulates the text of the section being read, the finished
    sections and parameter descriptions, and the warnings raised while
    the entry was parsed.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` object (used for informational
        messages), ``fname`` is the file name used as prefix on messages
        and ``ln`` is the line where the comment starts; the declaration
        is recorded as starting on the following line.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # Section being collected. Those used to be created only at
        # begin_section(), making dump_section() raise AttributeError
        # when called before it.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message.

        Informational messages (``warning=False``) are logged right away;
        warnings are stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section.

        When ``dump`` is set, the section collected so far is stored
        before switching to the new ``title``.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections named after a parameter (``@name``) go to the parameter
        descriptions; everything else goes to ``self.sections``. When
        ``start_new`` is set, collection restarts on the default section.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []

# Set once the "Python 3.7 or later" warning has been emitted, so that it
# is reported a single time.
python_warning = False

class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
271 undescribed = "-- undescribed --" 272 273 def __init__(self, config, fname, xforms): 274 """Initialize internal variables""" 275 276 self.fname = fname 277 self.config = config 278 self.xforms = xforms 279 280 # Initial state for the state machines 281 self.state = state.NORMAL 282 283 # Store entry currently being processed 284 self.entry = None 285 286 # Place all potential outputs into an array 287 self.entries = [] 288 289 # 290 # We need Python 3.7 for its "dicts remember the insertion 291 # order" guarantee 292 # 293 global python_warning 294 if (not python_warning and 295 sys.version_info.major == 3 and sys.version_info.minor < 7): 296 297 self.emit_msg(0, 298 'Python 3.7 or later is required for correct results') 299 python_warning = True 300 301 def emit_msg(self, ln, msg, *, warning=True): 302 """Emit a message""" 303 304 if self.entry: 305 self.entry.emit_msg(ln, msg, warning=warning) 306 return 307 308 log_msg = f"{self.fname}:{ln} {msg}" 309 310 if warning: 311 self.config.log.warning(log_msg) 312 else: 313 self.config.log.info(log_msg) 314 315 def dump_section(self, start_new=True): 316 """ 317 Dump section contents to arrays/hashes intended for that purpose. 318 """ 319 320 if self.entry: 321 self.entry.dump_section(start_new) 322 323 # TODO: rename it to store_declaration after removal of kernel-doc.pl 324 def output_declaration(self, dtype, name, **args): 325 """ 326 Store the entry into an entry array. 327 328 The actual output and output filters will be handled elsewhere. 
329 """ 330 331 item = KdocItem(name, self.fname, dtype, 332 self.entry.declaration_start_line, **args) 333 item.warnings = self.entry.warnings 334 335 # Drop empty sections 336 # TODO: improve empty sections logic to emit warnings 337 sections = self.entry.sections 338 for section in ["Description", "Return"]: 339 if section in sections and not sections[section].rstrip(): 340 del sections[section] 341 item.set_sections(sections, self.entry.section_start_lines) 342 item.set_params(self.entry.parameterlist, self.entry.parameterdescs, 343 self.entry.parametertypes, 344 self.entry.parameterdesc_start_lines) 345 self.entries.append(item) 346 347 self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) 348 349 def emit_unused_warnings(self): 350 """ 351 When the parser fails to produce a valid entry, it places some 352 warnings under `entry.warnings` that will be discarded when resetting 353 the state. 354 355 Ensure that those warnings are not lost. 356 357 .. note:: 358 359 Because we are calling `config.warning()` here, those 360 warnings are not filtered by the `-W` parameters: they will all 361 be produced even when `-Wreturn`, `-Wshort-desc`, and/or 362 `-Wcontents-before-sections` are used. 363 364 Allowing those warnings to be filtered is complex, because it 365 would require storing them in a buffer and then filtering them 366 during the output step of the code, depending on the 367 selected symbols. 368 """ 369 if self.entry and self.entry not in self.entries: 370 for log_msg in self.entry.warnings: 371 self.config.warning(log_msg) 372 373 def reset_state(self, ln): 374 """ 375 Ancillary routine to create a new entry. It initializes all 376 variables used by the state machine. 
377 """ 378 379 self.emit_unused_warnings() 380 381 self.entry = KernelEntry(self.config, self.fname, ln) 382 383 # State flags 384 self.state = state.NORMAL 385 386 def push_parameter(self, ln, decl_type, param, dtype, 387 org_arg, declaration_name): 388 """ 389 Store parameters and their descriptions at self.entry. 390 """ 391 392 if self.entry.anon_struct_union and dtype == "" and param == "}": 393 return # Ignore the ending }; from anonymous struct/union 394 395 self.entry.anon_struct_union = False 396 397 param = KernRe(r'[\[\)].*').sub('', param, count=1) 398 399 # 400 # Look at various "anonymous type" cases. 401 # 402 if dtype == '': 403 if param.endswith("..."): 404 if len(param) > 3: # there is a name provided, use that 405 param = param[:-3] 406 if not self.entry.parameterdescs.get(param): 407 self.entry.parameterdescs[param] = "variable arguments" 408 409 elif (not param) or param == "void": 410 param = "void" 411 self.entry.parameterdescs[param] = "no arguments" 412 413 elif param in ["struct", "union"]: 414 # Handle unnamed (anonymous) union or struct 415 dtype = param 416 param = "{unnamed_" + param + "}" 417 self.entry.parameterdescs[param] = "anonymous\n" 418 self.entry.anon_struct_union = True 419 420 # Warn if parameter has no description 421 # (but ignore ones starting with # as these are not parameters 422 # but inline preprocessor statements) 423 if param not in self.entry.parameterdescs and not param.startswith("#"): 424 self.entry.parameterdescs[param] = self.undescribed 425 426 if "." not in param: 427 if decl_type == 'function': 428 dname = f"{decl_type} parameter" 429 else: 430 dname = f"{decl_type} member" 431 432 self.emit_msg(ln, 433 f"{dname} '{param}' not described in '{declaration_name}'") 434 435 # Strip spaces from param so that it is one continuous string on 436 # parameterlist. 
This fixes a problem where check_sections() 437 # cannot find a parameter like "addr[6 + 2]" because it actually 438 # appears as "addr[6", "+", "2]" on the parameter list. 439 # However, it's better to maintain the param string unchanged for 440 # output, so just weaken the string compare in check_sections() 441 # to ignore "[blah" in a parameter string. 442 443 self.entry.parameterlist.append(param) 444 org_arg = KernRe(r'\s\s+').sub(' ', org_arg) 445 self.entry.parametertypes[param] = org_arg 446 447 448 def create_parameter_list(self, ln, decl_type, args, 449 splitter, declaration_name): 450 """ 451 Creates a list of parameters, storing them at self.entry. 452 """ 453 454 # temporarily replace all commas inside function pointer definition 455 arg_expr = KernRe(r'(\([^\),]+),') 456 while arg_expr.search(args): 457 args = arg_expr.sub(r"\1#", args) 458 459 for arg in args.split(splitter): 460 # Ignore argument attributes 461 arg = KernRe(r'\sPOS0?\s').sub(' ', arg) 462 463 # Strip leading/trailing spaces 464 arg = arg.strip() 465 arg = KernRe(r'\s+').sub(' ', arg, count=1) 466 467 if arg.startswith('#'): 468 # Treat preprocessor directive as a typeless variable just to fill 469 # corresponding data structures "correctly". Catch it later in 470 # output_* subs. 471 472 # Treat preprocessor directive as a typeless variable 473 self.push_parameter(ln, decl_type, arg, "", 474 "", declaration_name) 475 # 476 # The pointer-to-function case. 477 # 478 elif KernRe(r'\(.+\)\s*\(').search(arg): 479 arg = arg.replace('#', ',') 480 r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" 481 r'([\w\[\].]*)' # Capture the name and possible [array] 482 r'\s*\)') # Make sure the trailing ")" is there 483 if r.match(arg): 484 param = r.group(1) 485 else: 486 self.emit_msg(ln, f"Invalid param: {arg}") 487 param = arg 488 dtype = arg.replace(param, '') 489 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 490 # 491 # The array-of-pointers case. 
Dig the parameter name out from the middle 492 # of the declaration. 493 # 494 elif KernRe(r'\(.+\)\s*\[').search(arg): 495 r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" 496 r'([\w.]*?)' # The actual pointer name 497 r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] 498 if r.match(arg): 499 param = r.group(1) 500 else: 501 self.emit_msg(ln, f"Invalid param: {arg}") 502 param = arg 503 dtype = arg.replace(param, '') 504 self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) 505 elif arg: 506 # 507 # Clean up extraneous spaces and split the string at commas; the first 508 # element of the resulting list will also include the type information. 509 # 510 arg = KernRe(r'\s*:\s*').sub(":", arg) 511 arg = KernRe(r'\s*\[').sub('[', arg) 512 args = KernRe(r'\s*,\s*').split(arg) 513 args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) 514 # 515 # args[0] has a string of "type a". If "a" includes an [array] 516 # declaration, we want to not be fooled by any white space inside 517 # the brackets, so detect and handle that case specially. 518 # 519 r = KernRe(r'^([^[\]]*\s+)(.*)$') 520 if r.match(args[0]): 521 args[0] = r.group(2) 522 dtype = r.group(1) 523 else: 524 # No space in args[0]; this seems wrong but preserves previous behavior 525 dtype = '' 526 527 bitfield_re = KernRe(r'(.*?):(\w+)') 528 for param in args: 529 # 530 # For pointers, shift the star(s) from the variable name to the 531 # type declaration. 532 # 533 r = KernRe(r'^(\*+)\s*(.*)') 534 if r.match(param): 535 self.push_parameter(ln, decl_type, r.group(2), 536 f"{dtype} {r.group(1)}", 537 arg, declaration_name) 538 # 539 # Perform a similar shift for bitfields. 
540 # 541 elif bitfield_re.search(param): 542 if dtype != "": # Skip unnamed bit-fields 543 self.push_parameter(ln, decl_type, bitfield_re.group(1), 544 f"{dtype}:{bitfield_re.group(2)}", 545 arg, declaration_name) 546 else: 547 self.push_parameter(ln, decl_type, param, dtype, 548 arg, declaration_name) 549 550 def check_sections(self, ln, decl_name, decl_type): 551 """ 552 Check for errors inside sections, emitting warnings if not found 553 parameters are described. 554 """ 555 for section in self.entry.sections: 556 if section not in self.entry.parameterlist and \ 557 not known_sections.search(section): 558 if decl_type == 'function': 559 dname = f"{decl_type} parameter" 560 else: 561 dname = f"{decl_type} member" 562 self.emit_msg(ln, 563 f"Excess {dname} '{section}' description in '{decl_name}'") 564 565 def check_return_section(self, ln, declaration_name, return_type): 566 """ 567 If the function doesn't return void, warns about the lack of a 568 return description. 569 """ 570 571 if not self.config.wreturn: 572 return 573 574 # Ignore an empty return type (It's a macro) 575 # Ignore functions with a "void" return type (but not "void *") 576 if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): 577 return 578 579 if not self.entry.sections.get("Return", None): 580 self.emit_msg(ln, 581 f"No description found for return value of '{declaration_name}'") 582 583 def split_struct_proto(self, proto): 584 """ 585 Split apart a structure prototype; returns (struct|union, name, 586 members) or ``None``. 587 """ 588 589 type_pattern = r'(struct|union)' 590 qualifiers = [ 591 "__attribute__", 592 "__packed", 593 "__aligned", 594 "____cacheline_aligned_in_smp", 595 "____cacheline_aligned", 596 ] 597 definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 
598 599 r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) 600 if r.search(proto): 601 return (r.group(1), r.group(2), r.group(3)) 602 else: 603 r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') 604 if r.search(proto): 605 return (r.group(1), r.group(3), r.group(2)) 606 return None 607 608 def rewrite_struct_members(self, members): 609 """ 610 Process ``struct``/``union`` members from the most deeply nested 611 outward. 612 613 Rewrite the members of a ``struct`` or ``union`` for easier formatting 614 later on. Among other things, this function will turn a member like:: 615 616 struct { inner_members; } foo; 617 618 into:: 619 620 struct foo; inner_members; 621 """ 622 623 # 624 # The trick is in the ``^{`` below - it prevents a match of an outer 625 # ``struct``/``union`` until the inner one has been munged 626 # (removing the ``{`` in the process). 627 # 628 struct_members = KernRe(r'(struct|union)' # 0: declaration type 629 r'([^\{\};]+)' # 1: possible name 630 r'(\{)' 631 r'([^\{\}]*)' # 3: Contents of declaration 632 r'(\})' 633 r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration 634 tuples = struct_members.findall(members) 635 while tuples: 636 for t in tuples: 637 newmember = "" 638 oldmember = "".join(t) # Reconstruct the original formatting 639 dtype, name, lbr, content, rbr, rest, semi = t 640 # 641 # Pass through each field name, normalizing the form and formatting. 642 # 643 for s_id in rest.split(','): 644 s_id = s_id.strip() 645 newmember += f"{dtype} {s_id}; " 646 # 647 # Remove bitfield/array/pointer info, getting the bare name. 648 # 649 s_id = KernRe(r'[:\[].*').sub('', s_id) 650 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) 651 # 652 # Pass through the members of this inner structure/union. 
653 # 654 for arg in content.split(';'): 655 arg = arg.strip() 656 # 657 # Look for (type)(*name)(args) - pointer to function 658 # 659 r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') 660 if r.match(arg): 661 dtype, name, extra = r.group(1), r.group(2), r.group(3) 662 # Pointer-to-function 663 if not s_id: 664 # Anonymous struct/union 665 newmember += f"{dtype}{name}{extra}; " 666 else: 667 newmember += f"{dtype}{s_id}.{name}{extra}; " 668 # 669 # Otherwise a non-function member. 670 # 671 else: 672 # 673 # Remove bitmap and array portions and spaces around commas 674 # 675 arg = KernRe(r':\s*\d+\s*').sub('', arg) 676 arg = KernRe(r'\[.*\]').sub('', arg) 677 arg = KernRe(r'\s*,\s*').sub(',', arg) 678 # 679 # Look for a normal decl - "type name[,name...]" 680 # 681 r = KernRe(r'(.*)\s+([\S+,]+)') 682 if r.search(arg): 683 for name in r.group(2).split(','): 684 name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) 685 if not s_id: 686 # Anonymous struct/union 687 newmember += f"{r.group(1)} {name}; " 688 else: 689 newmember += f"{r.group(1)} {s_id}.{name}; " 690 else: 691 newmember += f"{arg}; " 692 # 693 # At the end of the s_id loop, replace the original declaration with 694 # the munged version. 695 # 696 members = members.replace(oldmember, newmember) 697 # 698 # End of the tuple loop - search again and see if there are outer members 699 # that now turn up. 700 # 701 tuples = struct_members.findall(members) 702 return members 703 704 def format_struct_decl(self, declaration): 705 """ 706 Format the ``struct`` declaration into a standard form for inclusion 707 in the resulting docs. 708 """ 709 710 # 711 # Insert newlines, get rid of extra spaces. 712 # 713 declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) 714 declaration = KernRe(r'\}\s+;').sub('};', declaration) 715 # 716 # Format inline enums with each member on its own line. 
717 # 718 r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') 719 while r.search(declaration): 720 declaration = r.sub(r'\1,\n\2', declaration) 721 # 722 # Now go through and supply the right number of tabs 723 # for each line. 724 # 725 def_args = declaration.split('\n') 726 level = 1 727 declaration = "" 728 for clause in def_args: 729 clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) 730 if clause: 731 if '}' in clause and level > 1: 732 level -= 1 733 if not clause.startswith('#'): 734 declaration += "\t" * level 735 declaration += "\t" + clause + "\n" 736 if "{" in clause and "}" not in clause: 737 level += 1 738 return declaration 739 740 741 def dump_struct(self, ln, proto): 742 """ 743 Store an entry for a ``struct`` or ``union`` 744 """ 745 # 746 # Do the basic parse to get the pieces of the declaration. 747 # 748 struct_parts = self.split_struct_proto(proto) 749 if not struct_parts: 750 self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") 751 return 752 decl_type, declaration_name, members = struct_parts 753 754 if self.entry.identifier != declaration_name: 755 self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " 756 f"Prototype was for {decl_type} {declaration_name} instead\n") 757 return 758 # 759 # Go through the list of members applying all of our transformations. 760 # 761 members = trim_private_members(members) 762 members = self.xforms.apply("struct", members) 763 764 nested = NestedMatch() 765 for search, sub in struct_nested_prefixes: 766 members = nested.sub(search, sub, members) 767 # 768 # Deal with embedded struct and union members, and drop enums entirely. 769 # 770 declaration = members 771 members = self.rewrite_struct_members(members) 772 members = re.sub(r'(\{[^\{\}]*\})', '', members) 773 # 774 # Output the result and we are done. 
775 # 776 self.create_parameter_list(ln, decl_type, members, ';', 777 declaration_name) 778 self.check_sections(ln, declaration_name, decl_type) 779 self.output_declaration(decl_type, declaration_name, 780 definition=self.format_struct_decl(declaration), 781 purpose=self.entry.declaration_purpose) 782 783 def dump_enum(self, ln, proto): 784 """ 785 Store an ``enum`` inside self.entries array. 786 """ 787 # 788 # Strip preprocessor directives. Note that this depends on the 789 # trailing semicolon we added in process_proto_type(). 790 # 791 proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) 792 # 793 # Parse out the name and members of the enum. Typedef form first. 794 # 795 r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') 796 if r.search(proto): 797 declaration_name = r.group(2) 798 members = trim_private_members(r.group(1)) 799 # 800 # Failing that, look for a straight enum 801 # 802 else: 803 r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') 804 if r.match(proto): 805 declaration_name = r.group(1) 806 members = trim_private_members(r.group(2)) 807 # 808 # OK, this isn't going to work. 809 # 810 else: 811 self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") 812 return 813 # 814 # Make sure we found what we were expecting. 815 # 816 if self.entry.identifier != declaration_name: 817 if self.entry.identifier == "": 818 self.emit_msg(ln, 819 f"{proto}: wrong kernel-doc identifier on prototype") 820 else: 821 self.emit_msg(ln, 822 f"expecting prototype for enum {self.entry.identifier}. " 823 f"Prototype was for enum {declaration_name} instead") 824 return 825 826 if not declaration_name: 827 declaration_name = "(anonymous)" 828 # 829 # Parse out the name of each enum member, and verify that we 830 # have a description for it. 
831 # 832 member_set = set() 833 members = KernRe(r'\([^;)]*\)').sub('', members) 834 for arg in members.split(','): 835 if not arg: 836 continue 837 arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) 838 self.entry.parameterlist.append(arg) 839 if arg not in self.entry.parameterdescs: 840 self.entry.parameterdescs[arg] = self.undescribed 841 self.emit_msg(ln, 842 f"Enum value '{arg}' not described in enum '{declaration_name}'") 843 member_set.add(arg) 844 # 845 # Ensure that every described member actually exists in the enum. 846 # 847 for k in self.entry.parameterdescs: 848 if k not in member_set: 849 self.emit_msg(ln, 850 f"Excess enum value '@{k}' description in '{declaration_name}'") 851 852 self.output_declaration('enum', declaration_name, 853 purpose=self.entry.declaration_purpose) 854 855 def dump_var(self, ln, proto): 856 """ 857 Store variables that are part of kAPI. 858 """ 859 VAR_ATTRIBS = [ 860 "extern", 861 "const", 862 ] 863 OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" 864 865 # 866 # Store the full prototype before modifying it 867 # 868 full_proto = proto 869 declaration_name = None 870 871 # 872 # Handle macro definitions 873 # 874 macro_prefixes = [ 875 KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"), 876 ] 877 878 for r in macro_prefixes: 879 match = r.search(proto) 880 if match: 881 declaration_name = match.group(1) 882 break 883 884 # 885 # Drop comments and macros to have a pure C prototype 886 # 887 if not declaration_name: 888 proto = self.xforms.apply("var", proto) 889 890 proto = proto.rstrip() 891 892 # 893 # Variable name is at the end of the declaration 894 # 895 896 default_val = None 897 898 r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") 899 if r.match(proto): 900 if not declaration_name: 901 declaration_name = r.group(1) 902 903 default_val = r.group(2) 904 else: 905 r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") 906 907 if r.match(proto): 
908 default_val = r.group(1) 909 if not declaration_name: 910 self.emit_msg(ln,f"{proto}: can't parse variable") 911 return 912 913 if default_val: 914 default_val = default_val.lstrip("=").strip() 915 916 self.output_declaration("var", declaration_name, 917 full_proto=full_proto, 918 default_val=default_val, 919 purpose=self.entry.declaration_purpose) 920 921 def dump_declaration(self, ln, prototype): 922 """ 923 Store a data declaration inside self.entries array. 924 """ 925 926 if self.entry.decl_type == "enum": 927 self.dump_enum(ln, prototype) 928 elif self.entry.decl_type == "typedef": 929 self.dump_typedef(ln, prototype) 930 elif self.entry.decl_type in ["union", "struct"]: 931 self.dump_struct(ln, prototype) 932 elif self.entry.decl_type == "var": 933 self.dump_var(ln, prototype) 934 else: 935 # This would be a bug 936 self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') 937 938 def dump_function(self, ln, prototype): 939 """ 940 Store a function or function macro inside self.entries array. 941 """ 942 943 found = func_macro = False 944 return_type = '' 945 decl_type = 'function' 946 947 # 948 # If we have a macro, remove the "#define" at the front. 949 # 950 new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) 951 if new_proto != prototype: 952 prototype = new_proto 953 # 954 # Dispense with the simple "#define A B" case here; the key 955 # is the space after the name of the symbol being defined. 956 # NOTE that the seemingly misnamed "func_macro" indicates a 957 # macro *without* arguments. 958 # 959 r = KernRe(r'^(\w+)\s+') 960 if r.search(prototype): 961 return_type = '' 962 declaration_name = r.group(1) 963 func_macro = True 964 found = True 965 else: 966 # 967 # Apply the initial transformations. 968 # 969 prototype = self.xforms.apply("func", prototype) 970 971 # Yes, this truly is vile. We are looking for: 972 # 1. Return type (may be nothing if we're looking at a macro) 973 # 2. Function name 974 # 3. Function parameters. 
975 # 976 # All the while we have to watch out for function pointer parameters 977 # (which IIRC is what the two sections are for), C types (these 978 # regexps don't even start to express all the possibilities), and 979 # so on. 980 # 981 # If you mess with these regexps, it's a good idea to check that 982 # the following functions' documentation still comes out right: 983 # - parport_register_device (function pointer parameters) 984 # - atomic_set (macro) 985 # - pci_match_device, __copy_to_user (long return type) 986 987 name = r'\w+' 988 type1 = r'(?:[\w\s]+)?' 989 type2 = r'(?:[\w\s]+\*+)+' 990 # 991 # Attempt to match first on (args) with no internal parentheses; this 992 # lets us easily filter out __acquires() and other post-args stuff. If 993 # that fails, just grab the rest of the line to the last closing 994 # parenthesis. 995 # 996 proto_args = r'\(([^\(]*|.*)\)' 997 # 998 # (Except for the simple macro case) attempt to split up the prototype 999 # in the various ways we understand. 1000 # 1001 if not found: 1002 patterns = [ 1003 rf'^()({name})\s*{proto_args}', 1004 rf'^({type1})\s+({name})\s*{proto_args}', 1005 rf'^({type2})\s*({name})\s*{proto_args}', 1006 ] 1007 1008 for p in patterns: 1009 r = KernRe(p) 1010 if r.match(prototype): 1011 return_type = r.group(1) 1012 declaration_name = r.group(2) 1013 args = r.group(3) 1014 self.create_parameter_list(ln, decl_type, args, ',', 1015 declaration_name) 1016 found = True 1017 break 1018 # 1019 # Parsing done; make sure that things are as we expect. 1020 # 1021 if not found: 1022 self.emit_msg(ln, 1023 f"cannot understand function prototype: '{prototype}'") 1024 return 1025 if self.entry.identifier != declaration_name: 1026 self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). 
" 1027 f"Prototype was for {declaration_name}() instead") 1028 return 1029 self.check_sections(ln, declaration_name, "function") 1030 self.check_return_section(ln, declaration_name, return_type) 1031 # 1032 # Store the result. 1033 # 1034 self.output_declaration(decl_type, declaration_name, 1035 typedef=('typedef' in return_type), 1036 functiontype=return_type, 1037 purpose=self.entry.declaration_purpose, 1038 func_macro=func_macro) 1039 1040 1041 def dump_typedef(self, ln, proto): 1042 """ 1043 Store a ``typedef`` inside self.entries array. 1044 """ 1045 # 1046 # We start by looking for function typedefs. 1047 # 1048 typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' 1049 typedef_ident = r'\*?\s*(\w\S+)\s*' 1050 typedef_args = r'\s*\((.*)\);' 1051 1052 typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) 1053 typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) 1054 1055 # Parse function typedef prototypes 1056 for r in [typedef1, typedef2]: 1057 if not r.match(proto): 1058 continue 1059 1060 return_type = r.group(1).strip() 1061 declaration_name = r.group(2) 1062 args = r.group(3) 1063 1064 if self.entry.identifier != declaration_name: 1065 self.emit_msg(ln, 1066 f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") 1067 return 1068 1069 self.create_parameter_list(ln, 'function', args, ',', declaration_name) 1070 1071 self.output_declaration('function', declaration_name, 1072 typedef=True, 1073 functiontype=return_type, 1074 purpose=self.entry.declaration_purpose) 1075 return 1076 # 1077 # Not a function, try to parse a simple typedef. 1078 # 1079 r = KernRe(r'typedef.*\s+(\w+)\s*;') 1080 if r.match(proto): 1081 declaration_name = r.group(1) 1082 1083 if self.entry.identifier != declaration_name: 1084 self.emit_msg(ln, 1085 f"expecting prototype for typedef {self.entry.identifier}. 
Prototype was for typedef {declaration_name} instead\n") 1086 return 1087 1088 self.output_declaration('typedef', declaration_name, 1089 purpose=self.entry.declaration_purpose) 1090 return 1091 1092 self.emit_msg(ln, "error: Cannot parse typedef!") 1093 1094 @staticmethod 1095 def process_export(function_set, line): 1096 """ 1097 process ``EXPORT_SYMBOL*`` tags 1098 1099 This method doesn't use any variable from the class, so declare it 1100 with a staticmethod decorator. 1101 """ 1102 1103 # We support documenting some exported symbols with different 1104 # names. A horrible hack. 1105 suffixes = [ '_noprof' ] 1106 1107 # Note: it accepts only one EXPORT_SYMBOL* per line, as having 1108 # multiple export lines would violate Kernel coding style. 1109 1110 if export_symbol.search(line): 1111 symbol = export_symbol.group(2) 1112 elif export_symbol_ns.search(line): 1113 symbol = export_symbol_ns.group(2) 1114 else: 1115 return False 1116 # 1117 # Found an export, trim out any special suffixes 1118 # 1119 for suffix in suffixes: 1120 # Be backward compatible with Python < 3.9 1121 if symbol.endswith(suffix): 1122 symbol = symbol[:-len(suffix)] 1123 function_set.add(symbol) 1124 return True 1125 1126 def process_normal(self, ln, line): 1127 """ 1128 STATE_NORMAL: looking for the ``/**`` to begin everything. 1129 """ 1130 1131 if not doc_start.match(line): 1132 return 1133 1134 # start a new entry 1135 self.reset_state(ln) 1136 1137 # next line is always the function name 1138 self.state = state.NAME 1139 1140 def process_name(self, ln, line): 1141 """ 1142 STATE_NAME: Looking for the "name - description" line 1143 """ 1144 # 1145 # Check for a DOC: block and handle them specially. 
1146 # 1147 if doc_block.search(line): 1148 1149 if not doc_block.group(1): 1150 self.entry.begin_section(ln, "Introduction") 1151 else: 1152 self.entry.begin_section(ln, doc_block.group(1)) 1153 1154 self.entry.identifier = self.entry.section 1155 self.state = state.DOCBLOCK 1156 # 1157 # Otherwise we're looking for a normal kerneldoc declaration line. 1158 # 1159 elif doc_decl.search(line): 1160 self.entry.identifier = doc_decl.group(1) 1161 1162 # Test for data declaration 1163 if doc_begin_data.search(line): 1164 self.entry.decl_type = doc_begin_data.group(1) 1165 self.entry.identifier = doc_begin_data.group(2) 1166 # 1167 # Look for a function description 1168 # 1169 elif doc_begin_func.search(line): 1170 self.entry.identifier = doc_begin_func.group(1) 1171 self.entry.decl_type = "function" 1172 # 1173 # We struck out. 1174 # 1175 else: 1176 self.emit_msg(ln, 1177 f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}") 1178 self.state = state.NORMAL 1179 return 1180 # 1181 # OK, set up for a new kerneldoc entry. 1182 # 1183 self.state = state.BODY 1184 self.entry.identifier = self.entry.identifier.strip(" ") 1185 # if there's no @param blocks need to set up default section here 1186 self.entry.begin_section(ln + 1) 1187 # 1188 # Find the description portion, which *should* be there but 1189 # isn't always. 
1190 # (We should be able to capture this from the previous parsing - someday) 1191 # 1192 r = KernRe("[-:](.*)") 1193 if r.search(line): 1194 self.entry.declaration_purpose = trim_whitespace(r.group(1)) 1195 self.state = state.DECLARATION 1196 else: 1197 self.entry.declaration_purpose = "" 1198 1199 if not self.entry.declaration_purpose and self.config.wshort_desc: 1200 self.emit_msg(ln, 1201 f"missing initial short description on line:\n{line}") 1202 1203 if not self.entry.identifier and self.entry.decl_type != "enum": 1204 self.emit_msg(ln, 1205 f"wrong kernel-doc identifier on line:\n{line}") 1206 self.state = state.NORMAL 1207 1208 if self.config.verbose: 1209 self.emit_msg(ln, 1210 f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", 1211 warning=False) 1212 # 1213 # Failed to find an identifier. Emit a warning 1214 # 1215 else: 1216 self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") 1217 1218 def is_new_section(self, ln, line): 1219 """ 1220 Helper function to determine if a new section is being started. 1221 """ 1222 if doc_sect.search(line): 1223 self.state = state.BODY 1224 # 1225 # Pick out the name of our new section, tweaking it if need be. 1226 # 1227 newsection = doc_sect.group(1) 1228 if newsection.lower() == 'description': 1229 newsection = 'Description' 1230 elif newsection.lower() == 'context': 1231 newsection = 'Context' 1232 self.state = state.SPECIAL_SECTION 1233 elif newsection.lower() in ["@return", "@returns", 1234 "return", "returns"]: 1235 newsection = "Return" 1236 self.state = state.SPECIAL_SECTION 1237 elif newsection[0] == '@': 1238 self.state = state.SPECIAL_SECTION 1239 # 1240 # Initialize the contents, and get the new section going. 
1241 # 1242 newcontents = doc_sect.group(2) 1243 if not newcontents: 1244 newcontents = "" 1245 self.dump_section() 1246 self.entry.begin_section(ln, newsection) 1247 self.entry.leading_space = None 1248 1249 self.entry.add_text(newcontents.lstrip()) 1250 return True 1251 return False 1252 1253 def is_comment_end(self, ln, line): 1254 """ 1255 Helper function to detect (and effect) the end of a kerneldoc comment. 1256 """ 1257 if doc_end.search(line): 1258 self.dump_section() 1259 1260 # Look for doc_com + <text> + doc_end: 1261 r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') 1262 if r.match(line): 1263 self.emit_msg(ln, f"suspicious ending line: {line}") 1264 1265 self.entry.prototype = "" 1266 self.entry.new_start_line = ln + 1 1267 1268 self.state = state.PROTO 1269 return True 1270 return False 1271 1272 1273 def process_decl(self, ln, line): 1274 """ 1275 STATE_DECLARATION: We've seen the beginning of a declaration. 1276 """ 1277 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1278 return 1279 # 1280 # Look for anything with the " * " line beginning. 1281 # 1282 if doc_content.search(line): 1283 cont = doc_content.group(1) 1284 # 1285 # A blank line means that we have moved out of the declaration 1286 # part of the comment (without any "special section" parameter 1287 # descriptions). 1288 # 1289 if cont == "": 1290 self.state = state.BODY 1291 # 1292 # Otherwise we have more of the declaration section to soak up. 1293 # 1294 else: 1295 self.entry.declaration_purpose = \ 1296 trim_whitespace(self.entry.declaration_purpose + ' ' + cont) 1297 else: 1298 # Unknown line, ignore 1299 self.emit_msg(ln, f"bad line: {line}") 1300 1301 1302 def process_special(self, ln, line): 1303 """ 1304 STATE_SPECIAL_SECTION: a section ending with a blank line. 1305 """ 1306 # 1307 # If we have hit a blank line (only the " * " marker), then this 1308 # section is done. 
1309 # 1310 if KernRe(r"\s*\*\s*$").match(line): 1311 self.entry.begin_section(ln, dump = True) 1312 self.state = state.BODY 1313 return 1314 # 1315 # Not a blank line, look for the other ways to end the section. 1316 # 1317 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1318 return 1319 # 1320 # OK, we should have a continuation of the text for this section. 1321 # 1322 if doc_content.search(line): 1323 cont = doc_content.group(1) 1324 # 1325 # If the lines of text after the first in a special section have 1326 # leading white space, we need to trim it out or Sphinx will get 1327 # confused. For the second line (the None case), see what we 1328 # find there and remember it. 1329 # 1330 if self.entry.leading_space is None: 1331 r = KernRe(r'^(\s+)') 1332 if r.match(cont): 1333 self.entry.leading_space = len(r.group(1)) 1334 else: 1335 self.entry.leading_space = 0 1336 # 1337 # Otherwise, before trimming any leading chars, be *sure* 1338 # that they are white space. We should maybe warn if this 1339 # isn't the case. 1340 # 1341 for i in range(0, self.entry.leading_space): 1342 if cont[i] != " ": 1343 self.entry.leading_space = i 1344 break 1345 # 1346 # Add the trimmed result to the section and we're done. 1347 # 1348 self.entry.add_text(cont[self.entry.leading_space:]) 1349 else: 1350 # Unknown line, ignore 1351 self.emit_msg(ln, f"bad line: {line}") 1352 1353 def process_body(self, ln, line): 1354 """ 1355 STATE_BODY: the bulk of a kerneldoc comment. 
1356 """ 1357 if self.is_new_section(ln, line) or self.is_comment_end(ln, line): 1358 return 1359 1360 if doc_content.search(line): 1361 cont = doc_content.group(1) 1362 self.entry.add_text(cont) 1363 else: 1364 # Unknown line, ignore 1365 self.emit_msg(ln, f"bad line: {line}") 1366 1367 def process_inline_name(self, ln, line): 1368 """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" 1369 1370 if doc_inline_sect.search(line): 1371 self.entry.begin_section(ln, doc_inline_sect.group(1)) 1372 self.entry.add_text(doc_inline_sect.group(2).lstrip()) 1373 self.state = state.INLINE_TEXT 1374 elif doc_inline_end.search(line): 1375 self.dump_section() 1376 self.state = state.PROTO 1377 elif doc_content.search(line): 1378 self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") 1379 self.state = state.PROTO 1380 # else ... ?? 1381 1382 def process_inline_text(self, ln, line): 1383 """STATE_INLINE_TEXT: docbook comments within a prototype.""" 1384 1385 if doc_inline_end.search(line): 1386 self.dump_section() 1387 self.state = state.PROTO 1388 elif doc_content.search(line): 1389 self.entry.add_text(doc_content.group(1)) 1390 # else ... ?? 1391 1392 def syscall_munge(self, ln, proto): # pylint: disable=W0613 1393 """ 1394 Handle syscall definitions. 
1395 """ 1396 1397 is_void = False 1398 1399 # Strip newlines/CR's 1400 proto = re.sub(r'[\r\n]+', ' ', proto) 1401 1402 # Check if it's a SYSCALL_DEFINE0 1403 if 'SYSCALL_DEFINE0' in proto: 1404 is_void = True 1405 1406 # Replace SYSCALL_DEFINE with correct return type & function name 1407 proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) 1408 1409 r = KernRe(r'long\s+(sys_.*?),') 1410 if r.search(proto): 1411 proto = KernRe(',').sub('(', proto, count=1) 1412 elif is_void: 1413 proto = KernRe(r'\)').sub('(void)', proto, count=1) 1414 1415 # Now delete all of the odd-numbered commas in the proto 1416 # so that argument types & names don't have a comma between them 1417 count = 0 1418 length = len(proto) 1419 1420 if is_void: 1421 length = 0 # skip the loop if is_void 1422 1423 for ix in range(length): 1424 if proto[ix] == ',': 1425 count += 1 1426 if count % 2 == 1: 1427 proto = proto[:ix] + ' ' + proto[ix + 1:] 1428 1429 return proto 1430 1431 def tracepoint_munge(self, ln, proto): 1432 """ 1433 Handle tracepoint definitions. 
1434 """ 1435 1436 tracepointname = None 1437 tracepointargs = None 1438 1439 # Match tracepoint name based on different patterns 1440 r = KernRe(r'TRACE_EVENT\((.*?),') 1441 if r.search(proto): 1442 tracepointname = r.group(1) 1443 1444 r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') 1445 if r.search(proto): 1446 tracepointname = r.group(1) 1447 1448 r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') 1449 if r.search(proto): 1450 tracepointname = r.group(2) 1451 1452 if tracepointname: 1453 tracepointname = tracepointname.lstrip() 1454 1455 r = KernRe(r'TP_PROTO\((.*?)\)') 1456 if r.search(proto): 1457 tracepointargs = r.group(1) 1458 1459 if not tracepointname or not tracepointargs: 1460 self.emit_msg(ln, 1461 f"Unrecognized tracepoint format:\n{proto}\n") 1462 else: 1463 proto = f"static inline void trace_{tracepointname}({tracepointargs})" 1464 self.entry.identifier = f"trace_{self.entry.identifier}" 1465 1466 return proto 1467 1468 def process_proto_function(self, ln, line): 1469 """Ancillary routine to process a function prototype.""" 1470 1471 # strip C99-style comments to end of line 1472 line = KernRe(r"//.*$", re.S).sub('', line) 1473 # 1474 # Soak up the line's worth of prototype text, stopping at { or ; if present. 1475 # 1476 if KernRe(r'\s*#\s*define').match(line): 1477 self.entry.prototype = line 1478 elif not line.startswith('#'): # skip other preprocessor stuff 1479 r = KernRe(r'([^\{]*)') 1480 if r.match(line): 1481 self.entry.prototype += r.group(1) + " " 1482 # 1483 # If we now have the whole prototype, clean it up and declare victory. 
1484 # 1485 if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): 1486 # strip comments and surrounding spaces 1487 self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() 1488 # 1489 # Handle self.entry.prototypes for function pointers like: 1490 # int (*pcs_config)(struct foo) 1491 # by turning it into 1492 # int pcs_config(struct foo) 1493 # 1494 r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') 1495 self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) 1496 # 1497 # Handle special declaration syntaxes 1498 # 1499 if 'SYSCALL_DEFINE' in self.entry.prototype: 1500 self.entry.prototype = self.syscall_munge(ln, 1501 self.entry.prototype) 1502 else: 1503 r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') 1504 if r.search(self.entry.prototype): 1505 self.entry.prototype = self.tracepoint_munge(ln, 1506 self.entry.prototype) 1507 # 1508 # ... and we're done 1509 # 1510 self.dump_function(ln, self.entry.prototype) 1511 self.reset_state(ln) 1512 1513 def process_proto_type(self, ln, line): 1514 """ 1515 Ancillary routine to process a type. 1516 """ 1517 1518 # Strip C99-style comments and surrounding whitespace 1519 line = KernRe(r"//.*$", re.S).sub('', line).strip() 1520 if not line: 1521 return # nothing to see here 1522 1523 # To distinguish preprocessor directive from regular declaration later. 1524 if line.startswith('#'): 1525 line += ";" 1526 # 1527 # Split the declaration on any of { } or ;, and accumulate pieces 1528 # until we hit a semicolon while not inside {brackets} 1529 # 1530 r = KernRe(r'(.*?)([{};])') 1531 for chunk in r.split(line): 1532 if chunk: # Ignore empty matches 1533 self.entry.prototype += chunk 1534 # 1535 # This cries out for a match statement ... someday after we can 1536 # drop Python 3.9 ... 
1537 # 1538 if chunk == '{': 1539 self.entry.brcount += 1 1540 elif chunk == '}': 1541 self.entry.brcount -= 1 1542 elif chunk == ';' and self.entry.brcount <= 0: 1543 self.dump_declaration(ln, self.entry.prototype) 1544 self.reset_state(ln) 1545 return 1546 # 1547 # We hit the end of the line while still in the declaration; put 1548 # in a space to represent the newline. 1549 # 1550 self.entry.prototype += ' ' 1551 1552 def process_proto(self, ln, line): 1553 """STATE_PROTO: reading a function/whatever prototype.""" 1554 1555 if doc_inline_oneline.search(line): 1556 self.entry.begin_section(ln, doc_inline_oneline.group(1)) 1557 self.entry.add_text(doc_inline_oneline.group(2)) 1558 self.dump_section() 1559 1560 elif doc_inline_start.search(line): 1561 self.state = state.INLINE_NAME 1562 1563 elif self.entry.decl_type == 'function': 1564 self.process_proto_function(ln, line) 1565 1566 else: 1567 self.process_proto_type(ln, line) 1568 1569 def process_docblock(self, ln, line): 1570 """STATE_DOCBLOCK: within a ``DOC:`` block.""" 1571 1572 if doc_end.search(line): 1573 self.dump_section() 1574 self.output_declaration("doc", self.entry.identifier) 1575 self.reset_state(ln) 1576 1577 elif doc_content.search(line): 1578 self.entry.add_text(doc_content.group(1)) 1579 1580 def parse_export(self): 1581 """ 1582 Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. 1583 """ 1584 1585 export_table = set() 1586 1587 try: 1588 with open(self.fname, "r", encoding="utf8", 1589 errors="backslashreplace") as fp: 1590 1591 for line in fp: 1592 self.process_export(export_table, line) 1593 1594 except IOError: 1595 return None 1596 1597 return export_table 1598 1599 #: The state/action table telling us which function to invoke in each state. 
1600 state_actions = { 1601 state.NORMAL: process_normal, 1602 state.NAME: process_name, 1603 state.BODY: process_body, 1604 state.DECLARATION: process_decl, 1605 state.SPECIAL_SECTION: process_special, 1606 state.INLINE_NAME: process_inline_name, 1607 state.INLINE_TEXT: process_inline_text, 1608 state.PROTO: process_proto, 1609 state.DOCBLOCK: process_docblock, 1610 } 1611 1612 def parse_kdoc(self): 1613 """ 1614 Open and process each line of a C source file. 1615 The parsing is controlled via a state machine, and the line is passed 1616 to a different process function depending on the state. The process 1617 function may update the state as needed. 1618 1619 Besides parsing kernel-doc tags, it also parses export symbols. 1620 """ 1621 1622 prev = "" 1623 prev_ln = None 1624 export_table = set() 1625 1626 try: 1627 with open(self.fname, "r", encoding="utf8", 1628 errors="backslashreplace") as fp: 1629 for ln, line in enumerate(fp): 1630 1631 line = line.expandtabs().strip("\n") 1632 1633 # Group continuation lines on prototypes 1634 if self.state == state.PROTO: 1635 if line.endswith("\\"): 1636 prev += line.rstrip("\\") 1637 if not prev_ln: 1638 prev_ln = ln 1639 continue 1640 1641 if prev: 1642 ln = prev_ln 1643 line = prev + line 1644 prev = "" 1645 prev_ln = None 1646 1647 self.config.log.debug("%d %s: %s", 1648 ln, state.name[self.state], 1649 line) 1650 1651 # This is an optimization over the original script. 1652 # There, when export_file was used for the same file, 1653 # it was read twice. Here, we use the already-existing 1654 # loop to parse exported symbols as well. 1655 # 1656 if (self.state != state.NORMAL) or \ 1657 not self.process_export(export_table, line): 1658 # Hand this line to the appropriate state handler 1659 self.state_actions[self.state](self, ln, line) 1660 1661 self.emit_unused_warnings() 1662 1663 except OSError: 1664 self.config.log.error(f"Error: Cannot open file {self.fname}") 1665 1666 return export_table, self.entries 1667