xref: /linux/tools/lib/python/kdoc/kdoc_parser.py (revision 4ff59bdd93f0e80b5014977502d082c778f96304)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6
7"""
8Classes and functions related to reading a C language source or header FILE
9and extract embedded documentation comments from it.
10"""
11
12import sys
13import re
14from pprint import pformat
15
16from kdoc.kdoc_re import NestedMatch, KernRe
17from kdoc.kdoc_item import KdocItem
18
19#
20# Regular expressions used to parse kernel-doc markups at KernelDoc class.
21#
22# Let's declare them in lowercase outside any class to make it easier to
23# convert from the Perl script.
24#
25# As those are evaluated at the beginning, no need to cache them
26#
27
28# Allow whitespace at end of comment start.
# Matches a line consisting only of the "/**" opener (column 0).
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# The "*/" comment terminator; unanchored, so it may appear anywhere.
doc_end = KernRe(r'\*/', cache=False)
# Comment leader: optional blanks, a "*", then any amount of blanks.
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same leader, but swallowing at most one space after the "*".
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Comment leader followed by one captured word.
# NOTE(review): "+" presumably concatenates the two patterns - confirm
# against KernRe.
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
# Case-insensitive and unanchored version of the names above.
known_sections = KernRe(known_section_names, flags = re.I)
# Group 1 is the section/parameter name, group 2 the text after the ":".
# The "[^:]" is what rejects a second colon right after the first one.
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Comment leader followed by the captured remainder of the line.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline variants: the same markers, but allowing leading indentation.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
# Group 1 is the "@name" part, group 2 the text after the colon.
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
# A complete "/** @name: text */" comment on a single line.
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# In both patterns group 1 is the optional "_GPL" suffix and group 2 the
# exported symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name", optionally followed by ".member" / "->member" chains and a
# trailing "..."; group 1 is everything after the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
# "DOC: title" blocks; group 1 is the title.
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
# Data declarations; group 1 is the keyword, group 2 the (possibly empty) name.
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
# Functions and macros; group 1 is the name.
doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)
71
72#
73# Regexes here are guaranteed to have the end delimiter matching
74# the start delimiter. Yet, right now, only one replace group
75# is allowed.
76#
struct_nested_prefixes = [
    # Lock/context annotations: "name(...)" is replaced by nothing.
    (re.compile(annotation + r"\s*\("), "")
    for annotation in (
        "__cond_acquires",
        "__cond_releases",
        "__acquires",
        "__releases",
        "__must_hold",
        "__must_not_hold",
        "__must_hold_shared",
        "__cond_acquires_shared",
        "__acquires_shared",
        "__releases_shared",
    )
] + [
    # STRUCT_GROUP(...) is replaced by its first replace group.
    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
]
90
91#
92# Ancillary functions
93#
94
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    Strip ``s`` and squeeze each run of two or more whitespace characters
    into a single blank.
    """
    stripped = s.strip()
    return multi_space.sub(' ', stripped)
101
def trim_private_members(text):
    """
    Drop the ``struct``/``enum`` members that were flagged as "private",
    then remove every remaining C comment.

    Returns the resulting text with leading/trailing whitespace stripped.
    """
    # A private region that is closed by a later "/* public: ... */".
    private_until_public = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/',
                                  flags=re.S)
    # A private region that lasts until the end of the text.
    private_until_end = KernRe(r'/\*\s*private:.*', flags=re.S)
    # Any comment plus the whitespace around it.  Comments can only go
    # once the private/public markers above have been consumed.
    any_comment = KernRe(r'\s*/\*.*?\*/\s*', flags=re.S)

    for pattern in (private_until_public, private_until_end, any_comment):
        text = pattern.sub('', text)
    return text.strip()
115
class state:
    """
    Numeric identifiers for the parser's state machine, plus a table that
    maps each identifier back to a printable name.
    """

    # Parser states

    #: Normal code.
    NORMAL = 0
    #: Looking for function name.
    NAME = 1
    #: We have seen a declaration which might not be done.
    DECLARATION = 2
    #: The body of the comment.
    BODY = 3
    #: Doc section ending with a blank line.
    SPECIAL_SECTION = 4
    #: Scanning prototype.
    PROTO = 5
    #: Documentation block.
    DOCBLOCK = 6
    #: Gathering doc outside main block.
    INLINE_NAME = 7
    #: Reading the body of inline docs.
    INLINE_TEXT = 8

    #: Names for each parser state.
    name = [
        "NORMAL", "NAME", "DECLARATION",
        "BODY", "SPECIAL_SECTION", "PROTO",
        "DOCBLOCK", "INLINE_NAME", "INLINE_TEXT",
    ]
144
145
SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    An entry accumulates the text lines of the section currently being
    read, the finished sections and parameter descriptions, and the
    warnings raised while parsing it.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` object (used for non-warning
        messages), ``fname`` is the file name used as message prefix and
        ``ln`` is the line preceding the declaration start.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # Section being accumulated.  Give both fields a value here so that
        # dump_section() is safe even if begin_section() was never called.
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message, prefixed with ``<file name>:<ln>``.

        Non-warnings are logged right away via ``config.log.info()``;
        warnings are only queued at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section called ``title`` starting at ``line_no``.

        When ``dump`` is true, the section accumulated so far is stored
        first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        Sections whose name matches ``type_param`` are stored as parameter
        descriptions; anything else goes to ``self.sections``.  When
        ``start_new`` is true, the accumulated text is cleared and the
        current section goes back to the default one.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        elif name in self.sections and self.sections[name] != "":
            # Only warn on user-specified duplicate section names
            if name != SECTION_DEFAULT:
                self.emit_msg(self.new_start_line,
                              f"duplicate section name '{name}'")
            # Treat as a new paragraph - add a blank line
            self.sections[name] += '\n' + contents
        else:
            self.sections[name] = contents
            self.section_start_lines[name] = self.new_start_line
            self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
255
# Set to True once KernelDoc has reported that the running Python is too
# old, so that the message is produced a single time per process.
python_warning = False
257
class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.

    The entry being parsed is kept at ``self.entry``; finished entries
    are appended to ``self.entries`` for the output logic to consume.
    """

    #: Name of context section.
    section_context = "Context"

    #: Name of return section.
    section_return = "Return"

    #: String to write when a parameter is not described.
    undescribed = "-- undescribed --"
272
273    def __init__(self, config, fname, xforms):
274        """Initialize internal variables"""
275
276        self.fname = fname
277        self.config = config
278        self.xforms = xforms
279
280        # Initial state for the state machines
281        self.state = state.NORMAL
282
283        # Store entry currently being processed
284        self.entry = None
285
286        # Place all potential outputs into an array
287        self.entries = []
288
289        #
290        # We need Python 3.7 for its "dicts remember the insertion
291        # order" guarantee
292        #
293        global python_warning
294        if (not python_warning and
295            sys.version_info.major == 3 and sys.version_info.minor < 7):
296
297            self.emit_msg(0,
298                          'Python 3.7 or later is required for correct results')
299            python_warning = True
300
301    def emit_msg(self, ln, msg, *, warning=True):
302        """Emit a message"""
303
304        if self.entry:
305            self.entry.emit_msg(ln, msg, warning=warning)
306            return
307
308        log_msg = f"{self.fname}:{ln} {msg}"
309
310        if warning:
311            self.config.log.warning(log_msg)
312        else:
313            self.config.log.info(log_msg)
314
315    def dump_section(self, start_new=True):
316        """
317        Dump section contents to arrays/hashes intended for that purpose.
318        """
319
320        if self.entry:
321            self.entry.dump_section(start_new)
322
323    # TODO: rename it to store_declaration after removal of kernel-doc.pl
324    def output_declaration(self, dtype, name, **args):
325        """
326        Store the entry into an entry array.
327
328        The actual output and output filters will be handled elsewhere.
329        """
330
331        item = KdocItem(name, self.fname, dtype,
332                        self.entry.declaration_start_line, **args)
333        item.warnings = self.entry.warnings
334
335        # Drop empty sections
336        # TODO: improve empty sections logic to emit warnings
337        sections = self.entry.sections
338        for section in ["Description", "Return"]:
339            if section in sections and not sections[section].rstrip():
340                del sections[section]
341        item.set_sections(sections, self.entry.section_start_lines)
342        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
343                        self.entry.parametertypes,
344                        self.entry.parameterdesc_start_lines)
345        self.entries.append(item)
346
347        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
348
349    def emit_unused_warnings(self):
350        """
351        When the parser fails to produce a valid entry, it places some
352        warnings under `entry.warnings` that will be discarded when resetting
353        the state.
354
355        Ensure that those warnings are not lost.
356
357        .. note::
358
359              Because we are calling `config.warning()` here, those
360              warnings are not filtered by the `-W` parameters: they will all
361              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
362              `-Wcontents-before-sections` are used.
363
364              Allowing those warnings to be filtered is complex, because it
365              would require storing them in a buffer and then filtering them
366              during the output step of the code, depending on the
367              selected symbols.
368        """
369        if self.entry and self.entry not in self.entries:
370            for log_msg in self.entry.warnings:
371                self.config.warning(log_msg)
372
373    def reset_state(self, ln):
374        """
375        Ancillary routine to create a new entry. It initializes all
376        variables used by the state machine.
377        """
378
379        self.emit_unused_warnings()
380
381        self.entry = KernelEntry(self.config, self.fname, ln)
382
383        # State flags
384        self.state = state.NORMAL
385
386    def push_parameter(self, ln, decl_type, param, dtype,
387                       org_arg, declaration_name):
388        """
389        Store parameters and their descriptions at self.entry.
390        """
391
392        if self.entry.anon_struct_union and dtype == "" and param == "}":
393            return  # Ignore the ending }; from anonymous struct/union
394
395        self.entry.anon_struct_union = False
396
397        param = KernRe(r'[\[\)].*').sub('', param, count=1)
398
399        #
400        # Look at various "anonymous type" cases.
401        #
402        if dtype == '':
403            if param.endswith("..."):
404                if len(param) > 3: # there is a name provided, use that
405                    param = param[:-3]
406                if not self.entry.parameterdescs.get(param):
407                    self.entry.parameterdescs[param] = "variable arguments"
408
409            elif (not param) or param == "void":
410                param = "void"
411                self.entry.parameterdescs[param] = "no arguments"
412
413            elif param in ["struct", "union"]:
414                # Handle unnamed (anonymous) union or struct
415                dtype = param
416                param = "{unnamed_" + param + "}"
417                self.entry.parameterdescs[param] = "anonymous\n"
418                self.entry.anon_struct_union = True
419
420        # Warn if parameter has no description
421        # (but ignore ones starting with # as these are not parameters
422        # but inline preprocessor statements)
423        if param not in self.entry.parameterdescs and not param.startswith("#"):
424            self.entry.parameterdescs[param] = self.undescribed
425
426            if "." not in param:
427                if decl_type == 'function':
428                    dname = f"{decl_type} parameter"
429                else:
430                    dname = f"{decl_type} member"
431
432                self.emit_msg(ln,
433                              f"{dname} '{param}' not described in '{declaration_name}'")
434
435        # Strip spaces from param so that it is one continuous string on
436        # parameterlist. This fixes a problem where check_sections()
437        # cannot find a parameter like "addr[6 + 2]" because it actually
438        # appears as "addr[6", "+", "2]" on the parameter list.
439        # However, it's better to maintain the param string unchanged for
440        # output, so just weaken the string compare in check_sections()
441        # to ignore "[blah" in a parameter string.
442
443        self.entry.parameterlist.append(param)
444        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
445        self.entry.parametertypes[param] = org_arg
446
447
448    def create_parameter_list(self, ln, decl_type, args,
449                              splitter, declaration_name):
450        """
451        Creates a list of parameters, storing them at self.entry.
452        """
453
454        # temporarily replace all commas inside function pointer definition
455        arg_expr = KernRe(r'(\([^\),]+),')
456        while arg_expr.search(args):
457            args = arg_expr.sub(r"\1#", args)
458
459        for arg in args.split(splitter):
460            # Ignore argument attributes
461            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
462
463            # Strip leading/trailing spaces
464            arg = arg.strip()
465            arg = KernRe(r'\s+').sub(' ', arg, count=1)
466
467            if arg.startswith('#'):
468                # Treat preprocessor directive as a typeless variable just to fill
469                # corresponding data structures "correctly". Catch it later in
470                # output_* subs.
471
472                # Treat preprocessor directive as a typeless variable
473                self.push_parameter(ln, decl_type, arg, "",
474                                    "", declaration_name)
475            #
476            # The pointer-to-function case.
477            #
478            elif KernRe(r'\(.+\)\s*\(').search(arg):
479                arg = arg.replace('#', ',')
480                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
481                           r'([\w\[\].]*)'    # Capture the name and possible [array]
482                           r'\s*\)')	      # Make sure the trailing ")" is there
483                if r.match(arg):
484                    param = r.group(1)
485                else:
486                    self.emit_msg(ln, f"Invalid param: {arg}")
487                    param = arg
488                dtype = arg.replace(param, '')
489                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
490            #
491            # The array-of-pointers case.  Dig the parameter name out from the middle
492            # of the declaration.
493            #
494            elif KernRe(r'\(.+\)\s*\[').search(arg):
495                r = KernRe(r'[^\(]+\(\s*\*\s*'		# Up to "(" and maybe "*"
496                           r'([\w.]*?)'			# The actual pointer name
497                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
498                if r.match(arg):
499                    param = r.group(1)
500                else:
501                    self.emit_msg(ln, f"Invalid param: {arg}")
502                    param = arg
503                dtype = arg.replace(param, '')
504                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
505            elif arg:
506                #
507                # Clean up extraneous spaces and split the string at commas; the first
508                # element of the resulting list will also include the type information.
509                #
510                arg = KernRe(r'\s*:\s*').sub(":", arg)
511                arg = KernRe(r'\s*\[').sub('[', arg)
512                args = KernRe(r'\s*,\s*').split(arg)
513                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
514                #
515                # args[0] has a string of "type a".  If "a" includes an [array]
516                # declaration, we want to not be fooled by any white space inside
517                # the brackets, so detect and handle that case specially.
518                #
519                r = KernRe(r'^([^[\]]*\s+)(.*)$')
520                if r.match(args[0]):
521                    args[0] = r.group(2)
522                    dtype = r.group(1)
523                else:
524                    # No space in args[0]; this seems wrong but preserves previous behavior
525                    dtype = ''
526
527                bitfield_re = KernRe(r'(.*?):(\w+)')
528                for param in args:
529                    #
530                    # For pointers, shift the star(s) from the variable name to the
531                    # type declaration.
532                    #
533                    r = KernRe(r'^(\*+)\s*(.*)')
534                    if r.match(param):
535                        self.push_parameter(ln, decl_type, r.group(2),
536                                            f"{dtype} {r.group(1)}",
537                                            arg, declaration_name)
538                    #
539                    # Perform a similar shift for bitfields.
540                    #
541                    elif bitfield_re.search(param):
542                        if dtype != "":  # Skip unnamed bit-fields
543                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
544                                                f"{dtype}:{bitfield_re.group(2)}",
545                                                arg, declaration_name)
546                    else:
547                        self.push_parameter(ln, decl_type, param, dtype,
548                                            arg, declaration_name)
549
550    def check_sections(self, ln, decl_name, decl_type):
551        """
552        Check for errors inside sections, emitting warnings if not found
553        parameters are described.
554        """
555        for section in self.entry.sections:
556            if section not in self.entry.parameterlist and \
557               not known_sections.search(section):
558                if decl_type == 'function':
559                    dname = f"{decl_type} parameter"
560                else:
561                    dname = f"{decl_type} member"
562                self.emit_msg(ln,
563                              f"Excess {dname} '{section}' description in '{decl_name}'")
564
565    def check_return_section(self, ln, declaration_name, return_type):
566        """
567        If the function doesn't return void, warns about the lack of a
568        return description.
569        """
570
571        if not self.config.wreturn:
572            return
573
574        # Ignore an empty return type (It's a macro)
575        # Ignore functions with a "void" return type (but not "void *")
576        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
577            return
578
579        if not self.entry.sections.get("Return", None):
580            self.emit_msg(ln,
581                          f"No description found for return value of '{declaration_name}'")
582
583    def split_struct_proto(self, proto):
584        """
585        Split apart a structure prototype; returns (struct|union, name,
586        members) or ``None``.
587        """
588
589        type_pattern = r'(struct|union)'
590        qualifiers = [
591            "__attribute__",
592            "__packed",
593            "__aligned",
594            "____cacheline_aligned_in_smp",
595            "____cacheline_aligned",
596        ]
597        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
598
599        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
600        if r.search(proto):
601            return (r.group(1), r.group(2), r.group(3))
602        else:
603            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
604            if r.search(proto):
605                return (r.group(1), r.group(3), r.group(2))
606        return None
607
608    def rewrite_struct_members(self, members):
609        """
610        Process ``struct``/``union`` members from the most deeply nested
611        outward.
612
613        Rewrite the members of a ``struct`` or ``union`` for easier formatting
614        later on. Among other things, this function will turn a member like::
615
616          struct { inner_members; } foo;
617
618        into::
619
620          struct foo; inner_members;
621        """
622
623        #
624        # The trick is in the ``^{`` below - it prevents a match of an outer
625        # ``struct``/``union`` until the inner one has been munged
626        # (removing the ``{`` in the process).
627        #
628        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
629                                r'([^\{\};]+)' 	    # 1: possible name
630                                r'(\{)'
631                                r'([^\{\}]*)'       # 3: Contents of declaration
632                                r'(\})'
633                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
634        tuples = struct_members.findall(members)
635        while tuples:
636            for t in tuples:
637                newmember = ""
638                oldmember = "".join(t) # Reconstruct the original formatting
639                dtype, name, lbr, content, rbr, rest, semi = t
640                #
641                # Pass through each field name, normalizing the form and formatting.
642                #
643                for s_id in rest.split(','):
644                    s_id = s_id.strip()
645                    newmember += f"{dtype} {s_id}; "
646                    #
647                    # Remove bitfield/array/pointer info, getting the bare name.
648                    #
649                    s_id = KernRe(r'[:\[].*').sub('', s_id)
650                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
651                    #
652                    # Pass through the members of this inner structure/union.
653                    #
654                    for arg in content.split(';'):
655                        arg = arg.strip()
656                        #
657                        # Look for (type)(*name)(args) - pointer to function
658                        #
659                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
660                        if r.match(arg):
661                            dtype, name, extra = r.group(1), r.group(2), r.group(3)
662                            # Pointer-to-function
663                            if not s_id:
664                                # Anonymous struct/union
665                                newmember += f"{dtype}{name}{extra}; "
666                            else:
667                                newmember += f"{dtype}{s_id}.{name}{extra}; "
668                        #
669                        # Otherwise a non-function member.
670                        #
671                        else:
672                            #
673                            # Remove bitmap and array portions and spaces around commas
674                            #
675                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
676                            arg = KernRe(r'\[.*\]').sub('', arg)
677                            arg = KernRe(r'\s*,\s*').sub(',', arg)
678                            #
679                            # Look for a normal decl - "type name[,name...]"
680                            #
681                            r = KernRe(r'(.*)\s+([\S+,]+)')
682                            if r.search(arg):
683                                for name in r.group(2).split(','):
684                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
685                                    if not s_id:
686                                        # Anonymous struct/union
687                                        newmember += f"{r.group(1)} {name}; "
688                                    else:
689                                        newmember += f"{r.group(1)} {s_id}.{name}; "
690                            else:
691                                newmember += f"{arg}; "
692                #
693                # At the end of the s_id loop, replace the original declaration with
694                # the munged version.
695                #
696                members = members.replace(oldmember, newmember)
697            #
698            # End of the tuple loop - search again and see if there are outer members
699            # that now turn up.
700            #
701            tuples = struct_members.findall(members)
702        return members
703
704    def format_struct_decl(self, declaration):
705        """
706        Format the ``struct`` declaration into a standard form for inclusion
707        in the resulting docs.
708        """
709
710        #
711        # Insert newlines, get rid of extra spaces.
712        #
713        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
714        declaration = KernRe(r'\}\s+;').sub('};', declaration)
715        #
716        # Format inline enums with each member on its own line.
717        #
718        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
719        while r.search(declaration):
720            declaration = r.sub(r'\1,\n\2', declaration)
721        #
722        # Now go through and supply the right number of tabs
723        # for each line.
724        #
725        def_args = declaration.split('\n')
726        level = 1
727        declaration = ""
728        for clause in def_args:
729            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
730            if clause:
731                if '}' in clause and level > 1:
732                    level -= 1
733                if not clause.startswith('#'):
734                    declaration += "\t" * level
735                declaration += "\t" + clause + "\n"
736                if "{" in clause and "}" not in clause:
737                    level += 1
738        return declaration
739
740
741    def dump_struct(self, ln, proto):
742        """
743        Store an entry for a ``struct`` or ``union``
744        """
745        #
746        # Do the basic parse to get the pieces of the declaration.
747        #
748        struct_parts = self.split_struct_proto(proto)
749        if not struct_parts:
750            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
751            return
752        decl_type, declaration_name, members = struct_parts
753
754        if self.entry.identifier != declaration_name:
755            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
756                          f"Prototype was for {decl_type} {declaration_name} instead\n")
757            return
758        #
759        # Go through the list of members applying all of our transformations.
760        #
761        members = trim_private_members(members)
762        members = self.xforms.apply("struct", members)
763
764        nested = NestedMatch()
765        for search, sub in struct_nested_prefixes:
766            members = nested.sub(search, sub, members)
767        #
768        # Deal with embedded struct and union members, and drop enums entirely.
769        #
770        declaration = members
771        members = self.rewrite_struct_members(members)
772        members = re.sub(r'(\{[^\{\}]*\})', '', members)
773        #
774        # Output the result and we are done.
775        #
776        self.create_parameter_list(ln, decl_type, members, ';',
777                                   declaration_name)
778        self.check_sections(ln, declaration_name, decl_type)
779        self.output_declaration(decl_type, declaration_name,
780                                definition=self.format_struct_decl(declaration),
781                                purpose=self.entry.declaration_purpose)
782
783    def dump_enum(self, ln, proto):
784        """
785        Store an ``enum`` inside self.entries array.
786        """
787        #
788        # Strip preprocessor directives.  Note that this depends on the
789        # trailing semicolon we added in process_proto_type().
790        #
791        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
792        #
793        # Parse out the name and members of the enum.  Typedef form first.
794        #
795        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
796        if r.search(proto):
797            declaration_name = r.group(2)
798            members = trim_private_members(r.group(1))
799        #
800        # Failing that, look for a straight enum
801        #
802        else:
803            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
804            if r.match(proto):
805                declaration_name = r.group(1)
806                members = trim_private_members(r.group(2))
807        #
808        # OK, this isn't going to work.
809        #
810            else:
811                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
812                return
813        #
814        # Make sure we found what we were expecting.
815        #
816        if self.entry.identifier != declaration_name:
817            if self.entry.identifier == "":
818                self.emit_msg(ln,
819                              f"{proto}: wrong kernel-doc identifier on prototype")
820            else:
821                self.emit_msg(ln,
822                              f"expecting prototype for enum {self.entry.identifier}. "
823                              f"Prototype was for enum {declaration_name} instead")
824            return
825
826        if not declaration_name:
827            declaration_name = "(anonymous)"
828        #
829        # Parse out the name of each enum member, and verify that we
830        # have a description for it.
831        #
832        member_set = set()
833        members = KernRe(r'\([^;)]*\)').sub('', members)
834        for arg in members.split(','):
835            if not arg:
836                continue
837            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
838            self.entry.parameterlist.append(arg)
839            if arg not in self.entry.parameterdescs:
840                self.entry.parameterdescs[arg] = self.undescribed
841                self.emit_msg(ln,
842                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
843            member_set.add(arg)
844        #
845        # Ensure that every described member actually exists in the enum.
846        #
847        for k in self.entry.parameterdescs:
848            if k not in member_set:
849                self.emit_msg(ln,
850                              f"Excess enum value '@{k}' description in '{declaration_name}'")
851
852        self.output_declaration('enum', declaration_name,
853                                purpose=self.entry.declaration_purpose)
854
855    def dump_var(self, ln, proto):
856        """
857        Store variables that are part of kAPI.
858        """
859        VAR_ATTRIBS = [
860            "extern",
861            "const",
862        ]
863        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
864
865        #
866        # Store the full prototype before modifying it
867        #
868        full_proto = proto
869        declaration_name = None
870
871        #
872        # Handle macro definitions
873        #
874        macro_prefixes = [
875            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
876        ]
877
878        for r in macro_prefixes:
879            match = r.search(proto)
880            if match:
881                declaration_name = match.group(1)
882                break
883
884        #
885        # Drop comments and macros to have a pure C prototype
886        #
887        if not declaration_name:
888            proto = self.xforms.apply("var", proto)
889
890        proto = proto.rstrip()
891
892        #
893        # Variable name is at the end of the declaration
894        #
895
896        default_val = None
897
898        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
899        if r.match(proto):
900            if not declaration_name:
901                declaration_name = r.group(1)
902
903            default_val = r.group(2)
904        else:
905            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
906
907            if r.match(proto):
908                default_val = r.group(1)
909        if not declaration_name:
910           self.emit_msg(ln,f"{proto}: can't parse variable")
911           return
912
913        if default_val:
914            default_val = default_val.lstrip("=").strip()
915
916        self.output_declaration("var", declaration_name,
917                                full_proto=full_proto,
918                                default_val=default_val,
919                                purpose=self.entry.declaration_purpose)
920
921    def dump_declaration(self, ln, prototype):
922        """
923        Store a data declaration inside self.entries array.
924        """
925
926        if self.entry.decl_type == "enum":
927            self.dump_enum(ln, prototype)
928        elif self.entry.decl_type == "typedef":
929            self.dump_typedef(ln, prototype)
930        elif self.entry.decl_type in ["union", "struct"]:
931            self.dump_struct(ln, prototype)
932        elif self.entry.decl_type == "var":
933            self.dump_var(ln, prototype)
934        else:
935            # This would be a bug
936            self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
937
    def dump_function(self, ln, prototype):
        """
        Store a function or function macro inside self.entries array.

        ``ln`` is the line number used for messages and for the parameter
        list; ``prototype`` is the prototype text to be parsed.

        A leading ``#define`` is stripped; otherwise the "func"
        transformations are applied.  The prototype is then split into
        return type, name and arguments.  If it cannot be understood, or
        if the name differs from ``self.entry.identifier``, a message is
        emitted and nothing is stored.
        """

        found = func_macro = False
        return_type = ''
        decl_type = 'function'

        #
        # If we have a macro, remove the "#define" at the front.
        #
        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
        if new_proto != prototype:
            prototype = new_proto
            #
            # Dispense with the simple "#define A B" case here; the key
            # is the space after the name of the symbol being defined.
            # NOTE that the seemingly misnamed "func_macro" indicates a
            # macro *without* arguments.
            #
            r = KernRe(r'^(\w+)\s+')
            if r.search(prototype):
                return_type = ''
                declaration_name = r.group(1)
                func_macro = True
                found = True
        else:
            #
            # Apply the initial transformations.
            #
            prototype = self.xforms.apply("func", prototype)

        # Yes, this truly is vile.  We are looking for:
        # 1. Return type (may be nothing if we're looking at a macro)
        # 2. Function name
        # 3. Function parameters.
        #
        # All the while we have to watch out for function pointer parameters
        # (which IIRC is what the two sections are for), C types (these
        # regexps don't even start to express all the possibilities), and
        # so on.
        #
        # If you mess with these regexps, it's a good idea to check that
        # the following functions' documentation still comes out right:
        # - parport_register_device (function pointer parameters)
        # - atomic_set (macro)
        # - pci_match_device, __copy_to_user (long return type)

        name = r'\w+'
        type1 = r'(?:[\w\s]+)?'
        type2 = r'(?:[\w\s]+\*+)+'
        #
        # Attempt to match first on (args) with no internal parentheses; this
        # lets us easily filter out __acquires() and other post-args stuff.  If
        # that fails, just grab the rest of the line to the last closing
        # parenthesis.
        #
        proto_args = r'\(([^\(]*|.*)\)'
        #
        # (Except for the simple macro case) attempt to split up the prototype
        # in the various ways we understand.
        #
        if not found:
            # Tried in order; the first pattern that matches wins:
            #   1. no return type at all
            #   2. a return type made of words and whitespace
            #   3. a return type ending in one or more '*'
            patterns = [
                rf'^()({name})\s*{proto_args}',
                rf'^({type1})\s+({name})\s*{proto_args}',
                rf'^({type2})\s*({name})\s*{proto_args}',
            ]

            for p in patterns:
                r = KernRe(p)
                if r.match(prototype):
                    return_type = r.group(1)
                    declaration_name = r.group(2)
                    args = r.group(3)
                    self.create_parameter_list(ln, decl_type, args, ',',
                                               declaration_name)
                    found = True
                    break
        #
        # Parsing done; make sure that things are as we expect.
        #
        if not found:
            self.emit_msg(ln,
                          f"cannot understand function prototype: '{prototype}'")
            return
        if self.entry.identifier != declaration_name:
            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
                          f"Prototype was for {declaration_name}() instead")
            return
        self.check_sections(ln, declaration_name, "function")
        self.check_return_section(ln, declaration_name, return_type)
        #
        # Store the result.  The entry is flagged as a typedef when the
        # word "typedef" appears in the parsed return type.
        #
        self.output_declaration(decl_type, declaration_name,
                                typedef=('typedef' in return_type),
                                functiontype=return_type,
                                purpose=self.entry.declaration_purpose,
                                func_macro=func_macro)
1039
1040
1041    def dump_typedef(self, ln, proto):
1042        """
1043        Store a ``typedef`` inside self.entries array.
1044        """
1045        #
1046        # We start by looking for function typedefs.
1047        #
1048        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
1049        typedef_ident = r'\*?\s*(\w\S+)\s*'
1050        typedef_args = r'\s*\((.*)\);'
1051
1052        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
1053        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
1054
1055        # Parse function typedef prototypes
1056        for r in [typedef1, typedef2]:
1057            if not r.match(proto):
1058                continue
1059
1060            return_type = r.group(1).strip()
1061            declaration_name = r.group(2)
1062            args = r.group(3)
1063
1064            if self.entry.identifier != declaration_name:
1065                self.emit_msg(ln,
1066                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1067                return
1068
1069            self.create_parameter_list(ln, 'function', args, ',', declaration_name)
1070
1071            self.output_declaration('function', declaration_name,
1072                                    typedef=True,
1073                                    functiontype=return_type,
1074                                    purpose=self.entry.declaration_purpose)
1075            return
1076        #
1077        # Not a function, try to parse a simple typedef.
1078        #
1079        r = KernRe(r'typedef.*\s+(\w+)\s*;')
1080        if r.match(proto):
1081            declaration_name = r.group(1)
1082
1083            if self.entry.identifier != declaration_name:
1084                self.emit_msg(ln,
1085                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1086                return
1087
1088            self.output_declaration('typedef', declaration_name,
1089                                    purpose=self.entry.declaration_purpose)
1090            return
1091
1092        self.emit_msg(ln, "error: Cannot parse typedef!")
1093
1094    @staticmethod
1095    def process_export(function_set, line):
1096        """
1097        process ``EXPORT_SYMBOL*`` tags
1098
1099        This method doesn't use any variable from the class, so declare it
1100        with a staticmethod decorator.
1101        """
1102
1103        # We support documenting some exported symbols with different
1104        # names.  A horrible hack.
1105        suffixes = [ '_noprof' ]
1106
1107        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
1108        # multiple export lines would violate Kernel coding style.
1109
1110        if export_symbol.search(line):
1111            symbol = export_symbol.group(2)
1112        elif export_symbol_ns.search(line):
1113            symbol = export_symbol_ns.group(2)
1114        else:
1115            return False
1116        #
1117        # Found an export, trim out any special suffixes
1118        #
1119        for suffix in suffixes:
1120            # Be backward compatible with Python < 3.9
1121            if symbol.endswith(suffix):
1122                symbol = symbol[:-len(suffix)]
1123        function_set.add(symbol)
1124        return True
1125
1126    def process_normal(self, ln, line):
1127        """
1128        STATE_NORMAL: looking for the ``/**`` to begin everything.
1129        """
1130
1131        if not doc_start.match(line):
1132            return
1133
1134        # start a new entry
1135        self.reset_state(ln)
1136
1137        # next line is always the function name
1138        self.state = state.NAME
1139
    def process_name(self, ln, line):
        """
        STATE_NAME: Looking for the "name - description" line

        Three outcomes are possible:

        - a ``DOC:`` block: a section is started (named "Introduction"
          when the block has no title) and the state becomes DOCBLOCK;
        - a declaration line: identifier, declaration type and short
          description are recorded and the state becomes BODY or
          DECLARATION;
        - anything else: a message is emitted.
        """
        #
        # Check for a DOC: block and handle them specially.
        #
        if doc_block.search(line):

            if not doc_block.group(1):
                self.entry.begin_section(ln, "Introduction")
            else:
                self.entry.begin_section(ln, doc_block.group(1))

            # The section name doubles as the identifier of a DOC: block.
            self.entry.identifier = self.entry.section
            self.state = state.DOCBLOCK
        #
        # Otherwise we're looking for a normal kerneldoc declaration line.
        #
        elif doc_decl.search(line):
            # Provisional identifier; refined by the more specific
            # matches below.
            self.entry.identifier = doc_decl.group(1)

            # Test for data declaration
            if doc_begin_data.search(line):
                self.entry.decl_type = doc_begin_data.group(1)
                self.entry.identifier = doc_begin_data.group(2)
            #
            # Look for a function description
            #
            elif doc_begin_func.search(line):
                self.entry.identifier = doc_begin_func.group(1)
                self.entry.decl_type = "function"
            #
            # We struck out.
            #
            else:
                self.emit_msg(ln,
                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
                self.state = state.NORMAL
                return
            #
            # OK, set up for a new kerneldoc entry.
            #
            self.state = state.BODY
            self.entry.identifier = self.entry.identifier.strip(" ")
            # if there's no @param blocks need to set up default section here
            self.entry.begin_section(ln + 1)
            #
            # Find the description portion, which *should* be there but
            # isn't always.
            # (We should be able to capture this from the previous parsing - someday)
            #
            r = KernRe("[-:](.*)")
            if r.search(line):
                # Everything after the first '-' or ':' is the purpose;
                # it may continue on the following lines.
                self.entry.declaration_purpose = trim_whitespace(r.group(1))
                self.state = state.DECLARATION
            else:
                self.entry.declaration_purpose = ""

            if not self.entry.declaration_purpose and self.config.wshort_desc:
                self.emit_msg(ln,
                              f"missing initial short description on line:\n{line}")

            # Only enums are allowed to have an empty identifier.
            if not self.entry.identifier and self.entry.decl_type != "enum":
                self.emit_msg(ln,
                              f"wrong kernel-doc identifier on line:\n{line}")
                self.state = state.NORMAL

            if self.config.verbose:
                self.emit_msg(ln,
                              f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
                                  warning=False)
        #
        # Failed to find an identifier. Emit a warning
        #
        else:
            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
1217
1218    def is_new_section(self, ln, line):
1219        """
1220        Helper function to determine if a new section is being started.
1221        """
1222        if doc_sect.search(line):
1223            self.state = state.BODY
1224            #
1225            # Pick out the name of our new section, tweaking it if need be.
1226            #
1227            newsection = doc_sect.group(1)
1228            if newsection.lower() == 'description':
1229                newsection = 'Description'
1230            elif newsection.lower() == 'context':
1231                newsection = 'Context'
1232                self.state = state.SPECIAL_SECTION
1233            elif newsection.lower() in ["@return", "@returns",
1234                                        "return", "returns"]:
1235                newsection = "Return"
1236                self.state = state.SPECIAL_SECTION
1237            elif newsection[0] == '@':
1238                self.state = state.SPECIAL_SECTION
1239            #
1240            # Initialize the contents, and get the new section going.
1241            #
1242            newcontents = doc_sect.group(2)
1243            if not newcontents:
1244                newcontents = ""
1245            self.dump_section()
1246            self.entry.begin_section(ln, newsection)
1247            self.entry.leading_space = None
1248
1249            self.entry.add_text(newcontents.lstrip())
1250            return True
1251        return False
1252
1253    def is_comment_end(self, ln, line):
1254        """
1255        Helper function to detect (and effect) the end of a kerneldoc comment.
1256        """
1257        if doc_end.search(line):
1258            self.dump_section()
1259
1260            # Look for doc_com + <text> + doc_end:
1261            r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')
1262            if r.match(line):
1263                self.emit_msg(ln, f"suspicious ending line: {line}")
1264
1265            self.entry.prototype = ""
1266            self.entry.new_start_line = ln + 1
1267
1268            self.state = state.PROTO
1269            return True
1270        return False
1271
1272
1273    def process_decl(self, ln, line):
1274        """
1275        STATE_DECLARATION: We've seen the beginning of a declaration.
1276        """
1277        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1278            return
1279        #
1280        # Look for anything with the " * " line beginning.
1281        #
1282        if doc_content.search(line):
1283            cont = doc_content.group(1)
1284            #
1285            # A blank line means that we have moved out of the declaration
1286            # part of the comment (without any "special section" parameter
1287            # descriptions).
1288            #
1289            if cont == "":
1290                self.state = state.BODY
1291            #
1292            # Otherwise we have more of the declaration section to soak up.
1293            #
1294            else:
1295                self.entry.declaration_purpose = \
1296                    trim_whitespace(self.entry.declaration_purpose + ' ' + cont)
1297        else:
1298            # Unknown line, ignore
1299            self.emit_msg(ln, f"bad line: {line}")
1300
1301
1302    def process_special(self, ln, line):
1303        """
1304        STATE_SPECIAL_SECTION: a section ending with a blank line.
1305        """
1306        #
1307        # If we have hit a blank line (only the " * " marker), then this
1308        # section is done.
1309        #
1310        if KernRe(r"\s*\*\s*$").match(line):
1311            self.entry.begin_section(ln, dump = True)
1312            self.state = state.BODY
1313            return
1314        #
1315        # Not a blank line, look for the other ways to end the section.
1316        #
1317        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1318            return
1319        #
1320        # OK, we should have a continuation of the text for this section.
1321        #
1322        if doc_content.search(line):
1323            cont = doc_content.group(1)
1324            #
1325            # If the lines of text after the first in a special section have
1326            # leading white space, we need to trim it out or Sphinx will get
1327            # confused.  For the second line (the None case), see what we
1328            # find there and remember it.
1329            #
1330            if self.entry.leading_space is None:
1331                r = KernRe(r'^(\s+)')
1332                if r.match(cont):
1333                    self.entry.leading_space = len(r.group(1))
1334                else:
1335                    self.entry.leading_space = 0
1336            #
1337            # Otherwise, before trimming any leading chars, be *sure*
1338            # that they are white space.  We should maybe warn if this
1339            # isn't the case.
1340            #
1341            for i in range(0, self.entry.leading_space):
1342                if cont[i] != " ":
1343                    self.entry.leading_space = i
1344                    break
1345            #
1346            # Add the trimmed result to the section and we're done.
1347            #
1348            self.entry.add_text(cont[self.entry.leading_space:])
1349        else:
1350            # Unknown line, ignore
1351            self.emit_msg(ln, f"bad line: {line}")
1352
1353    def process_body(self, ln, line):
1354        """
1355        STATE_BODY: the bulk of a kerneldoc comment.
1356        """
1357        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1358            return
1359
1360        if doc_content.search(line):
1361            cont = doc_content.group(1)
1362            self.entry.add_text(cont)
1363        else:
1364            # Unknown line, ignore
1365            self.emit_msg(ln, f"bad line: {line}")
1366
1367    def process_inline_name(self, ln, line):
1368        """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
1369
1370        if doc_inline_sect.search(line):
1371            self.entry.begin_section(ln, doc_inline_sect.group(1))
1372            self.entry.add_text(doc_inline_sect.group(2).lstrip())
1373            self.state = state.INLINE_TEXT
1374        elif doc_inline_end.search(line):
1375            self.dump_section()
1376            self.state = state.PROTO
1377        elif doc_content.search(line):
1378            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
1379            self.state = state.PROTO
1380        # else ... ??
1381
1382    def process_inline_text(self, ln, line):
1383        """STATE_INLINE_TEXT: docbook comments within a prototype."""
1384
1385        if doc_inline_end.search(line):
1386            self.dump_section()
1387            self.state = state.PROTO
1388        elif doc_content.search(line):
1389            self.entry.add_text(doc_content.group(1))
1390        # else ... ??
1391
1392    def syscall_munge(self, ln, proto):         # pylint: disable=W0613
1393        """
1394        Handle syscall definitions.
1395        """
1396
1397        is_void = False
1398
1399        # Strip newlines/CR's
1400        proto = re.sub(r'[\r\n]+', ' ', proto)
1401
1402        # Check if it's a SYSCALL_DEFINE0
1403        if 'SYSCALL_DEFINE0' in proto:
1404            is_void = True
1405
1406        # Replace SYSCALL_DEFINE with correct return type & function name
1407        proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
1408
1409        r = KernRe(r'long\s+(sys_.*?),')
1410        if r.search(proto):
1411            proto = KernRe(',').sub('(', proto, count=1)
1412        elif is_void:
1413            proto = KernRe(r'\)').sub('(void)', proto, count=1)
1414
1415        # Now delete all of the odd-numbered commas in the proto
1416        # so that argument types & names don't have a comma between them
1417        count = 0
1418        length = len(proto)
1419
1420        if is_void:
1421            length = 0  # skip the loop if is_void
1422
1423        for ix in range(length):
1424            if proto[ix] == ',':
1425                count += 1
1426                if count % 2 == 1:
1427                    proto = proto[:ix] + ' ' + proto[ix + 1:]
1428
1429        return proto
1430
1431    def tracepoint_munge(self, ln, proto):
1432        """
1433        Handle tracepoint definitions.
1434        """
1435
1436        tracepointname = None
1437        tracepointargs = None
1438
1439        # Match tracepoint name based on different patterns
1440        r = KernRe(r'TRACE_EVENT\((.*?),')
1441        if r.search(proto):
1442            tracepointname = r.group(1)
1443
1444        r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
1445        if r.search(proto):
1446            tracepointname = r.group(1)
1447
1448        r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
1449        if r.search(proto):
1450            tracepointname = r.group(2)
1451
1452        if tracepointname:
1453            tracepointname = tracepointname.lstrip()
1454
1455        r = KernRe(r'TP_PROTO\((.*?)\)')
1456        if r.search(proto):
1457            tracepointargs = r.group(1)
1458
1459        if not tracepointname or not tracepointargs:
1460            self.emit_msg(ln,
1461                          f"Unrecognized tracepoint format:\n{proto}\n")
1462        else:
1463            proto = f"static inline void trace_{tracepointname}({tracepointargs})"
1464            self.entry.identifier = f"trace_{self.entry.identifier}"
1465
1466        return proto
1467
1468    def process_proto_function(self, ln, line):
1469        """Ancillary routine to process a function prototype."""
1470
1471        # strip C99-style comments to end of line
1472        line = KernRe(r"//.*$", re.S).sub('', line)
1473        #
1474        # Soak up the line's worth of prototype text, stopping at { or ; if present.
1475        #
1476        if KernRe(r'\s*#\s*define').match(line):
1477            self.entry.prototype = line
1478        elif not line.startswith('#'):   # skip other preprocessor stuff
1479            r = KernRe(r'([^\{]*)')
1480            if r.match(line):
1481                self.entry.prototype += r.group(1) + " "
1482        #
1483        # If we now have the whole prototype, clean it up and declare victory.
1484        #
1485        if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
1486            # strip comments and surrounding spaces
1487            self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
1488            #
1489            # Handle self.entry.prototypes for function pointers like:
1490            #       int (*pcs_config)(struct foo)
1491            # by turning it into
1492            #	    int pcs_config(struct foo)
1493            #
1494            r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
1495            self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
1496            #
1497            # Handle special declaration syntaxes
1498            #
1499            if 'SYSCALL_DEFINE' in self.entry.prototype:
1500                self.entry.prototype = self.syscall_munge(ln,
1501                                                          self.entry.prototype)
1502            else:
1503                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
1504                if r.search(self.entry.prototype):
1505                    self.entry.prototype = self.tracepoint_munge(ln,
1506                                                                 self.entry.prototype)
1507            #
1508            # ... and we're done
1509            #
1510            self.dump_function(ln, self.entry.prototype)
1511            self.reset_state(ln)
1512
1513    def process_proto_type(self, ln, line):
1514        """
1515        Ancillary routine to process a type.
1516        """
1517
1518        # Strip C99-style comments and surrounding whitespace
1519        line = KernRe(r"//.*$", re.S).sub('', line).strip()
1520        if not line:
1521            return # nothing to see here
1522
1523        # To distinguish preprocessor directive from regular declaration later.
1524        if line.startswith('#'):
1525            line += ";"
1526        #
1527        # Split the declaration on any of { } or ;, and accumulate pieces
1528        # until we hit a semicolon while not inside {brackets}
1529        #
1530        r = KernRe(r'(.*?)([{};])')
1531        for chunk in r.split(line):
1532            if chunk:  # Ignore empty matches
1533                self.entry.prototype += chunk
1534                #
1535                # This cries out for a match statement ... someday after we can
1536                # drop Python 3.9 ...
1537                #
1538                if chunk == '{':
1539                    self.entry.brcount += 1
1540                elif chunk == '}':
1541                    self.entry.brcount -= 1
1542                elif chunk == ';' and self.entry.brcount <= 0:
1543                    self.dump_declaration(ln, self.entry.prototype)
1544                    self.reset_state(ln)
1545                    return
1546        #
1547        # We hit the end of the line while still in the declaration; put
1548        # in a space to represent the newline.
1549        #
1550        self.entry.prototype += ' '
1551
1552    def process_proto(self, ln, line):
1553        """STATE_PROTO: reading a function/whatever prototype."""
1554
1555        if doc_inline_oneline.search(line):
1556            self.entry.begin_section(ln, doc_inline_oneline.group(1))
1557            self.entry.add_text(doc_inline_oneline.group(2))
1558            self.dump_section()
1559
1560        elif doc_inline_start.search(line):
1561            self.state = state.INLINE_NAME
1562
1563        elif self.entry.decl_type == 'function':
1564            self.process_proto_function(ln, line)
1565
1566        else:
1567            self.process_proto_type(ln, line)
1568
1569    def process_docblock(self, ln, line):
1570        """STATE_DOCBLOCK: within a ``DOC:`` block."""
1571
1572        if doc_end.search(line):
1573            self.dump_section()
1574            self.output_declaration("doc", self.entry.identifier)
1575            self.reset_state(ln)
1576
1577        elif doc_content.search(line):
1578            self.entry.add_text(doc_content.group(1))
1579
1580    def parse_export(self):
1581        """
1582        Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
1583        """
1584
1585        export_table = set()
1586
1587        try:
1588            with open(self.fname, "r", encoding="utf8",
1589                      errors="backslashreplace") as fp:
1590
1591                for line in fp:
1592                    self.process_export(export_table, line)
1593
1594        except IOError:
1595            return None
1596
1597        return export_table
1598
1599    #: The state/action table telling us which function to invoke in each state.
1600    state_actions = {
1601        state.NORMAL:			process_normal,
1602        state.NAME:			process_name,
1603        state.BODY:			process_body,
1604        state.DECLARATION:		process_decl,
1605        state.SPECIAL_SECTION:		process_special,
1606        state.INLINE_NAME:		process_inline_name,
1607        state.INLINE_TEXT:		process_inline_text,
1608        state.PROTO:			process_proto,
1609        state.DOCBLOCK:			process_docblock,
1610        }
1611
1612    def parse_kdoc(self):
1613        """
1614        Open and process each line of a C source file.
1615        The parsing is controlled via a state machine, and the line is passed
1616        to a different process function depending on the state. The process
1617        function may update the state as needed.
1618
1619        Besides parsing kernel-doc tags, it also parses export symbols.
1620        """
1621
1622        prev = ""
1623        prev_ln = None
1624        export_table = set()
1625
1626        try:
1627            with open(self.fname, "r", encoding="utf8",
1628                      errors="backslashreplace") as fp:
1629                for ln, line in enumerate(fp):
1630
1631                    line = line.expandtabs().strip("\n")
1632
1633                    # Group continuation lines on prototypes
1634                    if self.state == state.PROTO:
1635                        if line.endswith("\\"):
1636                            prev += line.rstrip("\\")
1637                            if not prev_ln:
1638                                prev_ln = ln
1639                            continue
1640
1641                        if prev:
1642                            ln = prev_ln
1643                            line = prev + line
1644                            prev = ""
1645                            prev_ln = None
1646
1647                    self.config.log.debug("%d %s: %s",
1648                                          ln, state.name[self.state],
1649                                          line)
1650
1651                    # This is an optimization over the original script.
1652                    # There, when export_file was used for the same file,
1653                    # it was read twice. Here, we use the already-existing
1654                    # loop to parse exported symbols as well.
1655                    #
1656                    if (self.state != state.NORMAL) or \
1657                       not self.process_export(export_table, line):
1658                        # Hand this line to the appropriate state handler
1659                        self.state_actions[self.state](self, ln, line)
1660
1661            self.emit_unused_warnings()
1662
1663        except OSError:
1664            self.config.log.error(f"Error: Cannot open file {self.fname}")
1665
1666        return export_table, self.entries
1667