xref: /linux/tools/lib/python/kdoc/kdoc_parser.py (revision cd77a9aa20ef53a03e5bb2630a5e7b16b910f198)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6
7"""
8Classes and functions related to reading a C language source or header FILE
9and extract embedded documentation comments from it.
10"""
11
12import sys
13import re
14from pprint import pformat
15
16from kdoc.c_lex import CTokenizer
17from kdoc.kdoc_re import NestedMatch, KernRe
18from kdoc.kdoc_item import KdocItem
19
20#
21# Regular expressions used to parse kernel-doc markups at KernelDoc class.
22#
23# Let's declare them in lowercase outside any class to make it easier to
24# convert from the Perl script.
25#
26# As those are evaluated at the beginning, no need to cache them
27#
28
# Opening line of a kernel-doc comment: "/**" with nothing after it except
# optional trailing whitespace.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# The comment terminator "*/".
doc_end = KernRe(r'\*/', cache=False)
# The " * " decoration of a comment line, swallowing all blanks around the "*".
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same decoration, but consuming at most one space after the "*" so that any
# further indentation stays part of the text that follows.
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Comment decoration followed by a single captured word.
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
known_section_names = 'description|context|returns?|notes?|examples?'
# Case-insensitive matcher for the section names listed above.
known_sections = KernRe(known_section_names, flags = re.I)
# Group 1 is the "@name" or section title; group 2 is the optional text after
# the colon, which must not itself start with ":".
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# A body line: decoration, then the whole remaining text in group 1.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline comments: an (optionally indented) "/**" opener, a "* @name: text"
# line, the "*/" closer, and the one-line "/** @name: text */" form.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL()/EXPORT_SYMBOL_GPL() and their _NS variants; in both
# expressions the exported symbol name is captured in group 2.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# An "@name" reference; group 1 is the name, which may carry ".member" or
# "->member" suffixes and a trailing "...".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
# "DOC: title" blocks; the title is captured in group 1.
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
# Data declarations: group 1 is the keyword, group 2 the (possibly empty) name.
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
# Functions and macros: only the name is captured (group 1).
doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
                        r'(?:define\s+)?' +             # possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
                        r'(?:[-:].*)?$',                # description (not captured)
                        cache = False)
72
73#
74# Ancillary functions
75#
76
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    Return ``s`` without leading/trailing blanks and with every run of
    two or more whitespace characters replaced by a single space.
    """
    stripped = s.strip()
    return multi_space.sub(' ', stripped)
83
def trim_private_members(text):
    """
    Return ``text`` after a round trip through ``CTokenizer``.

    This is the hook used to drop ``struct``/``enum`` members that have
    been marked "private".
    """
    # NOTE(review): the removal itself is presumably implemented by
    # CTokenizer / its str() conversion; that code is not visible here.
    return str(CTokenizer(text))
91
class state:
    """
    Numeric identifiers of the states the parser's state machine can be in.
    """

    #: Names for each parser state.
    name = [
        "NORMAL",
        "NAME",
        "DECLARATION",
        "BODY",
        "SPECIAL_SECTION",
        "PROTO",
        "DOCBLOCK",
        "INLINE_NAME",
        "INLINE_TEXT",
    ]

    # Parser states; each value is the index of its name in the list above.

    #: Normal code.
    NORMAL = 0
    #: Looking for function name.
    NAME = 1
    #: We have seen a declaration which might not be done.
    DECLARATION = 2
    #: The body of the comment.
    BODY = 3
    #: Doc section ending with a blank line.
    SPECIAL_SECTION = 4
    #: Scanning prototype.
    PROTO = 5
    #: Documentation block.
    DOCBLOCK = 6
    #: Gathering doc outside main block.
    INLINE_NAME = 7
    #: Reading the body of inline docs.
    INLINE_TEXT = 8
120
121
SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    Text is accumulated with add_text() and then filed by dump_section()
    under the current section: either as a parameter description (when the
    section title looks like ``@name``) or as a regular named section.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` object (used by emit_msg()),
        ``fname`` is the file name used as prefix on messages and ``ln`` is
        the line preceding the declaration start.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        #
        # Current section and the line where it started.  They are
        # initialized here, with the same values begin_section() and
        # dump_section() use as defaults, so that dump_section() is safe
        # to call even if begin_section() was never invoked.
        #
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """Returns a string with all content texts that were added."""
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message prefixed with ``file:line``.

        Informational messages go straight to ``config.log.info()``, while
        warnings are only stored at ``self.warnings``.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        # Delegate warning output to output logic, as this way it
        # will report warnings/info only for symbols that are output

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section named ``title`` starting at ``line_no``.

        When ``dump`` is true, the section being currently gathered is
        stored first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        With ``start_new`` set, the entry goes back to an empty default
        section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            # "@name" sections are parameter descriptions
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
231
# Set to True by KernelDoc.__init__() after it has emitted the "Python 3.7 or
# later is required" message, so that the message is produced only once.
python_warning = False
233
234class KernelDoc:
235    """
236    Read a C language source or header FILE and extract embedded
237    documentation comments.
238    """
239
240    #: Name of context section.
241    section_context = "Context"
242
243    #: Name of return section.
244    section_return = "Return"
245
246    #: String to write when a parameter is not described.
247    undescribed = "-- undescribed --"
248
249    def __init__(self, config, fname, xforms):
250        """Initialize internal variables"""
251
252        self.fname = fname
253        self.config = config
254        self.xforms = xforms
255
256        # Initial state for the state machines
257        self.state = state.NORMAL
258
259        # Store entry currently being processed
260        self.entry = None
261
262        # Place all potential outputs into an array
263        self.entries = []
264
265        #
266        # We need Python 3.7 for its "dicts remember the insertion
267        # order" guarantee
268        #
269        global python_warning
270        if (not python_warning and
271            sys.version_info.major == 3 and sys.version_info.minor < 7):
272
273            self.emit_msg(0,
274                          'Python 3.7 or later is required for correct results')
275            python_warning = True
276
277    def emit_msg(self, ln, msg, *, warning=True):
278        """Emit a message"""
279
280        if self.entry:
281            self.entry.emit_msg(ln, msg, warning=warning)
282            return
283
284        log_msg = f"{self.fname}:{ln} {msg}"
285
286        if warning:
287            self.config.log.warning(log_msg)
288        else:
289            self.config.log.info(log_msg)
290
291    def dump_section(self, start_new=True):
292        """
293        Dump section contents to arrays/hashes intended for that purpose.
294        """
295
296        if self.entry:
297            self.entry.dump_section(start_new)
298
299    # TODO: rename it to store_declaration after removal of kernel-doc.pl
300    def output_declaration(self, dtype, name, **args):
301        """
302        Store the entry into an entry array.
303
304        The actual output and output filters will be handled elsewhere.
305        """
306
307        item = KdocItem(name, self.fname, dtype,
308                        self.entry.declaration_start_line, **args)
309        item.warnings = self.entry.warnings
310
311        # Drop empty sections
312        # TODO: improve empty sections logic to emit warnings
313        sections = self.entry.sections
314        for section in ["Description", "Return"]:
315            if section in sections and not sections[section].rstrip():
316                del sections[section]
317        item.set_sections(sections, self.entry.section_start_lines)
318        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
319                        self.entry.parametertypes,
320                        self.entry.parameterdesc_start_lines)
321        self.entries.append(item)
322
323        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
324
325    def emit_unused_warnings(self):
326        """
327        When the parser fails to produce a valid entry, it places some
328        warnings under `entry.warnings` that will be discarded when resetting
329        the state.
330
331        Ensure that those warnings are not lost.
332
333        .. note::
334
335              Because we are calling `config.warning()` here, those
336              warnings are not filtered by the `-W` parameters: they will all
337              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
338              `-Wcontents-before-sections` are used.
339
340              Allowing those warnings to be filtered is complex, because it
341              would require storing them in a buffer and then filtering them
342              during the output step of the code, depending on the
343              selected symbols.
344        """
345        if self.entry and self.entry not in self.entries:
346            for log_msg in self.entry.warnings:
347                self.config.warning(log_msg)
348
349    def reset_state(self, ln):
350        """
351        Ancillary routine to create a new entry. It initializes all
352        variables used by the state machine.
353        """
354
355        self.emit_unused_warnings()
356
357        self.entry = KernelEntry(self.config, self.fname, ln)
358
359        # State flags
360        self.state = state.NORMAL
361
362    def push_parameter(self, ln, decl_type, param, dtype,
363                       org_arg, declaration_name):
364        """
365        Store parameters and their descriptions at self.entry.
366        """
367
368        if self.entry.anon_struct_union and dtype == "" and param == "}":
369            return  # Ignore the ending }; from anonymous struct/union
370
371        self.entry.anon_struct_union = False
372
373        param = KernRe(r'[\[\)].*').sub('', param, count=1)
374
375        #
376        # Look at various "anonymous type" cases.
377        #
378        if dtype == '':
379            if param.endswith("..."):
380                if len(param) > 3: # there is a name provided, use that
381                    param = param[:-3]
382                if not self.entry.parameterdescs.get(param):
383                    self.entry.parameterdescs[param] = "variable arguments"
384
385            elif (not param) or param == "void":
386                param = "void"
387                self.entry.parameterdescs[param] = "no arguments"
388
389            elif param in ["struct", "union"]:
390                # Handle unnamed (anonymous) union or struct
391                dtype = param
392                param = "{unnamed_" + param + "}"
393                self.entry.parameterdescs[param] = "anonymous\n"
394                self.entry.anon_struct_union = True
395
396        # Warn if parameter has no description
397        # (but ignore ones starting with # as these are not parameters
398        # but inline preprocessor statements)
399        if param not in self.entry.parameterdescs and not param.startswith("#"):
400            self.entry.parameterdescs[param] = self.undescribed
401
402            if "." not in param:
403                if decl_type == 'function':
404                    dname = f"{decl_type} parameter"
405                else:
406                    dname = f"{decl_type} member"
407
408                self.emit_msg(ln,
409                              f"{dname} '{param}' not described in '{declaration_name}'")
410
411        # Strip spaces from param so that it is one continuous string on
412        # parameterlist. This fixes a problem where check_sections()
413        # cannot find a parameter like "addr[6 + 2]" because it actually
414        # appears as "addr[6", "+", "2]" on the parameter list.
415        # However, it's better to maintain the param string unchanged for
416        # output, so just weaken the string compare in check_sections()
417        # to ignore "[blah" in a parameter string.
418
419        self.entry.parameterlist.append(param)
420        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
421        self.entry.parametertypes[param] = org_arg
422
423
    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is split at ``splitter`` and each piece is classified as
        a preprocessor directive, a pointer to function, an array of
        pointers or a regular ``type name[, name...]`` declaration.  Every
        name found is handed to push_parameter(), together with ``ln``,
        ``decl_type`` and ``declaration_name``, which are used there on
        warning messages.
        """

        # temporarily replace all commas inside function pointer definition
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces and turn the first run of
            # whitespace (only the first one: count=1) into a single space
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                # Restore the commas hidden as "#" at the top of this function
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')          # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                # The type is whatever is left once the name is removed
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
                           r'([\w.]*?)'                 # The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                # The type is whatever is left once the name is removed
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                # Note that "args" is rebound here to the list of names.  That
                # doesn't disturb the outer loop, which iterates over the list
                # produced by args.split(splitter) before its first iteration.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                args = KernRe(r'\s*,\s*').split(arg)
                # Make sure there is a space before the pointer star(s) and
                # none after them: "type* a" becomes "type *a"
                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
                #
                # args[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(args[0]):
                    args[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in args[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in args:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)
525
526    def check_sections(self, ln, decl_name, decl_type):
527        """
528        Check for errors inside sections, emitting warnings if not found
529        parameters are described.
530        """
531        for section in self.entry.sections:
532            if section not in self.entry.parameterlist and \
533               not known_sections.search(section):
534                if decl_type == 'function':
535                    dname = f"{decl_type} parameter"
536                else:
537                    dname = f"{decl_type} member"
538                self.emit_msg(ln,
539                              f"Excess {dname} '{section}' description in '{decl_name}'")
540
541    def check_return_section(self, ln, declaration_name, return_type):
542        """
543        If the function doesn't return void, warns about the lack of a
544        return description.
545        """
546
547        if not self.config.wreturn:
548            return
549
550        # Ignore an empty return type (It's a macro)
551        # Ignore functions with a "void" return type (but not "void *")
552        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
553            return
554
555        if not self.entry.sections.get("Return", None):
556            self.emit_msg(ln,
557                          f"No description found for return value of '{declaration_name}'")
558
559    def split_struct_proto(self, proto):
560        """
561        Split apart a structure prototype; returns (struct|union, name,
562        members) or ``None``.
563        """
564
565        type_pattern = r'(struct|union)'
566        qualifiers = [
567            "__attribute__",
568            "__packed",
569            "__aligned",
570            "____cacheline_aligned_in_smp",
571            "____cacheline_aligned",
572        ]
573        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
574
575        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
576        if r.search(proto):
577            return (r.group(1), r.group(2), r.group(3))
578        else:
579            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
580            if r.search(proto):
581                return (r.group(1), r.group(3), r.group(2))
582        return None
583
584    def rewrite_struct_members(self, members):
585        """
586        Process ``struct``/``union`` members from the most deeply nested
587        outward.
588
589        Rewrite the members of a ``struct`` or ``union`` for easier formatting
590        later on. Among other things, this function will turn a member like::
591
592          struct { inner_members; } foo;
593
594        into::
595
596          struct foo; inner_members;
597        """
598
599        #
600        # The trick is in the ``^{`` below - it prevents a match of an outer
601        # ``struct``/``union`` until the inner one has been munged
602        # (removing the ``{`` in the process).
603        #
604        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
605                                r'([^\{\};]+)' 	    # 1: possible name
606                                r'(\{)'
607                                r'([^\{\}]*)'       # 3: Contents of declaration
608                                r'(\})'
609                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
610        tuples = struct_members.findall(members)
611        while tuples:
612            for t in tuples:
613                newmember = ""
614                oldmember = "".join(t) # Reconstruct the original formatting
615                dtype, name, lbr, content, rbr, rest, semi = t
616                #
617                # Pass through each field name, normalizing the form and formatting.
618                #
619                for s_id in rest.split(','):
620                    s_id = s_id.strip()
621                    newmember += f"{dtype} {s_id}; "
622                    #
623                    # Remove bitfield/array/pointer info, getting the bare name.
624                    #
625                    s_id = KernRe(r'[:\[].*').sub('', s_id)
626                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
627                    #
628                    # Pass through the members of this inner structure/union.
629                    #
630                    for arg in content.split(';'):
631                        arg = arg.strip()
632                        #
633                        # Look for (type)(*name)(args) - pointer to function
634                        #
635                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
636                        if r.match(arg):
637                            dtype, name, extra = r.group(1), r.group(2), r.group(3)
638                            # Pointer-to-function
639                            if not s_id:
640                                # Anonymous struct/union
641                                newmember += f"{dtype}{name}{extra}; "
642                            else:
643                                newmember += f"{dtype}{s_id}.{name}{extra}; "
644                        #
645                        # Otherwise a non-function member.
646                        #
647                        else:
648                            #
649                            # Remove bitmap and array portions and spaces around commas
650                            #
651                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
652                            arg = KernRe(r'\[.*\]').sub('', arg)
653                            arg = KernRe(r'\s*,\s*').sub(',', arg)
654                            #
655                            # Look for a normal decl - "type name[,name...]"
656                            #
657                            r = KernRe(r'(.*)\s+([\S+,]+)')
658                            if r.search(arg):
659                                for name in r.group(2).split(','):
660                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
661                                    if not s_id:
662                                        # Anonymous struct/union
663                                        newmember += f"{r.group(1)} {name}; "
664                                    else:
665                                        newmember += f"{r.group(1)} {s_id}.{name}; "
666                            else:
667                                newmember += f"{arg}; "
668                #
669                # At the end of the s_id loop, replace the original declaration with
670                # the munged version.
671                #
672                members = members.replace(oldmember, newmember)
673            #
674            # End of the tuple loop - search again and see if there are outer members
675            # that now turn up.
676            #
677            tuples = struct_members.findall(members)
678        return members
679
680    def format_struct_decl(self, declaration):
681        """
682        Format the ``struct`` declaration into a standard form for inclusion
683        in the resulting docs.
684        """
685
686        #
687        # Insert newlines, get rid of extra spaces.
688        #
689        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
690        declaration = KernRe(r'\}\s+;').sub('};', declaration)
691        #
692        # Format inline enums with each member on its own line.
693        #
694        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
695        while r.search(declaration):
696            declaration = r.sub(r'\1,\n\2', declaration)
697        #
698        # Now go through and supply the right number of tabs
699        # for each line.
700        #
701        def_args = declaration.split('\n')
702        level = 1
703        declaration = ""
704        for clause in def_args:
705            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
706            if clause:
707                if '}' in clause and level > 1:
708                    level -= 1
709                if not clause.startswith('#'):
710                    declaration += "\t" * level
711                declaration += "\t" + clause + "\n"
712                if "{" in clause and "}" not in clause:
713                    level += 1
714        return declaration
715
716
717    def dump_struct(self, ln, proto):
718        """
719        Store an entry for a ``struct`` or ``union``
720        """
721        #
722        # Do the basic parse to get the pieces of the declaration.
723        #
724        struct_parts = self.split_struct_proto(proto)
725        if not struct_parts:
726            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
727            return
728        decl_type, declaration_name, members = struct_parts
729
730        if self.entry.identifier != declaration_name:
731            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
732                          f"Prototype was for {decl_type} {declaration_name} instead\n")
733            return
734        #
735        # Go through the list of members applying all of our transformations.
736        #
737        members = trim_private_members(members)
738        members = self.xforms.apply("struct", members)
739
740        #
741        # Deal with embedded struct and union members, and drop enums entirely.
742        #
743        declaration = members
744        members = self.rewrite_struct_members(members)
745        members = re.sub(r'(\{[^\{\}]*\})', '', members)
746        #
747        # Output the result and we are done.
748        #
749        self.create_parameter_list(ln, decl_type, members, ';',
750                                   declaration_name)
751        self.check_sections(ln, declaration_name, decl_type)
752        self.output_declaration(decl_type, declaration_name,
753                                definition=self.format_struct_decl(declaration),
754                                purpose=self.entry.declaration_purpose)
755
756    def dump_enum(self, ln, proto):
757        """
758        Store an ``enum`` inside self.entries array.
759        """
760        #
761        # Strip preprocessor directives.  Note that this depends on the
762        # trailing semicolon we added in process_proto_type().
763        #
764        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
765        #
766        # Parse out the name and members of the enum.  Typedef form first.
767        #
768        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
769        if r.search(proto):
770            declaration_name = r.group(2)
771            members = trim_private_members(r.group(1))
772        #
773        # Failing that, look for a straight enum
774        #
775        else:
776            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
777            if r.match(proto):
778                declaration_name = r.group(1)
779                members = trim_private_members(r.group(2))
780        #
781        # OK, this isn't going to work.
782        #
783            else:
784                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
785                return
786        #
787        # Make sure we found what we were expecting.
788        #
789        if self.entry.identifier != declaration_name:
790            if self.entry.identifier == "":
791                self.emit_msg(ln,
792                              f"{proto}: wrong kernel-doc identifier on prototype")
793            else:
794                self.emit_msg(ln,
795                              f"expecting prototype for enum {self.entry.identifier}. "
796                              f"Prototype was for enum {declaration_name} instead")
797            return
798
799        if not declaration_name:
800            declaration_name = "(anonymous)"
801        #
802        # Parse out the name of each enum member, and verify that we
803        # have a description for it.
804        #
805        member_set = set()
806        members = KernRe(r'\([^;)]*\)').sub('', members)
807        for arg in members.split(','):
808            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
809            if not arg.strip():
810                continue
811
812            self.entry.parameterlist.append(arg)
813            if arg not in self.entry.parameterdescs:
814                self.entry.parameterdescs[arg] = self.undescribed
815                self.emit_msg(ln,
816                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
817            member_set.add(arg)
818        #
819        # Ensure that every described member actually exists in the enum.
820        #
821        for k in self.entry.parameterdescs:
822            if k not in member_set:
823                self.emit_msg(ln,
824                              f"Excess enum value '@{k}' description in '{declaration_name}'")
825
826        self.output_declaration('enum', declaration_name,
827                                purpose=self.entry.declaration_purpose)
828
829    def dump_var(self, ln, proto):
830        """
831        Store variables that are part of kAPI.
832        """
833        VAR_ATTRIBS = [
834            "extern",
835            "const",
836        ]
837        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
838
839        #
840        # Store the full prototype before modifying it
841        #
842        full_proto = proto
843        declaration_name = None
844
845        #
846        # Handle macro definitions
847        #
848        macro_prefixes = [
849            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
850        ]
851
852        for r in macro_prefixes:
853            match = r.search(proto)
854            if match:
855                declaration_name = match.group(1)
856                break
857
858        #
859        # Drop comments and macros to have a pure C prototype
860        #
861        if not declaration_name:
862            proto = self.xforms.apply("var", proto)
863
864        proto = proto.rstrip()
865
866        #
867        # Variable name is at the end of the declaration
868        #
869
870        default_val = None
871
872        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
873        if r.match(proto):
874            if not declaration_name:
875                declaration_name = r.group(1)
876
877            default_val = r.group(2)
878        else:
879            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
880
881            if r.match(proto):
882                default_val = r.group(1)
883        if not declaration_name:
884           self.emit_msg(ln,f"{proto}: can't parse variable")
885           return
886
887        if default_val:
888            default_val = default_val.lstrip("=").strip()
889
890        self.output_declaration("var", declaration_name,
891                                full_proto=full_proto,
892                                default_val=default_val,
893                                purpose=self.entry.declaration_purpose)
894
895    def dump_declaration(self, ln, prototype):
896        """
897        Store a data declaration inside self.entries array.
898        """
899
900        if self.entry.decl_type == "enum":
901            self.dump_enum(ln, prototype)
902        elif self.entry.decl_type == "typedef":
903            self.dump_typedef(ln, prototype)
904        elif self.entry.decl_type in ["union", "struct"]:
905            self.dump_struct(ln, prototype)
906        elif self.entry.decl_type == "var":
907            self.dump_var(ln, prototype)
908        else:
909            # This would be a bug
910            self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
911
912    def dump_function(self, ln, prototype):
913        """
914        Store a function or function macro inside self.entries array.
915        """
916
917        found = func_macro = False
918        return_type = ''
919        decl_type = 'function'
920
921        #
922        # If we have a macro, remove the "#define" at the front.
923        #
924        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
925        if new_proto != prototype:
926            prototype = new_proto
927            #
928            # Dispense with the simple "#define A B" case here; the key
929            # is the space after the name of the symbol being defined.
930            # NOTE that the seemingly misnamed "func_macro" indicates a
931            # macro *without* arguments.
932            #
933            r = KernRe(r'^(\w+)\s+')
934            if r.search(prototype):
935                return_type = ''
936                declaration_name = r.group(1)
937                func_macro = True
938                found = True
939        else:
940            #
941            # Apply the initial transformations.
942            #
943            prototype = self.xforms.apply("func", prototype)
944
945        # Yes, this truly is vile.  We are looking for:
946        # 1. Return type (may be nothing if we're looking at a macro)
947        # 2. Function name
948        # 3. Function parameters.
949        #
950        # All the while we have to watch out for function pointer parameters
951        # (which IIRC is what the two sections are for), C types (these
952        # regexps don't even start to express all the possibilities), and
953        # so on.
954        #
955        # If you mess with these regexps, it's a good idea to check that
956        # the following functions' documentation still comes out right:
957        # - parport_register_device (function pointer parameters)
958        # - atomic_set (macro)
959        # - pci_match_device, __copy_to_user (long return type)
960
961        name = r'\w+'
962        type1 = r'(?:[\w\s]+)?'
963        type2 = r'(?:[\w\s]+\*+)+'
964        #
965        # Attempt to match first on (args) with no internal parentheses; this
966        # lets us easily filter out __acquires() and other post-args stuff.  If
967        # that fails, just grab the rest of the line to the last closing
968        # parenthesis.
969        #
970        proto_args = r'\(([^\(]*|.*)\)'
971        #
972        # (Except for the simple macro case) attempt to split up the prototype
973        # in the various ways we understand.
974        #
975        if not found:
976            patterns = [
977                rf'^()({name})\s*{proto_args}',
978                rf'^({type1})\s+({name})\s*{proto_args}',
979                rf'^({type2})\s*({name})\s*{proto_args}',
980            ]
981
982            for p in patterns:
983                r = KernRe(p)
984                if r.match(prototype):
985                    return_type = r.group(1)
986                    declaration_name = r.group(2)
987                    args = r.group(3)
988                    self.create_parameter_list(ln, decl_type, args, ',',
989                                               declaration_name)
990                    found = True
991                    break
992        #
993        # Parsing done; make sure that things are as we expect.
994        #
995        if not found:
996            self.emit_msg(ln,
997                          f"cannot understand function prototype: '{prototype}'")
998            return
999        if self.entry.identifier != declaration_name:
1000            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
1001                          f"Prototype was for {declaration_name}() instead")
1002            return
1003        self.check_sections(ln, declaration_name, "function")
1004        self.check_return_section(ln, declaration_name, return_type)
1005        #
1006        # Store the result.
1007        #
1008        self.output_declaration(decl_type, declaration_name,
1009                                typedef=('typedef' in return_type),
1010                                functiontype=return_type,
1011                                purpose=self.entry.declaration_purpose,
1012                                func_macro=func_macro)
1013
1014
1015    def dump_typedef(self, ln, proto):
1016        """
1017        Store a ``typedef`` inside self.entries array.
1018        """
1019        #
1020        # We start by looking for function typedefs.
1021        #
1022        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
1023        typedef_ident = r'\*?\s*(\w\S+)\s*'
1024        typedef_args = r'\s*\((.*)\);'
1025
1026        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
1027        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
1028
1029        # Parse function typedef prototypes
1030        for r in [typedef1, typedef2]:
1031            if not r.match(proto):
1032                continue
1033
1034            return_type = r.group(1).strip()
1035            declaration_name = r.group(2)
1036            args = r.group(3)
1037
1038            if self.entry.identifier != declaration_name:
1039                self.emit_msg(ln,
1040                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1041                return
1042
1043            self.create_parameter_list(ln, 'function', args, ',', declaration_name)
1044
1045            self.output_declaration('function', declaration_name,
1046                                    typedef=True,
1047                                    functiontype=return_type,
1048                                    purpose=self.entry.declaration_purpose)
1049            return
1050        #
1051        # Not a function, try to parse a simple typedef.
1052        #
1053        r = KernRe(r'typedef.*\s+(\w+)\s*;')
1054        if r.match(proto):
1055            declaration_name = r.group(1)
1056
1057            if self.entry.identifier != declaration_name:
1058                self.emit_msg(ln,
1059                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1060                return
1061
1062            self.output_declaration('typedef', declaration_name,
1063                                    purpose=self.entry.declaration_purpose)
1064            return
1065
1066        self.emit_msg(ln, "error: Cannot parse typedef!")
1067
1068    @staticmethod
1069    def process_export(function_set, line):
1070        """
1071        process ``EXPORT_SYMBOL*`` tags
1072
1073        This method doesn't use any variable from the class, so declare it
1074        with a staticmethod decorator.
1075        """
1076
1077        # We support documenting some exported symbols with different
1078        # names.  A horrible hack.
1079        suffixes = [ '_noprof' ]
1080
1081        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
1082        # multiple export lines would violate Kernel coding style.
1083
1084        if export_symbol.search(line):
1085            symbol = export_symbol.group(2)
1086        elif export_symbol_ns.search(line):
1087            symbol = export_symbol_ns.group(2)
1088        else:
1089            return False
1090        #
1091        # Found an export, trim out any special suffixes
1092        #
1093        for suffix in suffixes:
1094            # Be backward compatible with Python < 3.9
1095            if symbol.endswith(suffix):
1096                symbol = symbol[:-len(suffix)]
1097        function_set.add(symbol)
1098        return True
1099
1100    def process_normal(self, ln, line):
1101        """
1102        STATE_NORMAL: looking for the ``/**`` to begin everything.
1103        """
1104
1105        if not doc_start.match(line):
1106            return
1107
1108        # start a new entry
1109        self.reset_state(ln)
1110
1111        # next line is always the function name
1112        self.state = state.NAME
1113
1114    def process_name(self, ln, line):
1115        """
1116        STATE_NAME: Looking for the "name - description" line
1117        """
1118        #
1119        # Check for a DOC: block and handle them specially.
1120        #
1121        if doc_block.search(line):
1122
1123            if not doc_block.group(1):
1124                self.entry.begin_section(ln, "Introduction")
1125            else:
1126                self.entry.begin_section(ln, doc_block.group(1))
1127
1128            self.entry.identifier = self.entry.section
1129            self.state = state.DOCBLOCK
1130        #
1131        # Otherwise we're looking for a normal kerneldoc declaration line.
1132        #
1133        elif doc_decl.search(line):
1134            self.entry.identifier = doc_decl.group(1)
1135
1136            # Test for data declaration
1137            if doc_begin_data.search(line):
1138                self.entry.decl_type = doc_begin_data.group(1)
1139                self.entry.identifier = doc_begin_data.group(2)
1140            #
1141            # Look for a function description
1142            #
1143            elif doc_begin_func.search(line):
1144                self.entry.identifier = doc_begin_func.group(1)
1145                self.entry.decl_type = "function"
1146            #
1147            # We struck out.
1148            #
1149            else:
1150                self.emit_msg(ln,
1151                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
1152                self.state = state.NORMAL
1153                return
1154            #
1155            # OK, set up for a new kerneldoc entry.
1156            #
1157            self.state = state.BODY
1158            self.entry.identifier = self.entry.identifier.strip(" ")
1159            # if there's no @param blocks need to set up default section here
1160            self.entry.begin_section(ln + 1)
1161            #
1162            # Find the description portion, which *should* be there but
1163            # isn't always.
1164            # (We should be able to capture this from the previous parsing - someday)
1165            #
1166            r = KernRe("[-:](.*)")
1167            if r.search(line):
1168                self.entry.declaration_purpose = trim_whitespace(r.group(1))
1169                self.state = state.DECLARATION
1170            else:
1171                self.entry.declaration_purpose = ""
1172
1173            if not self.entry.declaration_purpose and self.config.wshort_desc:
1174                self.emit_msg(ln,
1175                              f"missing initial short description on line:\n{line}")
1176
1177            if not self.entry.identifier and self.entry.decl_type != "enum":
1178                self.emit_msg(ln,
1179                              f"wrong kernel-doc identifier on line:\n{line}")
1180                self.state = state.NORMAL
1181
1182            if self.config.verbose:
1183                self.emit_msg(ln,
1184                              f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
1185                                  warning=False)
1186        #
1187        # Failed to find an identifier. Emit a warning
1188        #
1189        else:
1190            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
1191
1192    def is_new_section(self, ln, line):
1193        """
1194        Helper function to determine if a new section is being started.
1195        """
1196        if doc_sect.search(line):
1197            self.state = state.BODY
1198            #
1199            # Pick out the name of our new section, tweaking it if need be.
1200            #
1201            newsection = doc_sect.group(1)
1202            if newsection.lower() == 'description':
1203                newsection = 'Description'
1204            elif newsection.lower() == 'context':
1205                newsection = 'Context'
1206                self.state = state.SPECIAL_SECTION
1207            elif newsection.lower() in ["@return", "@returns",
1208                                        "return", "returns"]:
1209                newsection = "Return"
1210                self.state = state.SPECIAL_SECTION
1211            elif newsection[0] == '@':
1212                self.state = state.SPECIAL_SECTION
1213            #
1214            # Initialize the contents, and get the new section going.
1215            #
1216            newcontents = doc_sect.group(2)
1217            if not newcontents:
1218                newcontents = ""
1219            self.dump_section()
1220            self.entry.begin_section(ln, newsection)
1221            self.entry.leading_space = None
1222
1223            self.entry.add_text(newcontents.lstrip())
1224            return True
1225        return False
1226
1227    def is_comment_end(self, ln, line):
1228        """
1229        Helper function to detect (and effect) the end of a kerneldoc comment.
1230        """
1231        if doc_end.search(line):
1232            self.dump_section()
1233
1234            # Look for doc_com + <text> + doc_end:
1235            r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')
1236            if r.match(line):
1237                self.emit_msg(ln, f"suspicious ending line: {line}")
1238
1239            self.entry.prototype = ""
1240            self.entry.new_start_line = ln + 1
1241
1242            self.state = state.PROTO
1243            return True
1244        return False
1245
1246
1247    def process_decl(self, ln, line):
1248        """
1249        STATE_DECLARATION: We've seen the beginning of a declaration.
1250        """
1251        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1252            return
1253        #
1254        # Look for anything with the " * " line beginning.
1255        #
1256        if doc_content.search(line):
1257            cont = doc_content.group(1)
1258            #
1259            # A blank line means that we have moved out of the declaration
1260            # part of the comment (without any "special section" parameter
1261            # descriptions).
1262            #
1263            if cont == "":
1264                self.state = state.BODY
1265            #
1266            # Otherwise we have more of the declaration section to soak up.
1267            #
1268            else:
1269                self.entry.declaration_purpose = \
1270                    trim_whitespace(self.entry.declaration_purpose + ' ' + cont)
1271        else:
1272            # Unknown line, ignore
1273            self.emit_msg(ln, f"bad line: {line}")
1274
1275
1276    def process_special(self, ln, line):
1277        """
1278        STATE_SPECIAL_SECTION: a section ending with a blank line.
1279        """
1280        #
1281        # If we have hit a blank line (only the " * " marker), then this
1282        # section is done.
1283        #
1284        if KernRe(r"\s*\*\s*$").match(line):
1285            self.entry.begin_section(ln, dump = True)
1286            self.state = state.BODY
1287            return
1288        #
1289        # Not a blank line, look for the other ways to end the section.
1290        #
1291        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1292            return
1293        #
1294        # OK, we should have a continuation of the text for this section.
1295        #
1296        if doc_content.search(line):
1297            cont = doc_content.group(1)
1298            #
1299            # If the lines of text after the first in a special section have
1300            # leading white space, we need to trim it out or Sphinx will get
1301            # confused.  For the second line (the None case), see what we
1302            # find there and remember it.
1303            #
1304            if self.entry.leading_space is None:
1305                r = KernRe(r'^(\s+)')
1306                if r.match(cont):
1307                    self.entry.leading_space = len(r.group(1))
1308                else:
1309                    self.entry.leading_space = 0
1310            #
1311            # Otherwise, before trimming any leading chars, be *sure*
1312            # that they are white space.  We should maybe warn if this
1313            # isn't the case.
1314            #
1315            for i in range(0, self.entry.leading_space):
1316                if cont[i] != " ":
1317                    self.entry.leading_space = i
1318                    break
1319            #
1320            # Add the trimmed result to the section and we're done.
1321            #
1322            self.entry.add_text(cont[self.entry.leading_space:])
1323        else:
1324            # Unknown line, ignore
1325            self.emit_msg(ln, f"bad line: {line}")
1326
1327    def process_body(self, ln, line):
1328        """
1329        STATE_BODY: the bulk of a kerneldoc comment.
1330        """
1331        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1332            return
1333
1334        if doc_content.search(line):
1335            cont = doc_content.group(1)
1336            self.entry.add_text(cont)
1337        else:
1338            # Unknown line, ignore
1339            self.emit_msg(ln, f"bad line: {line}")
1340
1341    def process_inline_name(self, ln, line):
1342        """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
1343
1344        if doc_inline_sect.search(line):
1345            self.entry.begin_section(ln, doc_inline_sect.group(1))
1346            self.entry.add_text(doc_inline_sect.group(2).lstrip())
1347            self.state = state.INLINE_TEXT
1348        elif doc_inline_end.search(line):
1349            self.dump_section()
1350            self.state = state.PROTO
1351        elif doc_content.search(line):
1352            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
1353            self.state = state.PROTO
1354
1355            #
1356            # Don't let it add partial comments at the code, as breaks the
1357            # logic meant to remove comments from prototypes.
1358            #
1359            self.process_proto_type(ln, "/**\n" + line)
1360        # else ... ??
1361
1362    def process_inline_text(self, ln, line):
1363        """STATE_INLINE_TEXT: docbook comments within a prototype."""
1364
1365        if doc_inline_end.search(line):
1366            self.dump_section()
1367            self.state = state.PROTO
1368        elif doc_content.search(line):
1369            self.entry.add_text(doc_content.group(1))
1370        # else ... ??
1371
1372    def syscall_munge(self, ln, proto):         # pylint: disable=W0613
1373        """
1374        Handle syscall definitions.
1375        """
1376
1377        is_void = False
1378
1379        # Strip newlines/CR's
1380        proto = re.sub(r'[\r\n]+', ' ', proto)
1381
1382        # Check if it's a SYSCALL_DEFINE0
1383        if 'SYSCALL_DEFINE0' in proto:
1384            is_void = True
1385
1386        # Replace SYSCALL_DEFINE with correct return type & function name
1387        proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
1388
1389        r = KernRe(r'long\s+(sys_.*?),')
1390        if r.search(proto):
1391            proto = KernRe(',').sub('(', proto, count=1)
1392        elif is_void:
1393            proto = KernRe(r'\)').sub('(void)', proto, count=1)
1394
1395        # Now delete all of the odd-numbered commas in the proto
1396        # so that argument types & names don't have a comma between them
1397        count = 0
1398        length = len(proto)
1399
1400        if is_void:
1401            length = 0  # skip the loop if is_void
1402
1403        for ix in range(length):
1404            if proto[ix] == ',':
1405                count += 1
1406                if count % 2 == 1:
1407                    proto = proto[:ix] + ' ' + proto[ix + 1:]
1408
1409        return proto
1410
1411    def tracepoint_munge(self, ln, proto):
1412        """
1413        Handle tracepoint definitions.
1414        """
1415
1416        tracepointname = None
1417        tracepointargs = None
1418
1419        # Match tracepoint name based on different patterns
1420        r = KernRe(r'TRACE_EVENT\((.*?),')
1421        if r.search(proto):
1422            tracepointname = r.group(1)
1423
1424        r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
1425        if r.search(proto):
1426            tracepointname = r.group(1)
1427
1428        r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
1429        if r.search(proto):
1430            tracepointname = r.group(2)
1431
1432        if tracepointname:
1433            tracepointname = tracepointname.lstrip()
1434
1435        r = KernRe(r'TP_PROTO\((.*?)\)')
1436        if r.search(proto):
1437            tracepointargs = r.group(1)
1438
1439        if not tracepointname or not tracepointargs:
1440            self.emit_msg(ln,
1441                          f"Unrecognized tracepoint format:\n{proto}\n")
1442        else:
1443            proto = f"static inline void trace_{tracepointname}({tracepointargs})"
1444            self.entry.identifier = f"trace_{self.entry.identifier}"
1445
1446        return proto
1447
1448    def process_proto_function(self, ln, line):
1449        """Ancillary routine to process a function prototype."""
1450
1451        # strip C99-style comments to end of line
1452        line = KernRe(r"//.*$", re.S).sub('', line)
1453        #
1454        # Soak up the line's worth of prototype text, stopping at { or ; if present.
1455        #
1456        if KernRe(r'\s*#\s*define').match(line):
1457            self.entry.prototype = line
1458        elif not line.startswith('#'):   # skip other preprocessor stuff
1459            r = KernRe(r'([^\{]*)')
1460            if r.match(line):
1461                self.entry.prototype += r.group(1) + " "
1462        #
1463        # If we now have the whole prototype, clean it up and declare victory.
1464        #
1465        if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
1466            # strip comments and surrounding spaces
1467            self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
1468            #
1469            # Handle self.entry.prototypes for function pointers like:
1470            #       int (*pcs_config)(struct foo)
1471            # by turning it into
1472            #	    int pcs_config(struct foo)
1473            #
1474            r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
1475            self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
1476            #
1477            # Handle special declaration syntaxes
1478            #
1479            if 'SYSCALL_DEFINE' in self.entry.prototype:
1480                self.entry.prototype = self.syscall_munge(ln,
1481                                                          self.entry.prototype)
1482            else:
1483                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
1484                if r.search(self.entry.prototype):
1485                    self.entry.prototype = self.tracepoint_munge(ln,
1486                                                                 self.entry.prototype)
1487            #
1488            # ... and we're done
1489            #
1490            self.dump_function(ln, self.entry.prototype)
1491            self.reset_state(ln)
1492
1493    def process_proto_type(self, ln, line):
1494        """
1495        Ancillary routine to process a type.
1496        """
1497
1498        # Strip C99-style comments and surrounding whitespace
1499        line = KernRe(r"//.*$", re.S).sub('', line).strip()
1500        if not line:
1501            return # nothing to see here
1502
1503        # To distinguish preprocessor directive from regular declaration later.
1504        if line.startswith('#'):
1505            line += ";"
1506        #
1507        # Split the declaration on any of { } or ;, and accumulate pieces
1508        # until we hit a semicolon while not inside {brackets}
1509        #
1510        r = KernRe(r'(.*?)([{};])')
1511        for chunk in r.split(line):
1512            if chunk:  # Ignore empty matches
1513                self.entry.prototype += chunk
1514                #
1515                # This cries out for a match statement ... someday after we can
1516                # drop Python 3.9 ...
1517                #
1518                if chunk == '{':
1519                    self.entry.brcount += 1
1520                elif chunk == '}':
1521                    self.entry.brcount -= 1
1522                elif chunk == ';' and self.entry.brcount <= 0:
1523                    self.dump_declaration(ln, self.entry.prototype)
1524                    self.reset_state(ln)
1525                    return
1526        #
1527        # We hit the end of the line while still in the declaration; put
1528        # in a space to represent the newline.
1529        #
1530        self.entry.prototype += ' '
1531
1532    def process_proto(self, ln, line):
1533        """STATE_PROTO: reading a function/whatever prototype."""
1534
1535        if doc_inline_oneline.search(line):
1536            self.entry.begin_section(ln, doc_inline_oneline.group(1))
1537            self.entry.add_text(doc_inline_oneline.group(2))
1538            self.dump_section()
1539
1540        elif doc_inline_start.search(line):
1541            self.state = state.INLINE_NAME
1542
1543        elif self.entry.decl_type == 'function':
1544            self.process_proto_function(ln, line)
1545
1546        else:
1547            self.process_proto_type(ln, line)
1548
1549    def process_docblock(self, ln, line):
1550        """STATE_DOCBLOCK: within a ``DOC:`` block."""
1551
1552        if doc_end.search(line):
1553            self.dump_section()
1554            self.output_declaration("doc", self.entry.identifier)
1555            self.reset_state(ln)
1556
1557        elif doc_content.search(line):
1558            self.entry.add_text(doc_content.group(1))
1559
1560    def parse_export(self):
1561        """
1562        Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
1563        """
1564
1565        export_table = set()
1566
1567        try:
1568            with open(self.fname, "r", encoding="utf8",
1569                      errors="backslashreplace") as fp:
1570
1571                for line in fp:
1572                    self.process_export(export_table, line)
1573
1574        except IOError:
1575            return None
1576
1577        return export_table
1578
1579    #: The state/action table telling us which function to invoke in each state.
1580    state_actions = {
1581        state.NORMAL:			process_normal,
1582        state.NAME:			process_name,
1583        state.BODY:			process_body,
1584        state.DECLARATION:		process_decl,
1585        state.SPECIAL_SECTION:		process_special,
1586        state.INLINE_NAME:		process_inline_name,
1587        state.INLINE_TEXT:		process_inline_text,
1588        state.PROTO:			process_proto,
1589        state.DOCBLOCK:			process_docblock,
1590        }
1591
1592    def parse_kdoc(self):
1593        """
1594        Open and process each line of a C source file.
1595        The parsing is controlled via a state machine, and the line is passed
1596        to a different process function depending on the state. The process
1597        function may update the state as needed.
1598
1599        Besides parsing kernel-doc tags, it also parses export symbols.
1600        """
1601
1602        prev = ""
1603        prev_ln = None
1604        export_table = set()
1605
1606        try:
1607            with open(self.fname, "r", encoding="utf8",
1608                      errors="backslashreplace") as fp:
1609                for ln, line in enumerate(fp):
1610
1611                    line = line.expandtabs().strip("\n")
1612
1613                    # Group continuation lines on prototypes
1614                    if self.state == state.PROTO:
1615                        if line.endswith("\\"):
1616                            prev += line.rstrip("\\")
1617                            if not prev_ln:
1618                                prev_ln = ln
1619                            continue
1620
1621                        if prev:
1622                            ln = prev_ln
1623                            line = prev + line
1624                            prev = ""
1625                            prev_ln = None
1626
1627                    self.config.log.debug("%d %s: %s",
1628                                          ln, state.name[self.state],
1629                                          line)
1630
1631                    # This is an optimization over the original script.
1632                    # There, when export_file was used for the same file,
1633                    # it was read twice. Here, we use the already-existing
1634                    # loop to parse exported symbols as well.
1635                    #
1636                    if (self.state != state.NORMAL) or \
1637                       not self.process_export(export_table, line):
1638                        # Hand this line to the appropriate state handler
1639                        self.state_actions[self.state](self, ln, line)
1640
1641            self.emit_unused_warnings()
1642
1643        except OSError:
1644            self.config.log.error(f"Error: Cannot open file {self.fname}")
1645
1646        return export_table, self.entries
1647