xref: /linux/tools/lib/python/kdoc/kdoc_parser.py (revision fc44c0a0b2a72f2e9331063a311a548634ae18af)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
6
7"""
8Classes and functions related to reading a C language source or header FILE
9and extract embedded documentation comments from it.
10"""
11
12import sys
13import re
14from pprint import pformat
15
16from kdoc.kdoc_re import NestedMatch, KernRe
17from kdoc.kdoc_item import KdocItem
18
19#
20# Regular expressions used to parse kernel-doc markups at KernelDoc class.
21#
22# Let's declare them in lowercase outside any class to make it easier to
23# convert from the Perl script.
24#
25# As those are evaluated at the beginning, no need to cache them
26#
27
28# Allow whitespace at end of comment start.
# Opening line of a kernel-doc comment: "/**" alone on the line.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# "*/" anywhere in the line.
doc_end = KernRe(r'\*/', cache=False)
# Comment-line decoration: optional whitespace, "*", optional whitespace.
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same decoration, but consuming at most one space after the "*".
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Decoration followed by a captured word.
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
# (the character right after the ":" must not be another ":").
#
known_section_names = 'description|context|returns?|notes?|examples?'
# NOTE: this pattern is unanchored and case-insensitive.
known_sections = KernRe(known_section_names, flags = re.I)
# Group 1 is the section/parameter name, group 2 the optional text after ":".
doc_sect = doc_com + \
    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Body decoration followed by the captured remainder of the line.
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# Inline (in-declaration) comments: start, "@name: text" line, end, and the
# single-line "/** @name: text */" form.
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)

# EXPORT_SYMBOL[_GPL](name) and EXPORT_SYMBOL_NS[_GPL](name, "ns");
# in both patterns group 2 is the symbol name.
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name", optionally with ".member" / "->member" chains and a trailing "...";
# group 1 is everything after the "@".
type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

#
# Tests for the beginning of a kerneldoc block in its various forms.
#
# "DOC: title" blocks; group 1 is the title text.
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
# Data declarations; group 1 is the keyword, group 2 the (possibly empty) name.
doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
# Function/macro declarations; group 1 is the name.
# NOTE(review): relies on str(doc_com) yielding the pattern text - confirm
# against KernRe.__str__.
doc_begin_func = KernRe(str(doc_com) +			# initial " * "
                        r"(?:\w+\s*\*\s*)?" + 		# type (not captured)
                        r'(?:define\s+)?' + 		# possible "define" (not captured)
                        r'(\w+)\s*(?:\(\w*\))?\s*' +	# name and optional "(...)"
                        r'(?:[-:].*)?$',		# description (not captured)
                        cache = False)
71
72#
73# Regexes here are guaranteed to have the end delimiter matching
74# the start delimiter. Yet, right now, only one replace group
75# is allowed.
76#
# (matcher, replacement) pairs applied, in order, to struct/union members by
# KernelDoc.dump_struct().  The lock-annotation macros are replaced by an
# empty string; STRUCT_GROUP( is paired with r'\0'.
struct_nested_prefixes = [
    (NestedMatch(annotation + r"\s*\("), "")
    for annotation in (
        "__cond_acquires",
        "__cond_releases",
        "__acquires",
        "__releases",
        "__must_hold",
        "__must_not_hold",
        "__must_hold_shared",
        "__cond_acquires_shared",
        "__acquires_shared",
        "__releases_shared",
    )
] + [
    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
]
90
91#
92# Ancillary functions
93#
94
# Two or more consecutive whitespace characters.
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
    """
    Strip ``s`` and squeeze every run of two or more whitespace
    characters inside it down to a single space.
    """
    stripped = s.strip()
    return multi_space.sub(' ', stripped)
101
def trim_private_members(text):
    """
    Drop the members of a ``struct``/``enum`` body that are marked
    "private", then remove all remaining comments.

    Returns the stripped result.
    """
    passes = (
        # A private region closed by a "public:" comment.
        KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S),
        # A private region that lasts until the end of the text.
        KernRe(r'/\*\s*private:.*', flags=re.S),
        # The markers were needed above; now every comment can go, along
        # with the white space around it.
        KernRe(r'\s*/\*.*?\*/\s*', flags=re.S),
    )
    for unwanted in passes:
        text = unwanted.sub('', text)
    return text.strip()
115
class state:
    """
    Numeric identifiers for the states of the parser's state machine,
    plus a table mapping each identifier back to its name.
    """

    # Parser states
    NORMAL = 0              #: Normal code.
    NAME = 1                #: Looking for function name.
    DECLARATION = 2         #: We have seen a declaration which might not be done.
    BODY = 3                #: The body of the comment.
    SPECIAL_SECTION = 4     #: Doc section ending with a blank line.
    PROTO = 5               #: Scanning prototype.
    DOCBLOCK = 6            #: Documentation block.
    INLINE_NAME = 7         #: Gathering doc outside main block.
    INLINE_TEXT = 8         #: Reading the body of inline docs.

    #: Names for each parser state.
    name = [
        "NORMAL", "NAME", "DECLARATION",
        "BODY", "SPECIAL_SECTION", "PROTO",
        "DOCBLOCK", "INLINE_NAME", "INLINE_TEXT",
    ]
144
145
SECTION_DEFAULT = "Description"  #: Default section.

class KernelEntry:
    """
    Encapsulates a Kernel documentation entry.

    An entry accumulates the text of the section currently being read
    (see add_text()) and, on dump_section(), files that text either as a
    parameter description or as a named section.  Warnings are buffered in
    ``warnings`` rather than logged right away.

    Some attributes read by the parser (for instance ``identifier`` and
    ``declaration_purpose``) are not set here; they are assigned from
    outside this class.
    """

    def __init__(self, config, fname, ln):
        """
        Create an empty entry.

        ``config`` is only used for ``config.log.info()``, ``fname`` prefixes
        every message, and ``ln`` is the line the comment starts at: the
        declaration start line is recorded as ``ln + 1``.
        """
        self.config = config
        self.fname = fname

        self._contents = []
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # Section currently being accumulated and the line it started at.
        # Initialized here so that dump_section() is safe to call even if
        # begin_section() has not been called yet.
        self.section = SECTION_DEFAULT
        self.new_start_line = 0

        # State flags
        self.brcount = 0
        self.declaration_start_line = ln + 1

    #
    # Management of section contents
    #
    def add_text(self, text):
        """Add a new text to the entry contents list."""
        self._contents.append(text)

    def contents(self):
        """
        Return all accumulated texts joined by newlines, with a trailing
        newline.
        """
        return '\n'.join(self._contents) + '\n'

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, ln, msg, *, warning=True):
        """
        Emit a message formatted as ``<fname>:<ln> <msg>``.

        Informational messages (``warning=False``) go straight to
        ``config.log.info()``.  Warnings are only appended to
        ``self.warnings``: the output logic decides later which ones are
        reported, so that only symbols that are actually output produce
        warnings.
        """

        log_msg = f"{self.fname}:{ln} {msg}"

        if not warning:
            self.config.log.info(log_msg)
            return

        self.warnings.append(log_msg)

    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
        """
        Begin a new section called ``title`` starting at ``line_no``.

        With ``dump`` set, the section accumulated so far is stored first.
        """
        if dump:
            self.dump_section(start_new = True)
        self.section = title
        self.new_start_line = line_no

    def dump_section(self, start_new=True):
        """
        Dumps section contents to arrays/hashes intended for that purpose.

        If the current section name matches ``type_param`` (an ``@name``),
        the contents are stored as the description of that parameter;
        otherwise they are stored in ``sections``.  With ``start_new`` set,
        the entry goes back to an empty default section afterwards.
        """
        #
        # If we have accumulated no contents in the default ("description")
        # section, don't bother.
        #
        if self.section == SECTION_DEFAULT and not self._contents:
            return
        name = self.section
        contents = self.contents()

        if type_param.match(name):
            name = type_param.group(1)

            self.parameterdescs[name] = contents
            self.parameterdesc_start_lines[name] = self.new_start_line

            self.new_start_line = 0

        else:
            if name in self.sections and self.sections[name] != "":
                # Only warn on user-specified duplicate section names
                if name != SECTION_DEFAULT:
                    self.emit_msg(self.new_start_line,
                                  f"duplicate section name '{name}'")
                # Treat as a new paragraph - add a blank line
                self.sections[name] += '\n' + contents
            else:
                self.sections[name] = contents
                self.section_start_lines[name] = self.new_start_line
                self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self._contents = []
255
# Module-wide latch: set to True by KernelDoc.__init__() once the "Python 3.7
# or later is required" warning has been emitted, so it is reported only once.
python_warning = False
257
258class KernelDoc:
259    """
260    Read a C language source or header FILE and extract embedded
261    documentation comments.
262    """
263
264    #: Name of context section.
265    section_context = "Context"
266
267    #: Name of return section.
268    section_return = "Return"
269
270    #: String to write when a parameter is not described.
271    undescribed = "-- undescribed --"
272
273    def __init__(self, config, fname, xforms):
274        """Initialize internal variables"""
275
276        self.fname = fname
277        self.config = config
278        self.xforms = xforms
279
280        # Initial state for the state machines
281        self.state = state.NORMAL
282
283        # Store entry currently being processed
284        self.entry = None
285
286        # Place all potential outputs into an array
287        self.entries = []
288
289        #
290        # We need Python 3.7 for its "dicts remember the insertion
291        # order" guarantee
292        #
293        global python_warning
294        if (not python_warning and
295            sys.version_info.major == 3 and sys.version_info.minor < 7):
296
297            self.emit_msg(0,
298                          'Python 3.7 or later is required for correct results')
299            python_warning = True
300
301    def emit_msg(self, ln, msg, *, warning=True):
302        """Emit a message"""
303
304        if self.entry:
305            self.entry.emit_msg(ln, msg, warning=warning)
306            return
307
308        log_msg = f"{self.fname}:{ln} {msg}"
309
310        if warning:
311            self.config.log.warning(log_msg)
312        else:
313            self.config.log.info(log_msg)
314
315    def dump_section(self, start_new=True):
316        """
317        Dump section contents to arrays/hashes intended for that purpose.
318        """
319
320        if self.entry:
321            self.entry.dump_section(start_new)
322
323    # TODO: rename it to store_declaration after removal of kernel-doc.pl
324    def output_declaration(self, dtype, name, **args):
325        """
326        Store the entry into an entry array.
327
328        The actual output and output filters will be handled elsewhere.
329        """
330
331        item = KdocItem(name, self.fname, dtype,
332                        self.entry.declaration_start_line, **args)
333        item.warnings = self.entry.warnings
334
335        # Drop empty sections
336        # TODO: improve empty sections logic to emit warnings
337        sections = self.entry.sections
338        for section in ["Description", "Return"]:
339            if section in sections and not sections[section].rstrip():
340                del sections[section]
341        item.set_sections(sections, self.entry.section_start_lines)
342        item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
343                        self.entry.parametertypes,
344                        self.entry.parameterdesc_start_lines)
345        self.entries.append(item)
346
347        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
348
349    def emit_unused_warnings(self):
350        """
351        When the parser fails to produce a valid entry, it places some
352        warnings under `entry.warnings` that will be discarded when resetting
353        the state.
354
355        Ensure that those warnings are not lost.
356
357        .. note::
358
359              Because we are calling `config.warning()` here, those
360              warnings are not filtered by the `-W` parameters: they will all
361              be produced even when `-Wreturn`, `-Wshort-desc`, and/or
362              `-Wcontents-before-sections` are used.
363
364              Allowing those warnings to be filtered is complex, because it
365              would require storing them in a buffer and then filtering them
366              during the output step of the code, depending on the
367              selected symbols.
368        """
369        if self.entry and self.entry not in self.entries:
370            for log_msg in self.entry.warnings:
371                self.config.warning(log_msg)
372
373    def reset_state(self, ln):
374        """
375        Ancillary routine to create a new entry. It initializes all
376        variables used by the state machine.
377        """
378
379        self.emit_unused_warnings()
380
381        self.entry = KernelEntry(self.config, self.fname, ln)
382
383        # State flags
384        self.state = state.NORMAL
385
386    def push_parameter(self, ln, decl_type, param, dtype,
387                       org_arg, declaration_name):
388        """
389        Store parameters and their descriptions at self.entry.
390        """
391
392        if self.entry.anon_struct_union and dtype == "" and param == "}":
393            return  # Ignore the ending }; from anonymous struct/union
394
395        self.entry.anon_struct_union = False
396
397        param = KernRe(r'[\[\)].*').sub('', param, count=1)
398
399        #
400        # Look at various "anonymous type" cases.
401        #
402        if dtype == '':
403            if param.endswith("..."):
404                if len(param) > 3: # there is a name provided, use that
405                    param = param[:-3]
406                if not self.entry.parameterdescs.get(param):
407                    self.entry.parameterdescs[param] = "variable arguments"
408
409            elif (not param) or param == "void":
410                param = "void"
411                self.entry.parameterdescs[param] = "no arguments"
412
413            elif param in ["struct", "union"]:
414                # Handle unnamed (anonymous) union or struct
415                dtype = param
416                param = "{unnamed_" + param + "}"
417                self.entry.parameterdescs[param] = "anonymous\n"
418                self.entry.anon_struct_union = True
419
420        # Warn if parameter has no description
421        # (but ignore ones starting with # as these are not parameters
422        # but inline preprocessor statements)
423        if param not in self.entry.parameterdescs and not param.startswith("#"):
424            self.entry.parameterdescs[param] = self.undescribed
425
426            if "." not in param:
427                if decl_type == 'function':
428                    dname = f"{decl_type} parameter"
429                else:
430                    dname = f"{decl_type} member"
431
432                self.emit_msg(ln,
433                              f"{dname} '{param}' not described in '{declaration_name}'")
434
435        # Strip spaces from param so that it is one continuous string on
436        # parameterlist. This fixes a problem where check_sections()
437        # cannot find a parameter like "addr[6 + 2]" because it actually
438        # appears as "addr[6", "+", "2]" on the parameter list.
439        # However, it's better to maintain the param string unchanged for
440        # output, so just weaken the string compare in check_sections()
441        # to ignore "[blah" in a parameter string.
442
443        self.entry.parameterlist.append(param)
444        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
445        self.entry.parametertypes[param] = org_arg
446
447
    def create_parameter_list(self, ln, decl_type, args,
                              splitter, declaration_name):
        """
        Creates a list of parameters, storing them at self.entry.

        ``args`` is split at ``splitter`` and every resulting declaration is
        broken into name(s) and type, which are handed to push_parameter().
        ``ln``, ``decl_type`` and ``declaration_name`` are only passed along
        for use in messages.

        Four shapes of declaration are told apart: preprocessor directives
        (starting with ``#``), pointers to functions, arrays of pointers and
        plain ``type name[, name...]`` declarations.
        """

        # temporarily replace all commas inside function pointer definition
        # by '#', so that they survive the split below
        arg_expr = KernRe(r'(\([^\),]+),')
        while arg_expr.search(args):
            args = arg_expr.sub(r"\1#", args)

        for arg in args.split(splitter):
            # Ignore argument attributes
            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

            # Strip leading/trailing spaces; note that only the first run of
            # white space is squeezed (count=1)
            arg = arg.strip()
            arg = KernRe(r'\s+').sub(' ', arg, count=1)

            if arg.startswith('#'):
                # Treat preprocessor directive as a typeless variable just to fill
                # corresponding data structures "correctly". Catch it later in
                # output_* subs.

                # Treat preprocessor directive as a typeless variable
                self.push_parameter(ln, decl_type, arg, "",
                                    "", declaration_name)
            #
            # The pointer-to-function case.
            #
            elif KernRe(r'\(.+\)\s*\(').search(arg):
                # Put back the commas that were hidden above
                arg = arg.replace('#', ',')
                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
                           r'([\w\[\].]*)'    # Capture the name and possible [array]
                           r'\s*\)')	      # Make sure the trailing ")" is there
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                # The type is whatever is left once the name is taken out
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            #
            # The array-of-pointers case.  Dig the parameter name out from the middle
            # of the declaration.
            #
            elif KernRe(r'\(.+\)\s*\[').search(arg):
                r = KernRe(r'[^\(]+\(\s*\*\s*'		# Up to "(" and maybe "*"
                           r'([\w.]*?)'			# The actual pointer name
                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
                if r.match(arg):
                    param = r.group(1)
                else:
                    self.emit_msg(ln, f"Invalid param: {arg}")
                    param = arg
                dtype = arg.replace(param, '')
                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
            elif arg:
                #
                # Clean up extraneous spaces and split the string at commas; the first
                # element of the resulting list will also include the type information.
                #
                # Note that "args" is rebound here.  This does not disturb the
                # enclosing loop, whose iterable was evaluated before the first
                # iteration.
                #
                arg = KernRe(r'\s*:\s*').sub(":", arg)
                arg = KernRe(r'\s*\[').sub('[', arg)
                args = KernRe(r'\s*,\s*').split(arg)
                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
                #
                # args[0] has a string of "type a".  If "a" includes an [array]
                # declaration, we want to not be fooled by any white space inside
                # the brackets, so detect and handle that case specially.
                #
                r = KernRe(r'^([^[\]]*\s+)(.*)$')
                if r.match(args[0]):
                    args[0] = r.group(2)
                    dtype = r.group(1)
                else:
                    # No space in args[0]; this seems wrong but preserves previous behavior
                    dtype = ''

                bitfield_re = KernRe(r'(.*?):(\w+)')
                for param in args:
                    #
                    # For pointers, shift the star(s) from the variable name to the
                    # type declaration.
                    #
                    r = KernRe(r'^(\*+)\s*(.*)')
                    if r.match(param):
                        self.push_parameter(ln, decl_type, r.group(2),
                                            f"{dtype} {r.group(1)}",
                                            arg, declaration_name)
                    #
                    # Perform a similar shift for bitfields.
                    #
                    elif bitfield_re.search(param):
                        if dtype != "":  # Skip unnamed bit-fields
                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
                                                f"{dtype}:{bitfield_re.group(2)}",
                                                arg, declaration_name)
                    else:
                        self.push_parameter(ln, decl_type, param, dtype,
                                            arg, declaration_name)
549
550    def check_sections(self, ln, decl_name, decl_type):
551        """
552        Check for errors inside sections, emitting warnings if not found
553        parameters are described.
554        """
555        for section in self.entry.sections:
556            if section not in self.entry.parameterlist and \
557               not known_sections.search(section):
558                if decl_type == 'function':
559                    dname = f"{decl_type} parameter"
560                else:
561                    dname = f"{decl_type} member"
562                self.emit_msg(ln,
563                              f"Excess {dname} '{section}' description in '{decl_name}'")
564
565    def check_return_section(self, ln, declaration_name, return_type):
566        """
567        If the function doesn't return void, warns about the lack of a
568        return description.
569        """
570
571        if not self.config.wreturn:
572            return
573
574        # Ignore an empty return type (It's a macro)
575        # Ignore functions with a "void" return type (but not "void *")
576        if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type):
577            return
578
579        if not self.entry.sections.get("Return", None):
580            self.emit_msg(ln,
581                          f"No description found for return value of '{declaration_name}'")
582
583    def split_struct_proto(self, proto):
584        """
585        Split apart a structure prototype; returns (struct|union, name,
586        members) or ``None``.
587        """
588
589        type_pattern = r'(struct|union)'
590        qualifiers = [
591            "__attribute__",
592            "__packed",
593            "__aligned",
594            "____cacheline_aligned_in_smp",
595            "____cacheline_aligned",
596        ]
597        definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
598
599        r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
600        if r.search(proto):
601            return (r.group(1), r.group(2), r.group(3))
602        else:
603            r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
604            if r.search(proto):
605                return (r.group(1), r.group(3), r.group(2))
606        return None
607
608    def rewrite_struct_members(self, members):
609        """
610        Process ``struct``/``union`` members from the most deeply nested
611        outward.
612
613        Rewrite the members of a ``struct`` or ``union`` for easier formatting
614        later on. Among other things, this function will turn a member like::
615
616          struct { inner_members; } foo;
617
618        into::
619
620          struct foo; inner_members;
621        """
622
623        #
624        # The trick is in the ``^{`` below - it prevents a match of an outer
625        # ``struct``/``union`` until the inner one has been munged
626        # (removing the ``{`` in the process).
627        #
628        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
629                                r'([^\{\};]+)' 	    # 1: possible name
630                                r'(\{)'
631                                r'([^\{\}]*)'       # 3: Contents of declaration
632                                r'(\})'
633                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
634        tuples = struct_members.findall(members)
635        while tuples:
636            for t in tuples:
637                newmember = ""
638                oldmember = "".join(t) # Reconstruct the original formatting
639                dtype, name, lbr, content, rbr, rest, semi = t
640                #
641                # Pass through each field name, normalizing the form and formatting.
642                #
643                for s_id in rest.split(','):
644                    s_id = s_id.strip()
645                    newmember += f"{dtype} {s_id}; "
646                    #
647                    # Remove bitfield/array/pointer info, getting the bare name.
648                    #
649                    s_id = KernRe(r'[:\[].*').sub('', s_id)
650                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
651                    #
652                    # Pass through the members of this inner structure/union.
653                    #
654                    for arg in content.split(';'):
655                        arg = arg.strip()
656                        #
657                        # Look for (type)(*name)(args) - pointer to function
658                        #
659                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
660                        if r.match(arg):
661                            dtype, name, extra = r.group(1), r.group(2), r.group(3)
662                            # Pointer-to-function
663                            if not s_id:
664                                # Anonymous struct/union
665                                newmember += f"{dtype}{name}{extra}; "
666                            else:
667                                newmember += f"{dtype}{s_id}.{name}{extra}; "
668                        #
669                        # Otherwise a non-function member.
670                        #
671                        else:
672                            #
673                            # Remove bitmap and array portions and spaces around commas
674                            #
675                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
676                            arg = KernRe(r'\[.*\]').sub('', arg)
677                            arg = KernRe(r'\s*,\s*').sub(',', arg)
678                            #
679                            # Look for a normal decl - "type name[,name...]"
680                            #
681                            r = KernRe(r'(.*)\s+([\S+,]+)')
682                            if r.search(arg):
683                                for name in r.group(2).split(','):
684                                    name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name)
685                                    if not s_id:
686                                        # Anonymous struct/union
687                                        newmember += f"{r.group(1)} {name}; "
688                                    else:
689                                        newmember += f"{r.group(1)} {s_id}.{name}; "
690                            else:
691                                newmember += f"{arg}; "
692                #
693                # At the end of the s_id loop, replace the original declaration with
694                # the munged version.
695                #
696                members = members.replace(oldmember, newmember)
697            #
698            # End of the tuple loop - search again and see if there are outer members
699            # that now turn up.
700            #
701            tuples = struct_members.findall(members)
702        return members
703
704    def format_struct_decl(self, declaration):
705        """
706        Format the ``struct`` declaration into a standard form for inclusion
707        in the resulting docs.
708        """
709
710        #
711        # Insert newlines, get rid of extra spaces.
712        #
713        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
714        declaration = KernRe(r'\}\s+;').sub('};', declaration)
715        #
716        # Format inline enums with each member on its own line.
717        #
718        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
719        while r.search(declaration):
720            declaration = r.sub(r'\1,\n\2', declaration)
721        #
722        # Now go through and supply the right number of tabs
723        # for each line.
724        #
725        def_args = declaration.split('\n')
726        level = 1
727        declaration = ""
728        for clause in def_args:
729            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
730            if clause:
731                if '}' in clause and level > 1:
732                    level -= 1
733                if not clause.startswith('#'):
734                    declaration += "\t" * level
735                declaration += "\t" + clause + "\n"
736                if "{" in clause and "}" not in clause:
737                    level += 1
738        return declaration
739
740
741    def dump_struct(self, ln, proto):
742        """
743        Store an entry for a ``struct`` or ``union``
744        """
745        #
746        # Do the basic parse to get the pieces of the declaration.
747        #
748        struct_parts = self.split_struct_proto(proto)
749        if not struct_parts:
750            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
751            return
752        decl_type, declaration_name, members = struct_parts
753
754        if self.entry.identifier != declaration_name:
755            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
756                          f"Prototype was for {decl_type} {declaration_name} instead\n")
757            return
758        #
759        # Go through the list of members applying all of our transformations.
760        #
761        members = trim_private_members(members)
762        members = self.xforms.apply("struct", members)
763
764        for search, sub in struct_nested_prefixes:
765            members = search.sub(search, sub, members)
766        #
767        # Deal with embedded struct and union members, and drop enums entirely.
768        #
769        declaration = members
770        members = self.rewrite_struct_members(members)
771        members = re.sub(r'(\{[^\{\}]*\})', '', members)
772        #
773        # Output the result and we are done.
774        #
775        self.create_parameter_list(ln, decl_type, members, ';',
776                                   declaration_name)
777        self.check_sections(ln, declaration_name, decl_type)
778        self.output_declaration(decl_type, declaration_name,
779                                definition=self.format_struct_decl(declaration),
780                                purpose=self.entry.declaration_purpose)
781
782    def dump_enum(self, ln, proto):
783        """
784        Store an ``enum`` inside self.entries array.
785        """
786        #
787        # Strip preprocessor directives.  Note that this depends on the
788        # trailing semicolon we added in process_proto_type().
789        #
790        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
791        #
792        # Parse out the name and members of the enum.  Typedef form first.
793        #
794        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
795        if r.search(proto):
796            declaration_name = r.group(2)
797            members = trim_private_members(r.group(1))
798        #
799        # Failing that, look for a straight enum
800        #
801        else:
802            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
803            if r.match(proto):
804                declaration_name = r.group(1)
805                members = trim_private_members(r.group(2))
806        #
807        # OK, this isn't going to work.
808        #
809            else:
810                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
811                return
812        #
813        # Make sure we found what we were expecting.
814        #
815        if self.entry.identifier != declaration_name:
816            if self.entry.identifier == "":
817                self.emit_msg(ln,
818                              f"{proto}: wrong kernel-doc identifier on prototype")
819            else:
820                self.emit_msg(ln,
821                              f"expecting prototype for enum {self.entry.identifier}. "
822                              f"Prototype was for enum {declaration_name} instead")
823            return
824
825        if not declaration_name:
826            declaration_name = "(anonymous)"
827        #
828        # Parse out the name of each enum member, and verify that we
829        # have a description for it.
830        #
831        member_set = set()
832        members = KernRe(r'\([^;)]*\)').sub('', members)
833        for arg in members.split(','):
834            if not arg:
835                continue
836            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
837            self.entry.parameterlist.append(arg)
838            if arg not in self.entry.parameterdescs:
839                self.entry.parameterdescs[arg] = self.undescribed
840                self.emit_msg(ln,
841                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
842            member_set.add(arg)
843        #
844        # Ensure that every described member actually exists in the enum.
845        #
846        for k in self.entry.parameterdescs:
847            if k not in member_set:
848                self.emit_msg(ln,
849                              f"Excess enum value '@{k}' description in '{declaration_name}'")
850
851        self.output_declaration('enum', declaration_name,
852                                purpose=self.entry.declaration_purpose)
853
854    def dump_var(self, ln, proto):
855        """
856        Store variables that are part of kAPI.
857        """
858        VAR_ATTRIBS = [
859            "extern",
860            "const",
861        ]
862        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
863
864        #
865        # Store the full prototype before modifying it
866        #
867        full_proto = proto
868        declaration_name = None
869
870        #
871        # Handle macro definitions
872        #
873        macro_prefixes = [
874            KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)"),
875        ]
876
877        for r in macro_prefixes:
878            match = r.search(proto)
879            if match:
880                declaration_name = match.group(1)
881                break
882
883        #
884        # Drop comments and macros to have a pure C prototype
885        #
886        if not declaration_name:
887            proto = self.xforms.apply("var", proto)
888
889        proto = proto.rstrip()
890
891        #
892        # Variable name is at the end of the declaration
893        #
894
895        default_val = None
896
897        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
898        if r.match(proto):
899            if not declaration_name:
900                declaration_name = r.group(1)
901
902            default_val = r.group(2)
903        else:
904            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
905
906            if r.match(proto):
907                default_val = r.group(1)
908        if not declaration_name:
909           self.emit_msg(ln,f"{proto}: can't parse variable")
910           return
911
912        if default_val:
913            default_val = default_val.lstrip("=").strip()
914
915        self.output_declaration("var", declaration_name,
916                                full_proto=full_proto,
917                                default_val=default_val,
918                                purpose=self.entry.declaration_purpose)
919
920    def dump_declaration(self, ln, prototype):
921        """
922        Store a data declaration inside self.entries array.
923        """
924
925        if self.entry.decl_type == "enum":
926            self.dump_enum(ln, prototype)
927        elif self.entry.decl_type == "typedef":
928            self.dump_typedef(ln, prototype)
929        elif self.entry.decl_type in ["union", "struct"]:
930            self.dump_struct(ln, prototype)
931        elif self.entry.decl_type == "var":
932            self.dump_var(ln, prototype)
933        else:
934            # This would be a bug
935            self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
936
937    def dump_function(self, ln, prototype):
938        """
939        Store a function or function macro inside self.entries array.
940        """
941
942        found = func_macro = False
943        return_type = ''
944        decl_type = 'function'
945
946        #
947        # If we have a macro, remove the "#define" at the front.
948        #
949        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
950        if new_proto != prototype:
951            prototype = new_proto
952            #
953            # Dispense with the simple "#define A B" case here; the key
954            # is the space after the name of the symbol being defined.
955            # NOTE that the seemingly misnamed "func_macro" indicates a
956            # macro *without* arguments.
957            #
958            r = KernRe(r'^(\w+)\s+')
959            if r.search(prototype):
960                return_type = ''
961                declaration_name = r.group(1)
962                func_macro = True
963                found = True
964        else:
965            #
966            # Apply the initial transformations.
967            #
968            prototype = self.xforms.apply("func", prototype)
969
970        # Yes, this truly is vile.  We are looking for:
971        # 1. Return type (may be nothing if we're looking at a macro)
972        # 2. Function name
973        # 3. Function parameters.
974        #
975        # All the while we have to watch out for function pointer parameters
976        # (which IIRC is what the two sections are for), C types (these
977        # regexps don't even start to express all the possibilities), and
978        # so on.
979        #
980        # If you mess with these regexps, it's a good idea to check that
981        # the following functions' documentation still comes out right:
982        # - parport_register_device (function pointer parameters)
983        # - atomic_set (macro)
984        # - pci_match_device, __copy_to_user (long return type)
985
986        name = r'\w+'
987        type1 = r'(?:[\w\s]+)?'
988        type2 = r'(?:[\w\s]+\*+)+'
989        #
990        # Attempt to match first on (args) with no internal parentheses; this
991        # lets us easily filter out __acquires() and other post-args stuff.  If
992        # that fails, just grab the rest of the line to the last closing
993        # parenthesis.
994        #
995        proto_args = r'\(([^\(]*|.*)\)'
996        #
997        # (Except for the simple macro case) attempt to split up the prototype
998        # in the various ways we understand.
999        #
1000        if not found:
1001            patterns = [
1002                rf'^()({name})\s*{proto_args}',
1003                rf'^({type1})\s+({name})\s*{proto_args}',
1004                rf'^({type2})\s*({name})\s*{proto_args}',
1005            ]
1006
1007            for p in patterns:
1008                r = KernRe(p)
1009                if r.match(prototype):
1010                    return_type = r.group(1)
1011                    declaration_name = r.group(2)
1012                    args = r.group(3)
1013                    self.create_parameter_list(ln, decl_type, args, ',',
1014                                               declaration_name)
1015                    found = True
1016                    break
1017        #
1018        # Parsing done; make sure that things are as we expect.
1019        #
1020        if not found:
1021            self.emit_msg(ln,
1022                          f"cannot understand function prototype: '{prototype}'")
1023            return
1024        if self.entry.identifier != declaration_name:
1025            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
1026                          f"Prototype was for {declaration_name}() instead")
1027            return
1028        self.check_sections(ln, declaration_name, "function")
1029        self.check_return_section(ln, declaration_name, return_type)
1030        #
1031        # Store the result.
1032        #
1033        self.output_declaration(decl_type, declaration_name,
1034                                typedef=('typedef' in return_type),
1035                                functiontype=return_type,
1036                                purpose=self.entry.declaration_purpose,
1037                                func_macro=func_macro)
1038
1039
1040    def dump_typedef(self, ln, proto):
1041        """
1042        Store a ``typedef`` inside self.entries array.
1043        """
1044        #
1045        # We start by looking for function typedefs.
1046        #
1047        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
1048        typedef_ident = r'\*?\s*(\w\S+)\s*'
1049        typedef_args = r'\s*\((.*)\);'
1050
1051        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
1052        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
1053
1054        # Parse function typedef prototypes
1055        for r in [typedef1, typedef2]:
1056            if not r.match(proto):
1057                continue
1058
1059            return_type = r.group(1).strip()
1060            declaration_name = r.group(2)
1061            args = r.group(3)
1062
1063            if self.entry.identifier != declaration_name:
1064                self.emit_msg(ln,
1065                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1066                return
1067
1068            self.create_parameter_list(ln, 'function', args, ',', declaration_name)
1069
1070            self.output_declaration('function', declaration_name,
1071                                    typedef=True,
1072                                    functiontype=return_type,
1073                                    purpose=self.entry.declaration_purpose)
1074            return
1075        #
1076        # Not a function, try to parse a simple typedef.
1077        #
1078        r = KernRe(r'typedef.*\s+(\w+)\s*;')
1079        if r.match(proto):
1080            declaration_name = r.group(1)
1081
1082            if self.entry.identifier != declaration_name:
1083                self.emit_msg(ln,
1084                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
1085                return
1086
1087            self.output_declaration('typedef', declaration_name,
1088                                    purpose=self.entry.declaration_purpose)
1089            return
1090
1091        self.emit_msg(ln, "error: Cannot parse typedef!")
1092
1093    @staticmethod
1094    def process_export(function_set, line):
1095        """
1096        process ``EXPORT_SYMBOL*`` tags
1097
1098        This method doesn't use any variable from the class, so declare it
1099        with a staticmethod decorator.
1100        """
1101
1102        # We support documenting some exported symbols with different
1103        # names.  A horrible hack.
1104        suffixes = [ '_noprof' ]
1105
1106        # Note: it accepts only one EXPORT_SYMBOL* per line, as having
1107        # multiple export lines would violate Kernel coding style.
1108
1109        if export_symbol.search(line):
1110            symbol = export_symbol.group(2)
1111        elif export_symbol_ns.search(line):
1112            symbol = export_symbol_ns.group(2)
1113        else:
1114            return False
1115        #
1116        # Found an export, trim out any special suffixes
1117        #
1118        for suffix in suffixes:
1119            # Be backward compatible with Python < 3.9
1120            if symbol.endswith(suffix):
1121                symbol = symbol[:-len(suffix)]
1122        function_set.add(symbol)
1123        return True
1124
1125    def process_normal(self, ln, line):
1126        """
1127        STATE_NORMAL: looking for the ``/**`` to begin everything.
1128        """
1129
1130        if not doc_start.match(line):
1131            return
1132
1133        # start a new entry
1134        self.reset_state(ln)
1135
1136        # next line is always the function name
1137        self.state = state.NAME
1138
1139    def process_name(self, ln, line):
1140        """
1141        STATE_NAME: Looking for the "name - description" line
1142        """
1143        #
1144        # Check for a DOC: block and handle them specially.
1145        #
1146        if doc_block.search(line):
1147
1148            if not doc_block.group(1):
1149                self.entry.begin_section(ln, "Introduction")
1150            else:
1151                self.entry.begin_section(ln, doc_block.group(1))
1152
1153            self.entry.identifier = self.entry.section
1154            self.state = state.DOCBLOCK
1155        #
1156        # Otherwise we're looking for a normal kerneldoc declaration line.
1157        #
1158        elif doc_decl.search(line):
1159            self.entry.identifier = doc_decl.group(1)
1160
1161            # Test for data declaration
1162            if doc_begin_data.search(line):
1163                self.entry.decl_type = doc_begin_data.group(1)
1164                self.entry.identifier = doc_begin_data.group(2)
1165            #
1166            # Look for a function description
1167            #
1168            elif doc_begin_func.search(line):
1169                self.entry.identifier = doc_begin_func.group(1)
1170                self.entry.decl_type = "function"
1171            #
1172            # We struck out.
1173            #
1174            else:
1175                self.emit_msg(ln,
1176                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
1177                self.state = state.NORMAL
1178                return
1179            #
1180            # OK, set up for a new kerneldoc entry.
1181            #
1182            self.state = state.BODY
1183            self.entry.identifier = self.entry.identifier.strip(" ")
1184            # if there's no @param blocks need to set up default section here
1185            self.entry.begin_section(ln + 1)
1186            #
1187            # Find the description portion, which *should* be there but
1188            # isn't always.
1189            # (We should be able to capture this from the previous parsing - someday)
1190            #
1191            r = KernRe("[-:](.*)")
1192            if r.search(line):
1193                self.entry.declaration_purpose = trim_whitespace(r.group(1))
1194                self.state = state.DECLARATION
1195            else:
1196                self.entry.declaration_purpose = ""
1197
1198            if not self.entry.declaration_purpose and self.config.wshort_desc:
1199                self.emit_msg(ln,
1200                              f"missing initial short description on line:\n{line}")
1201
1202            if not self.entry.identifier and self.entry.decl_type != "enum":
1203                self.emit_msg(ln,
1204                              f"wrong kernel-doc identifier on line:\n{line}")
1205                self.state = state.NORMAL
1206
1207            if self.config.verbose:
1208                self.emit_msg(ln,
1209                              f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
1210                                  warning=False)
1211        #
1212        # Failed to find an identifier. Emit a warning
1213        #
1214        else:
1215            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
1216
1217    def is_new_section(self, ln, line):
1218        """
1219        Helper function to determine if a new section is being started.
1220        """
1221        if doc_sect.search(line):
1222            self.state = state.BODY
1223            #
1224            # Pick out the name of our new section, tweaking it if need be.
1225            #
1226            newsection = doc_sect.group(1)
1227            if newsection.lower() == 'description':
1228                newsection = 'Description'
1229            elif newsection.lower() == 'context':
1230                newsection = 'Context'
1231                self.state = state.SPECIAL_SECTION
1232            elif newsection.lower() in ["@return", "@returns",
1233                                        "return", "returns"]:
1234                newsection = "Return"
1235                self.state = state.SPECIAL_SECTION
1236            elif newsection[0] == '@':
1237                self.state = state.SPECIAL_SECTION
1238            #
1239            # Initialize the contents, and get the new section going.
1240            #
1241            newcontents = doc_sect.group(2)
1242            if not newcontents:
1243                newcontents = ""
1244            self.dump_section()
1245            self.entry.begin_section(ln, newsection)
1246            self.entry.leading_space = None
1247
1248            self.entry.add_text(newcontents.lstrip())
1249            return True
1250        return False
1251
1252    def is_comment_end(self, ln, line):
1253        """
1254        Helper function to detect (and effect) the end of a kerneldoc comment.
1255        """
1256        if doc_end.search(line):
1257            self.dump_section()
1258
1259            # Look for doc_com + <text> + doc_end:
1260            r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/')
1261            if r.match(line):
1262                self.emit_msg(ln, f"suspicious ending line: {line}")
1263
1264            self.entry.prototype = ""
1265            self.entry.new_start_line = ln + 1
1266
1267            self.state = state.PROTO
1268            return True
1269        return False
1270
1271
1272    def process_decl(self, ln, line):
1273        """
1274        STATE_DECLARATION: We've seen the beginning of a declaration.
1275        """
1276        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1277            return
1278        #
1279        # Look for anything with the " * " line beginning.
1280        #
1281        if doc_content.search(line):
1282            cont = doc_content.group(1)
1283            #
1284            # A blank line means that we have moved out of the declaration
1285            # part of the comment (without any "special section" parameter
1286            # descriptions).
1287            #
1288            if cont == "":
1289                self.state = state.BODY
1290            #
1291            # Otherwise we have more of the declaration section to soak up.
1292            #
1293            else:
1294                self.entry.declaration_purpose = \
1295                    trim_whitespace(self.entry.declaration_purpose + ' ' + cont)
1296        else:
1297            # Unknown line, ignore
1298            self.emit_msg(ln, f"bad line: {line}")
1299
1300
1301    def process_special(self, ln, line):
1302        """
1303        STATE_SPECIAL_SECTION: a section ending with a blank line.
1304        """
1305        #
1306        # If we have hit a blank line (only the " * " marker), then this
1307        # section is done.
1308        #
1309        if KernRe(r"\s*\*\s*$").match(line):
1310            self.entry.begin_section(ln, dump = True)
1311            self.state = state.BODY
1312            return
1313        #
1314        # Not a blank line, look for the other ways to end the section.
1315        #
1316        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1317            return
1318        #
1319        # OK, we should have a continuation of the text for this section.
1320        #
1321        if doc_content.search(line):
1322            cont = doc_content.group(1)
1323            #
1324            # If the lines of text after the first in a special section have
1325            # leading white space, we need to trim it out or Sphinx will get
1326            # confused.  For the second line (the None case), see what we
1327            # find there and remember it.
1328            #
1329            if self.entry.leading_space is None:
1330                r = KernRe(r'^(\s+)')
1331                if r.match(cont):
1332                    self.entry.leading_space = len(r.group(1))
1333                else:
1334                    self.entry.leading_space = 0
1335            #
1336            # Otherwise, before trimming any leading chars, be *sure*
1337            # that they are white space.  We should maybe warn if this
1338            # isn't the case.
1339            #
1340            for i in range(0, self.entry.leading_space):
1341                if cont[i] != " ":
1342                    self.entry.leading_space = i
1343                    break
1344            #
1345            # Add the trimmed result to the section and we're done.
1346            #
1347            self.entry.add_text(cont[self.entry.leading_space:])
1348        else:
1349            # Unknown line, ignore
1350            self.emit_msg(ln, f"bad line: {line}")
1351
1352    def process_body(self, ln, line):
1353        """
1354        STATE_BODY: the bulk of a kerneldoc comment.
1355        """
1356        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
1357            return
1358
1359        if doc_content.search(line):
1360            cont = doc_content.group(1)
1361            self.entry.add_text(cont)
1362        else:
1363            # Unknown line, ignore
1364            self.emit_msg(ln, f"bad line: {line}")
1365
1366    def process_inline_name(self, ln, line):
1367        """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
1368
1369        if doc_inline_sect.search(line):
1370            self.entry.begin_section(ln, doc_inline_sect.group(1))
1371            self.entry.add_text(doc_inline_sect.group(2).lstrip())
1372            self.state = state.INLINE_TEXT
1373        elif doc_inline_end.search(line):
1374            self.dump_section()
1375            self.state = state.PROTO
1376        elif doc_content.search(line):
1377            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
1378            self.state = state.PROTO
1379        # else ... ??
1380
1381    def process_inline_text(self, ln, line):
1382        """STATE_INLINE_TEXT: docbook comments within a prototype."""
1383
1384        if doc_inline_end.search(line):
1385            self.dump_section()
1386            self.state = state.PROTO
1387        elif doc_content.search(line):
1388            self.entry.add_text(doc_content.group(1))
1389        # else ... ??
1390
1391    def syscall_munge(self, ln, proto):         # pylint: disable=W0613
1392        """
1393        Handle syscall definitions.
1394        """
1395
1396        is_void = False
1397
1398        # Strip newlines/CR's
1399        proto = re.sub(r'[\r\n]+', ' ', proto)
1400
1401        # Check if it's a SYSCALL_DEFINE0
1402        if 'SYSCALL_DEFINE0' in proto:
1403            is_void = True
1404
1405        # Replace SYSCALL_DEFINE with correct return type & function name
1406        proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
1407
1408        r = KernRe(r'long\s+(sys_.*?),')
1409        if r.search(proto):
1410            proto = KernRe(',').sub('(', proto, count=1)
1411        elif is_void:
1412            proto = KernRe(r'\)').sub('(void)', proto, count=1)
1413
1414        # Now delete all of the odd-numbered commas in the proto
1415        # so that argument types & names don't have a comma between them
1416        count = 0
1417        length = len(proto)
1418
1419        if is_void:
1420            length = 0  # skip the loop if is_void
1421
1422        for ix in range(length):
1423            if proto[ix] == ',':
1424                count += 1
1425                if count % 2 == 1:
1426                    proto = proto[:ix] + ' ' + proto[ix + 1:]
1427
1428        return proto
1429
1430    def tracepoint_munge(self, ln, proto):
1431        """
1432        Handle tracepoint definitions.
1433        """
1434
1435        tracepointname = None
1436        tracepointargs = None
1437
1438        # Match tracepoint name based on different patterns
1439        r = KernRe(r'TRACE_EVENT\((.*?),')
1440        if r.search(proto):
1441            tracepointname = r.group(1)
1442
1443        r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
1444        if r.search(proto):
1445            tracepointname = r.group(1)
1446
1447        r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
1448        if r.search(proto):
1449            tracepointname = r.group(2)
1450
1451        if tracepointname:
1452            tracepointname = tracepointname.lstrip()
1453
1454        r = KernRe(r'TP_PROTO\((.*?)\)')
1455        if r.search(proto):
1456            tracepointargs = r.group(1)
1457
1458        if not tracepointname or not tracepointargs:
1459            self.emit_msg(ln,
1460                          f"Unrecognized tracepoint format:\n{proto}\n")
1461        else:
1462            proto = f"static inline void trace_{tracepointname}({tracepointargs})"
1463            self.entry.identifier = f"trace_{self.entry.identifier}"
1464
1465        return proto
1466
1467    def process_proto_function(self, ln, line):
1468        """Ancillary routine to process a function prototype."""
1469
1470        # strip C99-style comments to end of line
1471        line = KernRe(r"//.*$", re.S).sub('', line)
1472        #
1473        # Soak up the line's worth of prototype text, stopping at { or ; if present.
1474        #
1475        if KernRe(r'\s*#\s*define').match(line):
1476            self.entry.prototype = line
1477        elif not line.startswith('#'):   # skip other preprocessor stuff
1478            r = KernRe(r'([^\{]*)')
1479            if r.match(line):
1480                self.entry.prototype += r.group(1) + " "
1481        #
1482        # If we now have the whole prototype, clean it up and declare victory.
1483        #
1484        if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
1485            # strip comments and surrounding spaces
1486            self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
1487            #
1488            # Handle self.entry.prototypes for function pointers like:
1489            #       int (*pcs_config)(struct foo)
1490            # by turning it into
1491            #	    int pcs_config(struct foo)
1492            #
1493            r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
1494            self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
1495            #
1496            # Handle special declaration syntaxes
1497            #
1498            if 'SYSCALL_DEFINE' in self.entry.prototype:
1499                self.entry.prototype = self.syscall_munge(ln,
1500                                                          self.entry.prototype)
1501            else:
1502                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
1503                if r.search(self.entry.prototype):
1504                    self.entry.prototype = self.tracepoint_munge(ln,
1505                                                                 self.entry.prototype)
1506            #
1507            # ... and we're done
1508            #
1509            self.dump_function(ln, self.entry.prototype)
1510            self.reset_state(ln)
1511
1512    def process_proto_type(self, ln, line):
1513        """
1514        Ancillary routine to process a type.
1515        """
1516
1517        # Strip C99-style comments and surrounding whitespace
1518        line = KernRe(r"//.*$", re.S).sub('', line).strip()
1519        if not line:
1520            return # nothing to see here
1521
1522        # To distinguish preprocessor directive from regular declaration later.
1523        if line.startswith('#'):
1524            line += ";"
1525        #
1526        # Split the declaration on any of { } or ;, and accumulate pieces
1527        # until we hit a semicolon while not inside {brackets}
1528        #
1529        r = KernRe(r'(.*?)([{};])')
1530        for chunk in r.split(line):
1531            if chunk:  # Ignore empty matches
1532                self.entry.prototype += chunk
1533                #
1534                # This cries out for a match statement ... someday after we can
1535                # drop Python 3.9 ...
1536                #
1537                if chunk == '{':
1538                    self.entry.brcount += 1
1539                elif chunk == '}':
1540                    self.entry.brcount -= 1
1541                elif chunk == ';' and self.entry.brcount <= 0:
1542                    self.dump_declaration(ln, self.entry.prototype)
1543                    self.reset_state(ln)
1544                    return
1545        #
1546        # We hit the end of the line while still in the declaration; put
1547        # in a space to represent the newline.
1548        #
1549        self.entry.prototype += ' '
1550
1551    def process_proto(self, ln, line):
1552        """STATE_PROTO: reading a function/whatever prototype."""
1553
1554        if doc_inline_oneline.search(line):
1555            self.entry.begin_section(ln, doc_inline_oneline.group(1))
1556            self.entry.add_text(doc_inline_oneline.group(2))
1557            self.dump_section()
1558
1559        elif doc_inline_start.search(line):
1560            self.state = state.INLINE_NAME
1561
1562        elif self.entry.decl_type == 'function':
1563            self.process_proto_function(ln, line)
1564
1565        else:
1566            self.process_proto_type(ln, line)
1567
1568    def process_docblock(self, ln, line):
1569        """STATE_DOCBLOCK: within a ``DOC:`` block."""
1570
1571        if doc_end.search(line):
1572            self.dump_section()
1573            self.output_declaration("doc", self.entry.identifier)
1574            self.reset_state(ln)
1575
1576        elif doc_content.search(line):
1577            self.entry.add_text(doc_content.group(1))
1578
1579    def parse_export(self):
1580        """
1581        Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
1582        """
1583
1584        export_table = set()
1585
1586        try:
1587            with open(self.fname, "r", encoding="utf8",
1588                      errors="backslashreplace") as fp:
1589
1590                for line in fp:
1591                    self.process_export(export_table, line)
1592
1593        except IOError:
1594            return None
1595
1596        return export_table
1597
    #: The state/action table telling us which function to invoke in each state.
    #:
    #: Each key is a parser state and each value is the process_*() function
    #: defined in this class body for it.  The values are the plain functions
    #: rather than bound methods, so whoever dispatches through this table is
    #: expected to pass the instance explicitly.
    #: NOTE(review): confirm against the dispatch code in parse_kdoc().
    state_actions = {
        state.NORMAL:			process_normal,
        state.NAME:			process_name,
        state.BODY:			process_body,
        state.DECLARATION:		process_decl,
        state.SPECIAL_SECTION:		process_special,
        state.INLINE_NAME:		process_inline_name,
        state.INLINE_TEXT:		process_inline_text,
        state.PROTO:			process_proto,
        state.DOCBLOCK:			process_docblock,
        }
1610
1611    def parse_kdoc(self):
1612        """
1613        Open and process each line of a C source file.
1614        The parsing is controlled via a state machine, and the line is passed
1615        to a different process function depending on the state. The process
1616        function may update the state as needed.
1617
1618        Besides parsing kernel-doc tags, it also parses export symbols.
1619        """
1620
1621        prev = ""
1622        prev_ln = None
1623        export_table = set()
1624
1625        try:
1626            with open(self.fname, "r", encoding="utf8",
1627                      errors="backslashreplace") as fp:
1628                for ln, line in enumerate(fp):
1629
1630                    line = line.expandtabs().strip("\n")
1631
1632                    # Group continuation lines on prototypes
1633                    if self.state == state.PROTO:
1634                        if line.endswith("\\"):
1635                            prev += line.rstrip("\\")
1636                            if not prev_ln:
1637                                prev_ln = ln
1638                            continue
1639
1640                        if prev:
1641                            ln = prev_ln
1642                            line = prev + line
1643                            prev = ""
1644                            prev_ln = None
1645
1646                    self.config.log.debug("%d %s: %s",
1647                                          ln, state.name[self.state],
1648                                          line)
1649
1650                    # This is an optimization over the original script.
1651                    # There, when export_file was used for the same file,
1652                    # it was read twice. Here, we use the already-existing
1653                    # loop to parse exported symbols as well.
1654                    #
1655                    if (self.state != state.NORMAL) or \
1656                       not self.process_export(export_table, line):
1657                        # Hand this line to the appropriate state handler
1658                        self.state_actions[self.state](self, ln, line)
1659
1660            self.emit_unused_warnings()
1661
1662        except OSError:
1663            self.config.log.error(f"Error: Cannot open file {self.fname}")
1664
1665        return export_table, self.entries
1666