xref: /linux/scripts/lib/kdoc/kdoc_output.py (revision b1cce98493a095925fb51be045ccf6e08edb4aa0)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4#
5# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
6
7"""
8Implement output filters to print kernel-doc documentation.
9
The implementation uses a virtual base class (OutputFormat) which
contains a dispatcher to virtual methods, and some code to filter
out output messages.
13
14The actual implementation is done on one separate class per each type
15of output. Currently, there are output classes for ReST and man/troff.
16"""
17
18import os
19import re
20from datetime import datetime
21
22from kdoc_parser import KernelDoc, type_param
23from kdoc_re import KernRe
24
25
# Matches a function-pointer type with no name between "(*" and ")",
# e.g. "int (*) (void *)". Group 1 is the text up to and including "(*";
# group 2 is the text between the parentheses of the argument list.
function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)

# match expressions used to find embedded type information

# Text enclosed in double backticks
type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
# '%' followed by a constant name (word chars, '-', '_' or '*')
type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
# A word immediately followed by "()"
type_func = KernRe(r"(\w+)\(\)", cache=False)
# '@' reference, optionally preceded by '!', '~' or '*' (group 1); group 2
# is the name with optional ".member" / "->member" chains and trailing "..."
type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)

# Special RST handling for func ptr params
type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)

# Special RST handling for structs with func ptr params
type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)

# '$' followed by a word
type_env = KernRe(r"(\$\w+)", cache=False)
# '&' followed by a keyword and a name: group 1 is "<keyword> <name>",
# group 2 is the bare name
type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
# '&name.member' or '&name->member': groups are name, separator, member
type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
# '&name' not handled by any of the more specific patterns above
type_fallback = KernRe(r"\&([_\w]+)", cache=False)
# type_member followed by "()".
# NOTE(review): relies on KernRe implementing '+'; presumably it
# concatenates the two patterns - confirm against kdoc_re.
type_member_func = type_member + KernRe(r"\(\)", cache=False)
48
49
class OutputFormat:
    """
    Base class for OutputFormat. If used as-is, it means that only
    warnings will be displayed.

    Inherited classes override the out_*() methods, which are expected to
    append their text to self.data. msg() resets self.data, dispatches one
    parsed entry to the right out_*() method and returns the result.
    """

    # output mode.
    OUTPUT_ALL          = 0 # output all symbols and doc sections
    OUTPUT_INCLUDE      = 1 # output only specified symbols
    OUTPUT_EXPORTED     = 2 # output exported symbols
    OUTPUT_INTERNAL     = 3 # output non-exported symbols

    # Virtual member to be overridden at the inherited classes: a sequence
    # of (regex, replacement) pairs applied in order by highlight_block()
    highlights = []

    def __init__(self):
        """Declare internal vars and set mode to OUTPUT_ALL"""

        self.out_mode = self.OUTPUT_ALL
        self.enable_lineno = None
        # Kept as a set from the start, matching what set_filter() stores
        self.nosymbol = set()
        self.symbol = None
        self.function_table = None
        self.config = None
        self.no_doc_sections = False

        self.data = ""

    def set_config(self, config):
        """
        Setup global config variables used by both parser and output.

        The object is used here through config.warning() and
        config.log.warning().
        """

        self.config = config

    def set_filter(self, export, internal, symbol, nosymbol, function_table,
                   enable_lineno, no_doc_sections):
        """
        Initialize filter variables according with the requested mode.

        Only one choice is valid between export, internal and symbol.
        When more than one is set, symbol takes precedence over export,
        which takes precedence over internal.

        The nosymbol filter can be used on all modes. A false nosymbol
        leaves the current exclusion set untouched.
        """

        self.enable_lineno = enable_lineno
        self.no_doc_sections = no_doc_sections
        self.function_table = function_table

        if symbol:
            self.out_mode = self.OUTPUT_INCLUDE
        elif export:
            self.out_mode = self.OUTPUT_EXPORTED
        elif internal:
            self.out_mode = self.OUTPUT_INTERNAL
        else:
            self.out_mode = self.OUTPUT_ALL

        if nosymbol:
            self.nosymbol = set(nosymbol)

    def highlight_block(self, block):
        """
        Apply the highlights of the output class to a sub-block of text.

        Each (regex, replacement) pair from self.highlights is applied in
        sequence; the transformed text is returned.
        """

        for r, sub in self.highlights:
            block = r.sub(sub, block)

        return block

    def out_warnings(self, args):
        """
        Output warnings for identifiers that will be displayed.
        """

        for log_msg in args.warnings:
            self.config.warning(log_msg)

    def check_doc(self, name, args):
        """
        Check if DOC should be output.

        Returns True, after emitting the pending warnings of the entry,
        when the DOC block has to be printed; False otherwise.
        """

        if self.no_doc_sections:
            return False

        if name in self.nosymbol:
            return False

        if self.out_mode == self.OUTPUT_ALL:
            self.out_warnings(args)
            return True

        if self.out_mode == self.OUTPUT_INCLUDE:
            if name in self.function_table:
                self.out_warnings(args)
                return True

        return False

    def check_declaration(self, dtype, name, args):
        """
        Checks if a declaration should be output or not based on the
        filtering criteria.

        Returns True when the declaration has to be printed.
        """

        if name in self.nosymbol:
            return False

        if self.out_mode == self.OUTPUT_ALL:
            self.out_warnings(args)
            return True

        if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
            # NOTE(review): unlike the other accepting branches, this one
            # does not call out_warnings() - confirm that this is intended.
            if name in self.function_table:
                return True

        if self.out_mode == self.OUTPUT_INTERNAL:
            # Only functions are filtered by the function table here;
            # every other declaration type is always accepted.
            if dtype != "function":
                self.out_warnings(args)
                return True

            if name not in self.function_table:
                self.out_warnings(args)
                return True

        return False

    def msg(self, fname, name, args):
        """
        Handles a single entry from kernel-doc parser.

        Returns the text produced for the entry (an empty string when it
        was filtered out), or None when args.type is not a known type.
        """

        self.data = ""

        dtype = args.type

        # DOC blocks do their own filtering via check_doc()
        if dtype == "doc":
            self.out_doc(fname, name, args)
            return self.data

        if not self.check_declaration(dtype, name, args):
            return self.data

        handlers = {
            "function": self.out_function,
            "enum": self.out_enum,
            "typedef": self.out_typedef,
            "struct": self.out_struct,
            "union": self.out_struct,
        }

        handler = handlers.get(dtype)
        if handler is not None:
            handler(fname, name, args)
            return self.data

        # Warn if some type requires an output logic
        self.config.log.warning("doesn't know how to output '%s' block",
                                dtype)

        return None

    # Virtual methods to be overridden by inherited classes
    # At the base class, those do nothing.
    def out_doc(self, fname, name, args):
        """Outputs a DOC block"""

    def out_function(self, fname, name, args):
        """Outputs a function"""

    def out_enum(self, fname, name, args):
        """Outputs an enum"""

    def out_typedef(self, fname, name, args):
        """Outputs a typedef"""

    def out_struct(self, fname, name, args):
        """Outputs a struct"""
233
class RestFormat(OutputFormat):
    """Consts and functions used by ReST output"""

    highlights = [
        (type_constant, r"``\1``"),
        (type_constant2, r"``\1``"),

        # Note: need to escape () to avoid func matching later
        (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
        (type_member, r":c:type:`\1\2\3 <\1>`"),
        (type_fp_param, r"**\1\\(\\)**"),
        (type_fp_param2, r"**\1\\(\\)**"),
        (type_func, r"\1()"),
        (type_enum, r":c:type:`\1 <\2>`"),
        (type_struct, r":c:type:`\1 <\2>`"),
        (type_typedef, r":c:type:`\1 <\2>`"),
        (type_union, r":c:type:`\1 <\2>`"),

        # in rst this can refer to any type
        (type_fallback, r":c:type:`\1`"),
        (type_param_ref, r"**\1\2**")
    ]
    blankline = "\n"

    # A line ending with "::" that does not start with "."
    sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
    # A ".. code-block::" directive line
    sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)

    def __init__(self):
        """
        Creates class variables.

        Not really mandatory, but it is a good coding style and makes
        pylint happy.
        """

        super().__init__()
        # Prefix prepended to every line emitted by output_highlight()
        self.lineprefix = ""

    def print_lineno(self, ln):
        """
        Outputs a line number.

        Nothing is emitted when line numbers are disabled or ln is None.
        The emitted number is ln + 1.
        """

        if self.enable_lineno and ln is not None:
            ln += 1
            self.data += f".. LINENO {ln}\n"

    def output_highlight(self, args):
        """
        Outputs a C symbol that may require being converted to ReST using
        the self.highlights variable.

        Text inside literal blocks (started by a line matching
        sphinx_literal or sphinx_cblock) is passed through untouched; all
        other text goes through highlight_block(). Every resulting line is
        appended to self.data prefixed with self.lineprefix.
        """

        input_text = args
        output = ""
        in_literal = False
        litprefix = ""
        block = ""

        for line in input_text.strip("\n").split("\n"):

            # If we're in a literal block, see if we should drop out of it.
            # Otherwise, pass the line straight through unmunged.
            if in_literal:
                if line.strip():  # If the line is not blank
                    # If this is the first non-blank line in a literal block,
                    # figure out the proper indent.
                    if not litprefix:
                        r = KernRe(r'^(\s*)')
                        if r.match(line):
                            litprefix = '^' + r.group(1)
                        else:
                            litprefix = ""

                        output += line + "\n"
                    elif not KernRe(litprefix).match(line):
                        # Indentation no longer matches: the literal block
                        # is over and this line is handled as normal text
                        # by the check below.
                        in_literal = False
                    else:
                        output += line + "\n"
                else:
                    output += line + "\n"

            # Not in a literal block (or just dropped out)
            if not in_literal:
                block += line + "\n"
                if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
                    in_literal = True
                    litprefix = ""
                    # Flush the pending text, including the line which
                    # introduces the literal block
                    output += self.highlight_block(block)
                    block = ""

        # Handle any remaining block
        if block:
            output += self.highlight_block(block)

        # Print the output with the line prefix
        for line in output.strip("\n").split("\n"):
            self.data += self.lineprefix + line + "\n"

    def out_section(self, args, out_docblock=False):
        """
        Outputs a block section.

        This could use some work; it's used to output the DOC: sections, and
        starts by putting out the name of the doc section itself, but that
        tends to duplicate a header already in the template file.

        With out_docblock set, the section title (preceded by a label) is
        only emitted when the output mode is not OUTPUT_INCLUDE.
        """
        for section, text in args.sections.items():
            # Skip sections that are in the nosymbol_table
            if section in self.nosymbol:
                continue

            if out_docblock:
                if self.out_mode != self.OUTPUT_INCLUDE:
                    self.data += f".. _{section}:\n\n"
                    self.data += f'{self.lineprefix}**{section}**\n\n'
            else:
                self.data += f'{self.lineprefix}**{section}**\n\n'

            self.print_lineno(args.section_start_lines.get(section, 0))
            self.output_highlight(text)
            self.data += "\n"
        self.data += "\n"

    def out_doc(self, fname, name, args):
        """Outputs a DOC block, if check_doc() accepts it"""

        if not self.check_doc(name, args):
            return
        self.out_section(args, out_docblock=True)

    def out_function(self, fname, name, args):
        """
        Outputs a function, a function-like macro or a function typedef.

        Emits the prototype directive, the purpose, the parameters list
        and then the remaining sections.
        """

        oldprefix = self.lineprefix
        signature = ""

        func_macro = args.get('func_macro', False)
        if func_macro:
            signature = name
        else:
            if args.get('functiontype'):
                signature = args['functiontype'] + " "
            signature += name + " ("

        ln = args.declaration_start_line

        params = []
        for parameter in args.parameterlist:
            dtype = args.parametertypes.get(parameter, "")

            if function_pointer.search(dtype):
                # Pointer-to-function: place the parameter name after
                # "(*" and put the argument list back. The pattern has
                # only two groups: the part up to "(*" and the arguments.
                params.append(function_pointer.group(1) + parameter +
                              ") (" + function_pointer.group(2) + ")")
            else:
                params.append(dtype)

        signature += ", ".join(params)

        if not func_macro:
            signature += ")"

        self.print_lineno(ln)
        if args.get('typedef') or not args.get('functiontype'):
            self.data += f".. c:macro:: {name}\n\n"

            if args.get('typedef'):
                self.data += "   **Typedef**: "
                self.lineprefix = ""
                self.output_highlight(args.get('purpose', ""))
                self.data += "\n\n**Syntax**\n\n"
                self.data += f"  ``{signature}``\n\n"
            else:
                self.data += f"``{signature}``\n\n"
        else:
            self.data += f".. c:function:: {signature}\n\n"

        if not args.get('typedef'):
            self.print_lineno(ln)
            self.lineprefix = "   "
            self.output_highlight(args.get('purpose', ""))
            self.data += "\n"

        # Put descriptive text into a container (HTML <div>) to help set
        # function prototypes apart
        self.lineprefix = "  "

        if args.parameterlist:
            self.data += ".. container:: kernelindent\n\n"
            self.data += f"{self.lineprefix}**Parameters**\n\n"

        for parameter in args.parameterlist:
            # Descriptions are looked up without any trailing "[...]"
            parameter_name = KernRe(r'\[.*').sub('', parameter)
            dtype = args.parametertypes.get(parameter, "")

            if dtype:
                self.data += f"{self.lineprefix}``{dtype}``\n"
            else:
                self.data += f"{self.lineprefix}``{parameter}``\n"

            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))

            self.lineprefix = "    "
            if parameter_name in args.parameterdescs and \
               args.parameterdescs[parameter_name] != KernelDoc.undescribed:

                self.output_highlight(args.parameterdescs[parameter_name])
                self.data += "\n"
            else:
                self.data += f"{self.lineprefix}*undescribed*\n\n"
            self.lineprefix = "  "

        self.out_section(args)
        self.lineprefix = oldprefix

    def out_enum(self, fname, name, args):
        """
        Outputs an enum: directive, purpose, constants and sections.

        A constant without an entry at args.parameterdescs is reported as
        undescribed.
        """

        oldprefix = self.lineprefix
        ln = args.declaration_start_line

        self.data += f"\n\n.. c:enum:: {name}\n\n"

        self.print_lineno(ln)
        self.lineprefix = "  "
        self.output_highlight(args.get('purpose', ''))
        self.data += "\n"

        self.data += ".. container:: kernelindent\n\n"
        outer = self.lineprefix + "  "
        self.lineprefix = outer + "  "
        self.data += f"{outer}**Constants**\n\n"

        for parameter in args.parameterlist:
            self.data += f"{outer}``{parameter}``\n"

            # A missing description is handled like an undescribed one
            # instead of raising KeyError
            desc = args.parameterdescs.get(parameter, KernelDoc.undescribed)
            if desc != KernelDoc.undescribed:
                self.output_highlight(desc)
            else:
                self.data += f"{self.lineprefix}*undescribed*\n\n"
            self.data += "\n"

        self.lineprefix = oldprefix
        self.out_section(args)

    def out_typedef(self, fname, name, args):
        """Outputs a typedef: directive, purpose and sections"""

        oldprefix = self.lineprefix
        ln = args.declaration_start_line

        self.data += f"\n\n.. c:type:: {name}\n\n"

        self.print_lineno(ln)
        self.lineprefix = "   "

        self.output_highlight(args.get('purpose', ''))

        self.data += "\n"

        self.lineprefix = oldprefix
        self.out_section(args)

    def out_struct(self, fname, name, args):
        """
        Outputs a struct or union: directive, purpose, definition as a
        literal block, described members and sections.

        Members marked as KernelDoc.undescribed are skipped.
        """

        purpose = args.get('purpose', "")
        declaration = args.get('definition', "")
        dtype = args.type
        ln = args.declaration_start_line

        self.data += f"\n\n.. c:{dtype}:: {name}\n\n"

        self.print_lineno(ln)

        oldprefix = self.lineprefix
        self.lineprefix += "  "

        self.output_highlight(purpose)
        self.data += "\n"

        self.data += ".. container:: kernelindent\n\n"
        self.data += f"{self.lineprefix}**Definition**::\n\n"

        self.lineprefix = self.lineprefix + "  "

        # Each tab of the definition becomes one literal-block indent
        declaration = declaration.replace("\t", self.lineprefix)

        self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
        self.data += f"{declaration}{self.lineprefix}" + "};\n\n"

        self.lineprefix = "  "
        self.data += f"{self.lineprefix}**Members**\n\n"
        for parameter in args.parameterlist:
            if not parameter or parameter.startswith("#"):
                continue

            # Descriptions are looked up without any trailing "[...]"
            parameter_name = parameter.split("[", maxsplit=1)[0]

            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
                continue

            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))

            self.data += f"{self.lineprefix}``{parameter}``\n"

            self.lineprefix = "    "
            # Use an empty description rather than raising KeyError when
            # the member has no entry
            self.output_highlight(args.parameterdescs.get(parameter_name, ""))
            self.lineprefix = "  "

            self.data += "\n"

        self.data += "\n"

        self.lineprefix = oldprefix
        self.out_section(args)
541
542
class ManFormat(OutputFormat):
    """Consts and functions used by man pages output"""

    highlights = (
        (type_constant, r"\1"),
        (type_constant2, r"\1"),
        (type_func, r"\\fB\1\\fP"),
        (type_enum, r"\\fI\1\\fP"),
        (type_struct, r"\\fI\1\\fP"),
        (type_typedef, r"\\fI\1\\fP"),
        (type_union, r"\\fI\1\\fP"),
        (type_param, r"\\fI\1\\fP"),
        (type_param_ref, r"\\fI\1\2\\fP"),
        (type_member, r"\\fI\1\2\3\\fP"),
        (type_fallback, r"\\fI\1\\fP")
    )
    blankline = ""

    # strptime() formats tried, in order, on KBUILD_BUILD_TIMESTAMP
    date_formats = [
        "%a %b %d %H:%M:%S %Z %Y",
        "%a %b %d %H:%M:%S %Y",
        "%Y-%m-%d",
        "%b %d %Y",
        "%B %d %Y",
        "%m %d %Y",
    ]

    def __init__(self, modulename):
        """
        Creates class variables.

        Not really mandatory, but it is a good coding style and makes
        pylint happy.

        The date placed at the man page headers comes from the
        KBUILD_BUILD_TIMESTAMP environment variable when it can be parsed
        with one of date_formats; otherwise the current time is used.
        """

        super().__init__()
        self.modulename = modulename

        dt = None
        tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
        if tstamp:
            for fmt in self.date_formats:
                try:
                    dt = datetime.strptime(tstamp, fmt)
                    break
                except ValueError:
                    # Not this format: try the next one
                    pass

        if dt is None:
            dt = datetime.now()

        self.man_date = dt.strftime("%B %Y")

    def output_highlight(self, block):
        """
        Outputs a C symbol that may require being highlighted with
        self.highlights variable using troff syntax.

        Leading whitespace is removed from each line and empty lines are
        dropped.
        """

        contents = self.highlight_block(block)

        if isinstance(contents, list):
            contents = "\n".join(contents)

        for line in contents.strip("\n").split("\n"):
            line = KernRe(r"^\s*").sub("", line)
            if not line:
                continue

            # Prefix a leading "." with "\&" so the line is emitted as
            # text instead of starting with a dot
            if line[0] == ".":
                self.data += "\\&" + line + "\n"
            else:
                self.data += line + "\n"

    def out_doc(self, fname, name, args):
        """Outputs a DOC block, if check_doc() accepts it"""

        if not self.check_doc(name, args):
            return

        self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n"

        for section, text in args.sections.items():
            self.data += f'.SH "{section}"' + "\n"
            self.output_highlight(text)

    def out_function(self, fname, name, args):
        """output function in man"""

        self.data += f'.TH "{name}" 9 "{name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"

        self.data += ".SH NAME\n"
        self.data += f"{name} \\- {args['purpose']}\n"

        self.data += ".SH SYNOPSIS\n"
        if args.get('functiontype', ''):
            self.data += f'.B "{args["functiontype"]}" {name}' + "\n"
        else:
            self.data += f'.B "{name}' + "\n"

        # The first parameter line opens the parenthesis; the last one
        # closes the prototype
        parenth = "("
        post = ","
        last = len(args.parameterlist) - 1

        for count, parameter in enumerate(args.parameterlist):
            if count == last:
                post = ");"

            dtype = args.parametertypes.get(parameter, "")
            if function_pointer.match(dtype):
                # Pointer-to-function
                # The line has to start with the .BI request itself: a
                # leading quote would make it be handled as plain text.
                self.data += f'.BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
            else:
                # Add a space after the type, except when it ends with "*"
                dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)

                self.data += f'.BI "{parenth}{dtype}"  "{post}"' + "\n"
            parenth = ""

        if args.parameterlist:
            self.data += ".SH ARGUMENTS\n"

        for parameter in args.parameterlist:
            # Descriptions are looked up without any trailing "[...]"
            parameter_name = re.sub(r'\[.*', '', parameter)

            self.data += f'.IP "{parameter}" 12' + "\n"
            self.output_highlight(args.parameterdescs.get(parameter_name, ""))

        for section, text in args.sections.items():
            self.data += f'.SH "{section.upper()}"' + "\n"
            self.output_highlight(text)

    def out_enum(self, fname, name, args):
        """Outputs an enum: header, synopsis, constants and sections"""

        self.data += f'.TH "{self.modulename}" 9 "enum {name}" "{self.man_date}" "API Manual" LINUX' + "\n"

        self.data += ".SH NAME\n"
        self.data += f"enum {name} \\- {args['purpose']}\n"

        self.data += ".SH SYNOPSIS\n"
        self.data += f"enum {name}" + " {\n"

        last = len(args.parameterlist) - 1
        for count, parameter in enumerate(args.parameterlist):
            self.data += f'.br\n.BI "    {parameter}"' + "\n"
            if count == last:
                self.data += "\n};\n"
            else:
                self.data += ", \n.br\n"

        self.data += ".SH Constants\n"

        for parameter in args.parameterlist:
            parameter_name = KernRe(r'\[.*').sub('', parameter)
            self.data += f'.IP "{parameter}" 12' + "\n"
            self.output_highlight(args.parameterdescs.get(parameter_name, ""))

        for section, text in args.sections.items():
            self.data += f'.SH "{section}"' + "\n"
            self.output_highlight(text)

    def out_typedef(self, fname, name, args):
        """Outputs a typedef: header, name and sections"""

        module = self.modulename
        purpose = args.get('purpose')

        self.data += f'.TH "{module}" 9 "{name}" "{self.man_date}" "API Manual" LINUX' + "\n"

        self.data += ".SH NAME\n"
        self.data += f"typedef {name} \\- {purpose}\n"

        for section, text in args.sections.items():
            self.data += f'.SH "{section}"' + "\n"
            self.output_highlight(text)

    def out_struct(self, fname, name, args):
        """
        Outputs a struct or union: header, synopsis with the definition,
        described members and sections.

        Members marked as KernelDoc.undescribed are skipped.
        """

        module = self.modulename
        purpose = args.get('purpose')
        # Default to an empty definition, so a missing one doesn't make
        # the replace() below fail
        definition = args.get('definition', "")

        self.data += f'.TH "{module}" 9 "{args.type} {name}" "{self.man_date}" "API Manual" LINUX' + "\n"

        self.data += ".SH NAME\n"
        self.data += f"{args.type} {name} \\- {purpose}\n"

        # Replace tabs with two spaces and handle newlines
        declaration = definition.replace("\t", "  ")
        declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)

        self.data += ".SH SYNOPSIS\n"
        self.data += f"{args.type} {name} " + "{" + "\n.br\n"
        self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"

        self.data += ".SH Members\n"
        for parameter in args.parameterlist:
            if parameter.startswith("#"):
                continue

            # Descriptions are looked up without any trailing "[...]"
            parameter_name = re.sub(r"\[.*", "", parameter)

            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
                continue

            self.data += f'.IP "{parameter}" 12' + "\n"
            # Use an empty description when the member has no entry, as
            # the highlight code can't handle None
            self.output_highlight(args.parameterdescs.get(parameter_name, ""))

        for section, text in args.sections.items():
            self.data += f'.SH "{section}"' + "\n"
            self.output_highlight(text)
750