xref: /linux/Documentation/sphinx/kernel_include.py (revision 6093a688a07da07808f0122f9aa2a3eed250d853)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
4
5
6"""
7Implementation of the ``kernel-include`` reST-directive.
8
9:copyright:  Copyright (C) 2016  Markus Heiser
10:license:    GPL Version 2, June 1991 see linux/COPYING for details.
11
The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations.
15
16.. hint::
17
    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the filesystem
    tree where the reST document with the include directive is placed.
22
23Substrings of the form $name or ${name} are replaced by the value of
24environment variable name. Malformed variable names and references to
25non-existing variables are left unchanged.
26
27**Supported Sphinx Include Options**:
28
29:param literal:
30    If present, the included file is inserted as a literal block.
31
32:param code:
33    Specify the language for syntax highlighting (e.g., 'c', 'python').
34
35:param encoding:
36    Specify the encoding of the included file (default: 'utf-8').
37
38:param tab-width:
39    Specify the number of spaces that a tab represents.
40
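:param start-line:
    Zero-based index of the first line to include, i.e. the number of
    leading lines to skip (used as the start of a Python slice).

:param end-line:
    Zero-based index of the line at which to stop; that line itself is not
    included (used as the end of a Python slice).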
46
47:param start-after:
48    Include lines after the first line matching this text.
49
50:param end-before:
51    Include lines before the first line matching this text.
52
53:param number-lines:
54    Number the included lines (integer specifies start number).
55    Only effective with 'literal' or 'code' options.
56
57:param class:
58    Specify HTML class attribute for the included content.
59
60**Kernel-specific Extensions**:
61
62:param generate-cross-refs:
63    If present, instead of directly including the file, it calls
64    ParseDataStructs() to convert C data structures into cross-references
65    that link to comprehensive documentation in other ReST files.
66
67:param exception-file:
68    (Used with generate-cross-refs)
69
70    Path to a file containing rules for handling special cases:
71    - Ignore specific C data structures
72    - Use alternative reference names
73    - Specify different reference types
74
75:param warn-broken:
76    (Used with generate-cross-refs)
77
78    Enables warnings when auto-generated cross-references don't point to
79    existing documentation targets.
80"""
81
82# ==============================================================================
83# imports
84# ==============================================================================
85
86import os.path
87import re
88import sys
89
90from docutils import io, nodes, statemachine
91from docutils.statemachine import ViewList
92from docutils.parsers.rst import Directive, directives
93from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
94
95from sphinx.util import logging
96
# Absolute path of the kernel source tree, taken from the "srctree"
# environment variable. Indexing os.environ raises KeyError when it is unset.
srctree = os.path.abspath(os.environ["srctree"])
# Put tools/docs/lib first on the module search path before the import
# below -- presumably that is where parse_data_structs lives (TODO confirm).
sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))

from parse_data_structs import ParseDataStructs

# Extension version, reported to Sphinx by setup().
__version__ = "1.0"
logger = logging.getLogger(__name__)

# Matches a backslash-escaped role reference of the form
#   "\ :ref:`text <target>`\"   (role is one of ref, c:type, c:func);
# group 1 is the role, group 2 the text, group 3 the optional target.
# NOTE(review): not referenced in the part of the file visible here.
RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
# Matches any text enclosed in single backticks; group 1 is the text.
# NOTE(review): not referenced in the part of the file visible here.
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
107
def ErrorString(exc):  # Shamelessly stolen from docutils
    """
    Format an exception as ``<ExceptionClassName>: <message>``.

    :param exc: the exception instance to describe.
    :returns: a string with the exception class name, a colon and ``str(exc)``.
    """
    # The class name lives in ``__name__``; ``__name`` does not exist and
    # would raise AttributeError while reporting the original error.
    return f'{exc.__class__.__name__}: {exc}'
110
111
112# ==============================================================================
113class KernelInclude(Directive):
114    """
115    KernelInclude (``kernel-include``) directive
116
117    Most of the stuff here came from Include directive defined at:
118        docutils/parsers/rst/directives/misc.py
119
    Yet, overriding the class doesn't bring any benefit: the original class
    only has run() and the argument list. Not all of the arguments are
    implemented when checked against the latest Sphinx version, as more
    arguments were added over time.

    So, keep its own list of supported arguments.
126    """
127
128    required_arguments = 1
129    optional_arguments = 0
130    final_argument_whitespace = True
131    option_spec = {
132        'literal': directives.flag,
133        'code': directives.unchanged,
134        'encoding': directives.encoding,
135        'tab-width': int,
136        'start-line': int,
137        'end-line': int,
138        'start-after': directives.unchanged_required,
139        'end-before': directives.unchanged_required,
140        # ignored except for 'literal' or 'code':
141        'number-lines': directives.unchanged,  # integer or None
142        'class': directives.class_option,
143
144        # Arguments that aren't from Sphinx Include directive
145        'generate-cross-refs': directives.flag,
146        'warn-broken': directives.flag,
147        'toc': directives.flag,
148        'exception-file': directives.unchanged,
149    }
150
151    def read_rawtext(self, path, encoding):
152            """Read and process file content with error handling"""
153            try:
154                self.state.document.settings.record_dependencies.add(path)
155                include_file = io.FileInput(source_path=path,
156                                            encoding=encoding,
157                                            error_handler=self.state.document.settings.input_encoding_error_handler)
158            except UnicodeEncodeError:
159                raise self.severe('Problems with directive path:\n'
160                                'Cannot encode input file path "%s" '
161                                '(wrong locale?).' % path)
162            except IOError as error:
163                raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))
164
165            try:
166                return include_file.read()
167            except UnicodeError as error:
168                raise self.severe('Problem with directive:\n%s' % ErrorString(error))
169
170    def apply_range(self, rawtext):
171        """
172        Handles start-line, end-line, start-after and end-before parameters
173        """
174
175        # Get to-be-included content
176        startline = self.options.get('start-line', None)
177        endline = self.options.get('end-line', None)
178        try:
179            if startline or (endline is not None):
180                lines = rawtext.splitlines()
181                rawtext = '\n'.join(lines[startline:endline])
182        except UnicodeError as error:
183            raise self.severe(f'Problem with "{self.name}" directive:\n'
184                              + io.error_string(error))
185        # start-after/end-before: no restrictions on newlines in match-text,
186        # and no restrictions on matching inside lines vs. line boundaries
187        after_text = self.options.get("start-after", None)
188        if after_text:
189            # skip content in rawtext before *and incl.* a matching text
190            after_index = rawtext.find(after_text)
191            if after_index < 0:
192                raise self.severe('Problem with "start-after" option of "%s" '
193                                  "directive:\nText not found." % self.name)
194            rawtext = rawtext[after_index + len(after_text) :]
195        before_text = self.options.get("end-before", None)
196        if before_text:
197            # skip content in rawtext after *and incl.* a matching text
198            before_index = rawtext.find(before_text)
199            if before_index < 0:
200                raise self.severe('Problem with "end-before" option of "%s" '
201                                  "directive:\nText not found." % self.name)
202            rawtext = rawtext[:before_index]
203
204        return rawtext
205
206    def xref_text(self, env, path, tab_width):
207        """
208        Read and add contents from a C file parsed to have cross references.
209
210        There are two types of supported output here:
211        - A C source code with cross-references;
212        - a TOC table containing cross references.
213        """
214        parser = ParseDataStructs()
215        parser.parse_file(path)
216
217        if 'exception-file' in self.options:
218            source_dir = os.path.dirname(os.path.abspath(
219                self.state_machine.input_lines.source(
220                    self.lineno - self.state_machine.input_offset - 1)))
221            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
222            parser.process_exceptions(exceptions_file)
223
224        # Store references on a symbol dict to be used at check time
225        if 'warn-broken' in self.options:
226            env._xref_files.add(path)
227
228        if "toc" not in self.options:
229
230            rawtext = ".. parsed-literal::\n\n" + parser.gen_output()
231            self.apply_range(rawtext)
232
233            include_lines = statemachine.string2lines(rawtext, tab_width,
234                                                      convert_whitespace=True)
235
236            # Sphinx always blame the ".. <directive>", so placing
237            # line numbers here won't make any difference
238
239            self.state_machine.insert_input(include_lines, path)
240            return []
241
242        # TOC output is a ReST file, not a literal. So, we can add line
243        # numbers
244
245        rawtext = parser.gen_toc()
246
247        include_lines = statemachine.string2lines(rawtext, tab_width,
248                                                  convert_whitespace=True)
249
250        # Append line numbers data
251
252        startline = self.options.get('start-line', None)
253
254        result = ViewList()
255        if startline and startline > 0:
256            offset = startline - 1
257        else:
258            offset = 0
259
260        for ln, line in enumerate(include_lines, start=offset):
261            result.append(line, path, ln)
262
263        self.state_machine.insert_input(result, path)
264
265        return []
266
267    def literal(self, path, tab_width, rawtext):
268        """Output a literal block"""
269
270        # Convert tabs to spaces, if `tab_width` is positive.
271        if tab_width >= 0:
272            text = rawtext.expandtabs(tab_width)
273        else:
274            text = rawtext
275        literal_block = nodes.literal_block(rawtext, source=path,
276                                            classes=self.options.get("class", []))
277        literal_block.line = 1
278        self.add_name(literal_block)
279        if "number-lines" in self.options:
280            try:
281                startline = int(self.options["number-lines"] or 1)
282            except ValueError:
283                raise self.error(":number-lines: with non-integer start value")
284            endline = startline + len(include_lines)
285            if text.endswith("\n"):
286                text = text[:-1]
287            tokens = NumberLines([([], text)], startline, endline)
288            for classes, value in tokens:
289                if classes:
290                    literal_block += nodes.inline(value, value,
291                                                    classes=classes)
292                else:
293                    literal_block += nodes.Text(value, value)
294        else:
295            literal_block += nodes.Text(text, text)
296        return [literal_block]
297
298    def code(self, path, tab_width):
299        """Output a code block"""
300
301        include_lines = statemachine.string2lines(rawtext, tab_width,
302                                                  convert_whitespace=True)
303
304        self.options["source"] = path
305        codeblock = CodeBlock(self.name,
306                                [self.options.pop("code")],  # arguments
307                                self.options,
308                                include_lines,
309                                self.lineno,
310                                self.content_offset,
311                                self.block_text,
312                                self.state,
313                                self.state_machine)
314        return codeblock.run()
315
316    def run(self):
317        """Include a file as part of the content of this reST file."""
318        env = self.state.document.settings.env
319
320        #
321        # The include logic accepts only patches relative to the
322        # Kernel source tree.  The logic does check it to prevent
323        # directory traverse issues.
324        #
325
326        srctree = os.path.abspath(os.environ["srctree"])
327
328        path = os.path.expandvars(self.arguments[0])
329        src_path = os.path.join(srctree, path)
330
331        if os.path.isfile(src_path):
332            base = srctree
333            path = src_path
334        else:
335            raise self.warning(f'File "%s" doesn\'t exist', path)
336
337        abs_base = os.path.abspath(base)
338        abs_full_path = os.path.abspath(os.path.join(base, path))
339
340        try:
341            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
342                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
343                                  (self.name, path))
344        except ValueError:
345            # Paths don't have the same drive (Windows) or other incompatibility
346            raise self.severe('Problems with "%s" directive, invalid path: %s' %
347                            (self.name, path))
348
349        self.arguments[0] = path
350
351        #
352        # Add path location to Sphinx dependencies to ensure proper cache
353        # invalidation check.
354        #
355
356        env.note_dependency(os.path.abspath(path))
357
358        if not self.state.document.settings.file_insertion_enabled:
359            raise self.warning('"%s" directive disabled.' % self.name)
360        source = self.state_machine.input_lines.source(self.lineno -
361                                                       self.state_machine.input_offset - 1)
362        source_dir = os.path.dirname(os.path.abspath(source))
363        path = directives.path(self.arguments[0])
364        if path.startswith("<") and path.endswith(">"):
365            path = os.path.join(self.standard_include_path, path[1:-1])
366        path = os.path.normpath(os.path.join(source_dir, path))
367
368        # HINT: this is the only line I had to change / commented out:
369        # path = utils.relative_path(None, path)
370
371        encoding = self.options.get("encoding",
372                                    self.state.document.settings.input_encoding)
373        tab_width = self.options.get("tab-width",
374                                     self.state.document.settings.tab_width)
375
376        # Get optional arguments to related to cross-references generation
377        if "generate-cross-refs" in self.options:
378            return self.xref_text(env, path, tab_width)
379
380        rawtext = self.read_rawtext(path, encoding)
381        rawtext = self.apply_range(rawtext)
382
383        if "code" in self.options:
384            return self.code(path, tab_width, rawtext)
385
386        return self.literal(path, tab_width, rawtext)
387
388# ==============================================================================
389
# (node.source, message) pairs already warned about by check_missing_refs(),
# kept so that each distinct warning is logged only once.
reported = set()
391
def check_missing_refs(app, env, node, contnode):
    """
    Warn about unresolved references found in cross-referenced files.

    A warning is logged only when the node's source is one of the files
    stored at ``env._xref_files``, and only once per (source, message) pair.

    :returns: always None.
    :raises AttributeError: re-raised, after logging a critical message,
        when ``env._xref_files`` doesn't exist.
    """
    origin = node.source
    if not origin:
        return None

    try:
        tracked = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Files not handled by kernel-include cross-references are ignored
    if origin not in tracked:
        return None

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"can't link to: {domain}:{reftype}:: {target}"

    # Each (source, message) pair is reported a single time
    key = (origin, msg)
    if key not in reported:
        reported.add(key)
        logger.warning(msg, location=node, type='ref', subtype='missing')

    return None
422
def merge_xref_info(app, env, docnames, other):
    """
    Merge the cross-reference file set of ``other`` into ``env``.

    Each reading process fills its own ``_xref_files``; this handler adds
    the entries found at ``other`` to the ones stored at ``env``. Nothing is
    done when ``other`` has no ``_xref_files`` attribute.
    """
    try:
        extra = other._xref_files
    except AttributeError:
        return

    env._xref_files.update(extra)
430
def init_xref_docs(app, env, docnames):
    """
    Start with an empty set of files for which cross references are
    generated, stored at ``app.env._xref_files``.
    """
    tracked = set()
    app.env._xref_files = tracked
434
435# ==============================================================================
436
def setup(app):
    """
    Set up the Sphinx extension.

    Connects the event handlers defined in this file, registers the
    ``kernel-include`` directive and returns the extension metadata.
    """
    early_hooks = (
        ("env-before-read-docs", init_xref_docs),
        ("env-merge-info", merge_xref_info),
    )
    for event, handler in early_hooks:
        app.connect(event, handler)

    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    metadata = {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata
450