xref: /linux/Documentation/sphinx/kernel_include.py (revision f0eb1b4ce75f00e2711fc369cbbcc66d4e90b488)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
4
5
6"""
7Implementation of the ``kernel-include`` reST-directive.
8
9:copyright:  Copyright (C) 2016  Markus Heiser
10:license:    GPL Version 2, June 1991 see linux/COPYING for details.
11
12The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from anywhere inside the Kernel
source tree (``$srctree``).
15
16.. hint::
17
18    Including files from arbitrary locations (e.g. from ``/etc``) is a
19    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the filesystem
21    tree where the reST document with the include directive is placed.
22
23Substrings of the form $name or ${name} are replaced by the value of
24environment variable name. Malformed variable names and references to
25non-existing variables are left unchanged.
26
27**Supported Sphinx Include Options**:
28
29:param literal:
30    If present, the included file is inserted as a literal block.
31
32:param code:
33    Specify the language for syntax highlighting (e.g., 'c', 'python').
34
35:param encoding:
36    Specify the encoding of the included file (default: 'utf-8').
37
38:param tab-width:
39    Specify the number of spaces that a tab represents.
40
41:param start-line:
    Index of the first line to include (0-based; used as a slice start).
43
44:param end-line:
    Line index at which to stop including the file content (exclusive).
46
47:param start-after:
    Include only the content after the first occurrence of this text.
49
50:param end-before:
    Include only the content before the first occurrence of this text.
52
53:param number-lines:
54    Number the included lines (integer specifies start number).
55    Only effective with 'literal' or 'code' options.
56
57:param class:
58    Specify HTML class attribute for the included content.
59
60**Kernel-specific Extensions**:
61
62:param generate-cross-refs:
63    If present, instead of directly including the file, it calls
64    ParseDataStructs() to convert C data structures into cross-references
65    that link to comprehensive documentation in other ReST files.
66
67:param exception-file:
68    (Used with generate-cross-refs)
69
70    Path to a file containing rules for handling special cases:
71    - Ignore specific C data structures
72    - Use alternative reference names
73    - Specify different reference types
74
75:param warn-broken:
76    (Used with generate-cross-refs)
77
78    Enables warnings when auto-generated cross-references don't point to
79    existing documentation targets.
80"""
81
82# ==============================================================================
83# imports
84# ==============================================================================
85
86import os.path
87import re
88import sys
89
90from docutils import io, nodes, statemachine
91from docutils.statemachine import ViewList
92from docutils.parsers.rst import Directive, directives
93from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
94
95from sphinx.util import logging
96
97srctree = os.path.abspath(os.environ["srctree"])
98sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))
99
100from parse_data_structs import ParseDataStructs
101
# Extension version, reported to Sphinx by setup().
__version__ = "1.0"
logger = logging.getLogger(__name__)

# Matches an escaped domain reference such as "\ :c:type:`text <target>`\" :
# group 1 is the role (ref, c:type or c:func), group 2 the text up to any
# "<" and group 3 the optional explicit target found inside "<...>".
RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
# Matches any text placed between a pair of single backticks (group 1).
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
# Matches a list item of the form "- LINENO_<digits>: <rest>"; group 1 is the
# number and group 2 the rest. Used by KernelInclude.xref_text() on TOC lines.
RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
# Splits a dotted name at its last dot (the first group is greedy): group 2
# is the part after that dot. Used by get_suggestions() for C namespaces.
RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
109
def ErrorString(exc):  # Shamelessly stolen from docutils
    """
    Format an exception as ``<ExceptionClassName>: <message>``.

    :param exc: the exception instance to describe.
    :return: a string with the exception class name followed by ``str(exc)``.
    """
    # The class name lives in ``__name__``; ``__name`` does not exist and
    # would raise AttributeError while an error is being reported.
    return f'{exc.__class__.__name__}: {exc}'
112
113
114# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive.

    Most of the code here came from the Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, subclassing it brings no benefit: the original class only has
    run() and the argument list. Not all of its arguments are implemented
    here when checked against the latest Sphinx version, as more arguments
    were added over time.

    So, this class keeps its own list of supported arguments.

    The single required argument is the file to include. Environment
    variables in it are expanded and the result is resolved against the
    ``srctree`` environment variable; files outside that tree are rejected.

    Output modes, as selected by run():

    - ``generate-cross-refs``: the file is parsed by ParseDataStructs and
      emitted either as a ``parsed-literal`` block or, with ``toc``, as a
      ReST list of references;
    - ``code``: the content is handed to the docutils ``CodeBlock``
      directive;
    - otherwise: the content is emitted as a literal block.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the file at ``path`` and return its whole content as text.

        The path is also recorded as a docutils dependency of the document.

        :param path: file to be read.
        :param encoding: encoding used to decode the file.
        :return: the decoded file content.
        :raises: the ``severe`` system message produced by the directive
            when the file can't be opened or decoded.
        """
        settings = self.state.document.settings

        try:
            settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % path)
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handle start-line, end-line, start-after and end-before parameters.

        ``start-line``/``end-line`` are used as a Python slice over the lines
        of ``rawtext`` (so the start is 0-based and the end is exclusive).
        ``start-after``/``end-before`` are then applied to what is left,
        as plain substring searches.

        :param rawtext: the text to be filtered.
        :return: the selected part of ``rawtext``.
        :raises: the ``severe`` system message produced by the directive
            when a ``start-after`` or ``end-before`` text is not found.
        """

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        if startline or (endline is not None):
            lines = rawtext.splitlines()
            rawtext = '\n'.join(lines[startline:endline])

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]

        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        :param env: Sphinx build environment; ``path`` is added to its
            ``_xref_files`` set when ``warn-broken`` is given.
        :param path: file to be parsed.
        :param tab_width: tab width used when splitting the output in lines.
        :return: an empty list, as the output is inserted on the input
            stream of the state machine instead of being returned as nodes.
        """
        parser = ParseDataStructs()

        if 'exception-file' in self.options:
            # The exception file is relative to the document which has
            # the directive
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
        else:
            exceptions_file = None

        parser.parse_file(path, exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # Apply the range options to the generated content only, and
            # keep its result: filtering after prepending the directive
            # header could strip the header itself.
            # NOTE(review): start-after matching in the middle of a line
            # leaves a first line without the indentation that
            # parsed-literal content needs.
            body = self.apply_range(parser.gen_output())
            rawtext = ".. parsed-literal::\n\n" + body

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)

        relpath = os.path.relpath(path, srctree)

        result = ViewList()
        for line in parser.gen_toc().split("\n"):
            match = RE_LINENO_REF.match(line)
            if not match:
                result.append(line, path)
                continue

            ln, ref = match.groups()
            ln = int(ln)

            # Filter line range if needed
            if startline and (ln < startline):
                continue

            if endline and (ln > endline):
                continue

            # Sphinx numerates starting with zero, but text editors
            # and other tools start from one
            realln = ln + 1
            result.append(f"- {ref}: {relpath}#{realln}", path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block.

        :param path: file the content came from, stored as node source.
        :param tab_width: tabs are expanded to this width when it isn't
            negative.
        :param rawtext: the content to be placed on the block.
        :return: a list with the resulting ``literal_block`` node.
        :raises: the ``error`` system message produced by the directive
            when ``number-lines`` has a non-integer value.
        """

        # Convert tabs to spaces, if `tab_width` is positive.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext

        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)

        if "number-lines" not in self.options:
            literal_block += nodes.Text(text, text)
            return [literal_block]

        try:
            startline = int(self.options["number-lines"] or 1)
        except ValueError:
            raise self.error(":number-lines: with non-integer start value")

        # The number of included lines gives the last line number
        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        endline = startline + len(include_lines)

        if text.endswith("\n"):
            text = text[:-1]

        tokens = NumberLines([([], text)], startline, endline)
        for classes, value in tokens:
            if classes:
                literal_block += nodes.inline(value, value, classes=classes)
            else:
                literal_block += nodes.Text(value, value)

        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block.

        The ``code`` option is removed from the directive options and used
        as the argument of a docutils ``CodeBlock`` directive, which
        receives the remaining options.

        :param path: file the content came from, stored as ``source`` option.
        :param tab_width: tab width used when splitting the content in lines.
        :param rawtext: the content to be placed on the block.
        :return: the nodes produced by ``CodeBlock.run()``.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """
        Include a file as part of the content of this reST file.

        :return: the list of nodes produced by the selected output mode
            (empty when the content is inserted on the input stream).
        :raises: the ``warning`` system message when the file doesn't exist
            or file insertion is disabled, and the ``severe`` one when the
            path is outside the Kernel source tree.
        """
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree.  The logic does check it to prevent
        # directory traverse issues.
        #

        srctree = os.path.abspath(os.environ["srctree"])

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(srctree, path)

        if not os.path.isfile(src_path):
            # Directive.warning() takes a single, already formatted message
            raise self.warning(f'File "{path}" doesn\'t exist')

        base = srctree
        path = src_path

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)

        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))

        # At this point the path is already absolute, so joining it with
        # source_dir keeps it as is: only the normalization takes effect.
        path = directives.path(self.arguments[0])
        path = os.path.normpath(os.path.join(source_dir, path))

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments to related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)
400
401# ==============================================================================
402
# (source, message) pairs already warned about by check_missing_refs(),
# used there to avoid emitting the same warning twice.
reported = set()

# Maps each Sphinx domain name to the list of its object type names.
# Filled on first use by fill_domain_info().
DOMAIN_INFO = {}
406
def fill_domain_info(env):
    """
    Populate DOMAIN_INFO with the object types known by each Sphinx domain.

    Nothing is done when DOMAIN_INFO already has content. Domains whose
    object types can't be retrieved are left out of it.
    """
    if DOMAIN_INFO:
        return

    for name, domain in env.domains.items():
        try:
            types = [*domain.object_types.keys()]
        except AttributeError:
            # Domain without retrievable object types: skip it
            continue

        DOMAIN_INFO[name] = types
421
def get_suggestions(app, env, node,
                    original_target, original_domain, original_reftype):
    """
    Check if target exists in the other domain or with different reftypes.

    The target is compared in lowercase, without any namespace prefix, and
    also with ``-`` and ``_`` swapped. For the C domain, the part of the
    object name after its last dot is the one compared, so objects placed
    under a different ``.. c:namespace::`` are found as well.

    Only the domains listed at DOMAIN_INFO are searched. ``app``, ``node``,
    ``original_domain`` and ``original_reftype`` are currently unused.

    :return: a list of suggestion strings, grouped by target variant (in
        sorted order) and, inside each group, in domain/object order.
    """
    # Lowercase and remove namespace if present
    wanted = original_target.lower().rpartition(".")[2]

    # One bucket per target variant: it allows scanning the domain objects
    # only once, instead of once per variant, while keeping the output
    # grouped by variant.
    matches = {
        target: []
        for target in {
            wanted,
            wanted.replace("-", "_"),
            wanted.replace("_", "-"),
        }
    }

    for domain in DOMAIN_INFO:
        domain_obj = env.get_domain(domain)
        for name, _dispname, objtype, docname, _anchor, _prio in domain_obj.get_objects():
            candidate = name.lower()

            if domain == "c":
                # The name may belong to a different C namespace
                match = RE_SPLIT_DOMAIN.match(name)
                if match:
                    candidate = match.group(2).lower()

            if candidate in matches:
                matches[candidate].append(
                    f"\t{domain}:{objtype}:`{name}` (from {docname})")

    return [hint for target in sorted(matches) for hint in matches[target]]
465
def check_missing_refs(app, env, node, contnode):
    """
    Warn about broken references on files with generated cross references.

    Handler for the ``missing-reference`` event. Only nodes whose source is
    listed at ``env._xref_files`` are reported, once per source and
    message. It always returns None.
    """
    source = node.source
    if not source:
        return None

    try:
        tracked_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if source not in tracked_files:
        return None

    fill_domain_info(env)

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"Invalid xref: {domain}:{reftype}:`{target}`"

    # Don't duplicate warnings
    key = (source, msg)
    if key in reported:
        return None
    reported.add(key)

    alternatives = get_suggestions(app, env, node, target, domain, reftype)
    if alternatives:
        msg = msg + ". Possible alternatives:\n" + '\n'.join(alternatives)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None
502
def merge_xref_info(app, env, docnames, other):
    """
    Merge ``_xref_files`` from ``other`` into ``env``.

    Each process modifies its own env._xref_files, so they need to be
    merged back. Nothing is done when ``other`` has no ``_xref_files``.
    """
    try:
        extra_files = other._xref_files
    except AttributeError:
        return

    env._xref_files.update(extra_files)
510
def init_xref_docs(app, env, docnames):
    """Start with an empty set of files that we're generating cross references."""
    setattr(app.env, "_xref_files", set())
514
515# ==============================================================================
516
def setup(app):
    """
    Setup Sphinx extension.

    Registers the ``kernel-include`` directive and the event handlers
    which track files with generated cross references, returning the
    extension metadata.
    """
    metadata = dict(
        version=__version__,
        parallel_read_safe=True,
        parallel_write_safe=True,
    )

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return metadata
530