xref: /linux/Documentation/sphinx/kernel_include.py (revision f96163865a1346b199cc38e827269296f0f24ab0)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
4
5
6"""
7Implementation of the ``kernel-include`` reST-directive.
8
9:copyright:  Copyright (C) 2016  Markus Heiser
10:license:    GPL Version 2, June 1991 see linux/COPYING for details.
11
The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations.
15
16.. hint::
17
    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the filesystem
    tree where the reST document with the include directive is placed.
22
23Substrings of the form $name or ${name} are replaced by the value of
24environment variable name. Malformed variable names and references to
25non-existing variables are left unchanged.
26
27**Supported Sphinx Include Options**:
28
29:param literal:
30    If present, the included file is inserted as a literal block.
31
32:param code:
33    Specify the language for syntax highlighting (e.g., 'c', 'python').
34
35:param encoding:
36    Specify the encoding of the included file (default: 'utf-8').
37
38:param tab-width:
39    Specify the number of spaces that a tab represents.
40
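:param start-line:
    Index of the first line to include. For plain includes it is used as
    the start of a Python slice over the file lines, so it is 0-based.

:param end-line:
    Index of the line at which to stop. For plain includes it is used as
    the end of a Python slice, so the line with this index is excluded.
    For ``toc`` output, entries whose line number lies between start-line
    and end-line (both included) are kept.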
46
47:param start-after:
48    Include lines after the first line matching this text.
49
50:param end-before:
51    Include lines before the first line matching this text.
52
53:param number-lines:
54    Number the included lines (integer specifies start number).
55    Only effective with 'literal' or 'code' options.
56
57:param class:
58    Specify HTML class attribute for the included content.
59
60**Kernel-specific Extensions**:
61
62:param generate-cross-refs:
63    If present, instead of directly including the file, it calls
64    ParseDataStructs() to convert C data structures into cross-references
65    that link to comprehensive documentation in other ReST files.
66
67:param exception-file:
68    (Used with generate-cross-refs)
69
70    Path to a file containing rules for handling special cases:
71    - Ignore specific C data structures
72    - Use alternative reference names
73    - Specify different reference types
74
75:param warn-broken:
76    (Used with generate-cross-refs)
77
78    Enables warnings when auto-generated cross-references don't point to
79    existing documentation targets.
80"""
81
82# ==============================================================================
83# imports
84# ==============================================================================
85
86import os.path
87import re
88import sys
89
90from difflib import get_close_matches
91
92from docutils import io, nodes, statemachine
93from docutils.statemachine import ViewList
94from docutils.parsers.rst import Directive, directives
95from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
96
97from sphinx.util import logging
98
# Absolute path of the kernel source tree, taken from the "srctree"
# environment variable. A missing variable raises KeyError at import time.
srctree = os.path.abspath(os.environ["srctree"])
# Put the in-tree Python library directory first on the module search path,
# ahead of the kdoc import below.
sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))

from kdoc.parse_data_structs import ParseDataStructs

__version__ = "1.0"
logger = logging.getLogger(__name__)

# Matches an escaped role reference such as "\ :c:type:`text <target>`\":
# group 1 is the role (ref, c:type or c:func), group 2 the text and group 3
# the optional explicit target.
RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
# Matches any text enclosed in single backticks.
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
# Matches "- LINENO_<number>: <text>" list entries; used by
# KernelInclude.xref_text() to recover the line number of each TOC entry.
RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
# Splits a string at its last dot (the first group is greedy).
RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
111
112def ErrorString(exc):  # Shamelessly stolen from docutils
113    return f'{exc.__class__.__name}: {exc}'
114
115
116# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive

    Most of the logic here came from the Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, subclassing it doesn't bring any benefit: the original class
    only has run() and the argument list. Not all arguments are implemented
    when checked against the latest Sphinx version, as more arguments were
    added over time.

    So, keep its own list of supported arguments
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the whole content of a file.

        The file is also added to the dependencies recorded in the document
        settings.

        :param path: path of the file to read.
        :param encoding: encoding handed to ``io.FileInput``.
        :returns: the content returned by ``io.FileInput.read()``.
        :raises: a "severe" directive error if the path can't be encoded,
                 the file can't be opened or its content can't be decoded.
        """
        settings = self.state.document.settings

        try:
            settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % path)
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handles start-line, end-line, start-after and end-before parameters

        ``start-line`` and ``end-line`` are used as a Python slice over the
        lines of ``rawtext``: the start index is 0-based and the end index
        is excluded. ``start-after`` and ``end-before`` are then applied to
        the remaining text.

        :param rawtext: the text to be filtered.
        :returns: the selected part of ``rawtext``.
        :raises: a "severe" directive error if the ``start-after`` or
                 ``end-before`` text is not found.
        """

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        if startline or (endline is not None):
            lines = rawtext.splitlines()
            rawtext = '\n'.join(lines[startline:endline])

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]

        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        :param env: Sphinx build environment; with ``warn-broken``, ``path``
                    is added to ``env._xref_files``.
        :param path: file handed to ``ParseDataStructs.parse_file()``.
        :param tab_width: tab width used when splitting the literal output
                          into lines.
        :returns: an empty list, as the output is inserted into the state
                  machine input instead of being returned as nodes.
        """
        parser = ParseDataStructs()

        if 'exception-file' in self.options:
            # The exception file is looked up relative to the directory of
            # the document that contains the directive
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
        else:
            exceptions_file = None

        parser.parse_file(path, exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # Apply the range options to the generated output and only then
            # prepend the directive header. The result of apply_range() used
            # to be discarded, making those options silently ignored here.
            # NOTE(review): assumes any slice of gen_output() is still a
            # valid body for "parsed-literal" - confirm against the parser.
            rawtext = (".. parsed-literal::\n\n"
                       + self.apply_range(parser.gen_output()))

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)

        relpath = os.path.relpath(path, srctree)

        result = ViewList()
        for line in parser.gen_toc().split("\n"):
            match = RE_LINENO_REF.match(line)
            if not match:
                result.append(line, path)
                continue

            ln, ref = match.groups()
            ln = int(ln)

            # Filter line range if needed
            if startline and (ln < startline):
                continue

            if endline and (ln > endline):
                continue

            # Sphinx numerates starting with zero, but text editors
            # and other tools start from one
            realln = ln + 1
            result.append(f"- {ref}: {relpath}#{realln}", path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block

        :param path: file the content came from, stored as the block source.
        :param tab_width: tab width; tabs are expanded when it is not
                          negative.
        :param rawtext: the text to be placed inside the literal block.
        :returns: a list with a single ``literal_block`` node.
        :raises: a directive error if ``number-lines`` is not an integer.
        """

        # Convert tabs to spaces, if `tab_width` is not negative.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext
        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)
        if "number-lines" in self.options:
            try:
                startline = int(self.options["number-lines"] or 1)
            except ValueError:
                raise self.error(":number-lines: with non-integer start value")

            # The amount of lines is needed to compute the last line number.
            # It used to come from an undefined name, raising NameError.
            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)
            endline = startline + len(include_lines)
            if text.endswith("\n"):
                text = text[:-1]
            tokens = NumberLines([([], text)], startline, endline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline(value, value,
                                                  classes=classes)
                else:
                    literal_block += nodes.Text(value, value)
        else:
            literal_block += nodes.Text(text, text)
        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block

        :param path: file the content came from, stored in the "source"
                     option handed to the CodeBlock directive.
        :param tab_width: tab width used when splitting ``rawtext`` into
                          lines.
        :param rawtext: the text to be placed inside the code block. run()
                        already passes it, so it must be part of the
                        signature.
        :returns: whatever ``CodeBlock.run()`` returns.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """
        Include a file as part of the content of this reST file.

        :returns: the nodes produced by literal(), code() or xref_text().
        :raises: a "warning" directive error if the file doesn't exist or
                 file insertion is disabled, and a "severe" one if the path
                 points outside the Kernel source tree.
        """
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree.  The logic does check it to prevent
        # directory traversal issues.
        #

        srctree = os.path.abspath(os.environ["srctree"])

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(srctree, path)

        if os.path.isfile(src_path):
            base = srctree
            path = src_path
        else:
            # Directive.warning() takes a single, already formatted message
            raise self.warning(f'File "{path}" doesn\'t exist')

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        # NOTE(review): self.standard_include_path is not defined by this
        # class. The argument was replaced above by a path joined with
        # srctree, so this branch is not expected to be taken - confirm.
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)
402
403# ==============================================================================
404
# (source, message) pairs already warned about by check_missing_refs(),
# used to avoid duplicated warnings
reported = set()
# Sphinx domain name -> list of its object type names; filled once by
# fill_domain_info()
DOMAIN_INFO = {}
# Lower-cased reference name -> list of formatted descriptions of the
# objects having that name; filled by fill_domain_info()
all_refs = {}
408
def fill_domain_info(env):
    """
    Fill DOMAIN_INFO and all_refs from the Sphinx environment.

    DOMAIN_INFO maps each domain name to the list of its object types.
    all_refs maps each lower-cased object name (without the namespace part
    for the C domain) to a list of descriptions of the matching objects.

    Nothing is done if DOMAIN_INFO was already filled.
    """
    if DOMAIN_INFO:
        return

    for domain_name, domain_instance in env.domains.items():
        try:
            DOMAIN_INFO[domain_name] = list(domain_instance.object_types.keys())
        except AttributeError:
            # Ignore domains that we can't retrieve object types, if any
            continue

    for domain in DOMAIN_INFO:
        objects = env.get_domain(domain).get_objects()
        for name, _dispname, objtype, docname, _anchor, _priority in objects:
            key = name.lower()

            # C names may carry a namespace: keep only what follows the
            # last dot
            if domain == "c":
                key = key.rpartition(".")[2]

            description = f"\t{domain}:{objtype}:`{name}` (from {docname})"
            all_refs.setdefault(key, []).append(description)
437
def get_suggestions(app, env, node,
                    original_target, original_domain, original_reftype):
    """
    Propose alternatives for a reference target that was not found.

    The target is compared in lower case and, for the C domain, without
    its namespace. An exact match in all_refs is returned as-is; otherwise
    the entries of the closest names found by difflib are returned.
    """
    wanted = original_target.lower()

    # Remove namespace if present
    if original_domain == "c":
        wanted = wanted.rpartition(".")[2]

    # If name exists, propose exact name match on different domains
    if wanted in all_refs:
        return all_refs[wanted]

    # If not found, get a close match, using difflib.
    # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
    # for a close match within a certain distance. We're using the defaults
    # here, e.g. cutoff=0.6, proposing 3 alternatives
    close_names = get_close_matches(wanted, all_refs.keys())

    return [entry for close in close_names for entry in all_refs[close]]
463
def check_missing_refs(app, env, node, contnode):
    """
    Warn about broken references inside files tracked in env._xref_files.

    Nodes without a source, or whose source is not in env._xref_files, are
    ignored. Each (source, message) pair is reported only once. When
    get_suggestions() finds alternatives, they are appended to the warning.

    Always returns None.
    """
    source = node.source
    if not source:
        return None

    try:
        tracked_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if source not in tracked_files:
        return None

    fill_domain_info(env)

    ref_target = node.get('reftarget', '')
    ref_domain = node.get('refdomain', 'std')
    ref_type = node.get('reftype', '')

    msg = f"Invalid xref: {ref_domain}:{ref_type}:`{ref_target}`"

    # Don't duplicate warnings
    seen_key = (source, msg)
    if seen_key in reported:
        return None
    reported.add(seen_key)

    alternatives = get_suggestions(app, env, node,
                                   ref_target, ref_domain, ref_type)
    if alternatives:
        msg = msg + ". Possible alternatives:\n" + '\n'.join(alternatives)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None
500
def merge_xref_info(app, env, docnames, other):
    """
    Merge the files tracked by another environment into this one.

    As each process modifies its own env._xref_files, the entries found in
    ``other`` are added to ``env``. Nothing is done if ``other`` has no
    ``_xref_files`` attribute.
    """
    if hasattr(other, "_xref_files"):
        env._xref_files.update(other._xref_files)
508
def init_xref_docs(app, env, docnames):
    """
    Initialize the set of files that we're generating cross references for.

    The set is stored at ``app.env._xref_files``; the ``env`` and
    ``docnames`` arguments are not used.
    """
    environment = app.env
    environment._xref_files = set()
512
513# ==============================================================================
514
def setup(app):
    """
    Setup Sphinx extension

    Registers the ``kernel-include`` directive and the event handlers that
    track and check the generated cross references, returning the
    extension metadata.
    """
    metadata = {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return metadata
528