xref: /linux/Documentation/sphinx/automarkup.py (revision c940816968da6ef9a9462b7c070cc333d609a16c)
1# SPDX-License-Identifier: GPL-2.0
2# Copyright 2019 Jonathan Corbet <corbet@lwn.net>
3#
4# Apply kernel-specific tweaks after the initial document processing
5# has been done.
6#
7from docutils import nodes
8import sphinx
9from sphinx import addnodes
10from sphinx.errors import NoUri
11import re
12from itertools import chain
13
14from kernel_abi import kernel_abi
15
#
# re.ASCII is missing on Python 2; fall back to "no flag" (0) when the
# attribute does not exist.
#
ascii_p3 = getattr(re, 'ASCII', 0)

#
# Regex nastiness.  Of course.
# Identify "function()" written in running text: a C identifier of at
# least two characters immediately followed by an empty pair of
# parentheses.  Group 1 is the whole "name()" text, group 2 is the bare
# identifier.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3)

#
# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef,
# so one regex covers all four keywords.  Group 1 is the keyword, group 2
# the type name.
#
RE_generic_type = re.compile(
    r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
    flags=ascii_p3,
)

#
# Sphinx 3 uses a different C role for each one of struct, union, enum and
# typedef, hence one regex per keyword (same group layout as above).
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
47
#
# Detects a reference to a documentation page of the form Documentation/...
# or a relative path (optionally prefixed by one or more "../").  The .rst
# or .txt extension is required for a match; group 1 is the optional
# "Documentation/" prefix and group 2 the path without the extension.
#
RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')
#
# Detects a reference to a file below Documentation/ABI/ (group 1).
#
RE_abi_file = re.compile(r'(\bDocumentation/ABI/[\w\-/]+)')
#
# Detects an absolute /sys, /config or /proc path (group 1).
#
# "\b" cannot be used to anchor the leading "/": in front of a non-word
# character a word boundary only exists when the *previous* character is a
# word character.  A "\b/" anchor therefore never matches a path at the
# start of the text or after whitespace, and it matches in the middle of a
# longer path instead ("/proc/sys/foo" would yield "/sys/foo").  A negative
# lookbehind expresses the intent: the path must not be the continuation
# of another word or path.
#
RE_abi_symbol = re.compile(r'((?<![\w./\-])/(sys|config|proc)/[\w\-/]+)')
55
#
# Matches a ".. c:namespace:: <name>" directive line; group 1 is the
# namespace name.  The two leading dots are escaped so that only a literal
# ".." marker matches rather than any two arbitrary characters.
#
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')
57
#
# Reserved C words that must never be turned into cross-references.
#
Skipnames = [
    'for',
    'if',
    'register',
    'sizeof',
    'struct',
    'unsigned',
]


#
# Many places in the docs refer to common system calls.  It is
# pointless to try to cross-reference them and, as has been known
# to happen, somebody defining a function by these names can lead
# to the creation of incorrect and confusing cross references.  So
# just don't even try with these names.
#
Skipfuncs = [
    'open',
    'close',
    'read',
    'write',
    'fcntl',
    'mmap',
    'select',
    'poll',
    'fork',
    'execve',
    'clone',
    'ioctl',
    'socket',
]

# C namespace of the document currently being processed ('' when none).
c_namespace = ''
76
#
# Detect references to commits: the word "commit", a 12-40 digit hex
# revision (named group "rev") and an optional ("subject") suffix.
# Matching is case-insensitive and "." also matches newlines.
#
RE_git = re.compile(
    r'commit\s+(?P<rev>[0-9a-f]{12,40})(?:\s+\(".*?"\))?',
    flags=re.IGNORECASE | re.DOTALL,
)
82
def markup_refs(docname, app, node):
    """Split the text of *node* into plain text and marked-up references.

    Every regex of the table matching the running Sphinx major version is
    applied to the node's text.  Matches are processed in order of their
    start position; each one is handed to the markup function associated
    with its regex, and the text between matches is kept as plain
    ``nodes.Text``.

    Returns the list of replacement nodes, in document order.
    """
    text = node.astext()
    #
    # Associate each regex with the function that will markup its matches.
    # Only the table for the running Sphinx version is built.
    #
    if sphinx.version_info[0] >= 3:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_abi_file: markup_abi_ref,
                       RE_abi_symbol: markup_abi_ref,
                       RE_function: markup_func_ref_sphinx3,
                       RE_struct: markup_c_ref,
                       RE_union: markup_c_ref,
                       RE_enum: markup_c_ref,
                       RE_typedef: markup_c_ref,
                       RE_git: markup_git}
    else:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_abi_file: markup_abi_ref,
                       RE_abi_symbol: markup_abi_ref,
                       RE_function: markup_c_ref,
                       RE_generic_type: markup_c_ref}

    #
    # Sort all references by the starting position in text.  The sort is
    # stable, so matches starting at the same position keep table order.
    #
    all_matches = chain.from_iterable(regex.finditer(text)
                                      for regex in markup_func)
    sorted_matches = sorted(all_matches, key=lambda m: m.start())

    repl = []
    done = 0
    for m in sorted_matches:
        #
        # Different regexes may match overlapping text (e.g. "struct foo()"
        # is matched both as a struct and as a function).  The text up to
        # "done" has already been emitted, so processing such a match would
        # duplicate part of it in the output; skip it instead.
        #
        if m.start() < done:
            continue

        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(text[done:m.start()]))

        #
        # Call the function associated with the regex that matched this
        # text and append its return to the replacement list.
        #
        repl.append(markup_func[m.re](docname, app, m))
        done = m.end()

    #
    # Whatever follows the last match is plain text as well.
    #
    if done < len(text):
        repl.append(nodes.Text(text[done:]))
    return repl
133
134#
135# Keep track of cross-reference lookups that failed so we don't have to
136# do them again.
137#
failed_lookups = {}


def failure_seen(target):
    """Return True when a lookup of *target* has already been noted as failed."""
    return target in failed_lookups


def note_failure(target):
    """Record that the cross-reference lookup of *target* failed."""
    failed_lookups[target] = True
143
144#
145# In sphinx3 we can cross-reference to C macro and function, each one with its
146# own C role, but both match the same regex, so we try both.
147#
def markup_func_ref_sphinx3(docname, app, match):
    """Try to turn a "name()" match into a C-domain function reference.

    *match* is expected to provide the bare identifier as group 2 and the
    full matched text as group 0.  The namespaced name (when the current
    document has a C namespace) is tried before the bare one.  Names in
    Skipnames/Skipfuncs and targets whose lookup already failed are not
    looked up.

    Returns the resolved cross-reference node, or a plain text node
    holding the matched text when nothing could be resolved.
    """
    c_domain = app.env.domains['c']
    name = match.group(2)
    plain = nodes.Text(match.group(0))

    #
    # Reserved words never get a cross-reference.
    #
    if name in Skipnames:
        return plain

    #
    # Check if this document has a namespace, and if so, try
    # cross-referencing inside it first.
    #
    if c_namespace:
        candidates = [c_namespace + "." + name, name]
    else:
        candidates = [name]

    for target in candidates:
        if target in Skipfuncs or failure_seen(target):
            continue

        #
        # Go through the dance of getting an xref out of the C domain
        #
        literal = nodes.literal(classes=['xref', 'c', 'c-func'])
        literal += plain
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype='function',
                                        reftarget=target,
                                        modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = c_domain.resolve_xref(app.env, docname, app.builder,
                                             'function', target, pending,
                                             literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved
        # Remember the miss so this target is not looked up again.
        note_failure(target)

    return plain
188
def markup_c_ref(docname, app, match):
    """Try to turn a C identifier match into a C-domain cross-reference.

    The regex that produced *match* selects both the CSS class of the
    literal node and the reference type passed to the C domain.  Group 2
    of the match is the identifier, group 0 the full matched text.  The
    namespaced name (when the current document has a C namespace) is
    tried before the bare one.

    Returns the resolved cross-reference node, or a plain text node
    holding the matched text when nothing could be resolved.
    """
    # regex -> (CSS class of the literal, C-domain reference type)
    roles = {
        # Sphinx 2 only
        RE_function: ('c-func', 'function'),
        RE_generic_type: ('c-type', 'type'),
        # Sphinx 3+ only
        RE_struct: ('c-struct', 'struct'),
        RE_union: ('c-union', 'union'),
        RE_enum: ('c-enum', 'enum'),
        RE_typedef: ('c-type', 'type'),
    }

    c_domain = app.env.domains['c']
    name = match.group(2)
    plain = nodes.Text(match.group(0))

    #
    # Reserved words never get a cross-reference.
    #
    if name in Skipnames:
        return plain

    #
    # Check if this document has a namespace, and if so, try
    # cross-referencing inside it first.
    #
    if c_namespace:
        candidates = [c_namespace + "." + name, name]
    else:
        candidates = [name]

    is_function = match.re == RE_function
    for target in candidates:
        # Well-known system call names are skipped for function matches.
        if is_function and target in Skipfuncs:
            continue

        #
        # Go through the dance of getting an xref out of the C domain
        #
        css_class, reftype = roles[match.re]
        literal = nodes.literal(classes=['xref', 'c', css_class])
        literal += plain
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype=reftype,
                                        reftarget=target, modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = c_domain.resolve_xref(app.env, docname, app.builder,
                                             reftype, target, pending,
                                             literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved

    return plain
246
247#
248# Try to replace a documentation reference of the form Documentation/... with a
249# cross reference to that page
250#
def markup_doc_ref(docname, app, match):
    """Try to turn a documentation file mention into a 'doc' reference.

    Group 2 of *match* is the path without its extension; when group 1
    (the "Documentation/" prefix) matched, the target is made absolute by
    prepending "/".

    Returns the resolved cross-reference node, or a plain text node
    holding the matched text when the target could not be resolved.
    """
    std_domain = app.env.domains['std']

    target = match.group(2)
    if match.group(1):
        target = "/" + target

    #
    # Go through the dance of getting an xref out of the std domain
    #
    pending = addnodes.pending_xref('', refdomain='std', reftype='doc',
                                    reftarget=target, modname=None,
                                    classname=None, refexplicit=False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = std_domain.resolve_xref(app.env, docname, app.builder,
                                           'doc', target, pending, None)
    except NoUri:
        resolved = None

    #
    # Return the xref if we got it; otherwise just return the plain text.
    #
    return resolved if resolved else nodes.Text(match.group(0))
280
281#
282# Try to replace a documentation reference of the form Documentation/ABI/...
283# with a cross reference to that page
284#
def markup_abi_ref(docname, app, match):
    """Try to turn an ABI file or symbol mention into a 'ref' reference.

    Group 1 of *match* is handed to kernel_abi.xref() to obtain the
    reference target.

    Returns the resolved cross-reference node, or a plain text node
    holding the matched text when there is no target or it could not be
    resolved.
    """
    std_domain = app.env.domains['std']

    target = kernel_abi.xref(match.group(1))
    if not target:
        # Kernel ABI doesn't describe such file or symbol
        return nodes.Text(match.group(0))

    #
    # Go through the dance of getting an xref out of the std domain
    #
    pending = addnodes.pending_xref('', refdomain='std', reftype='ref',
                                    reftarget=target, modname=None,
                                    classname=None, refexplicit=False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = std_domain.resolve_xref(app.env, docname, app.builder,
                                           'ref', target, pending, None)
    except NoUri:
        resolved = None

    #
    # Return the xref if we got it; otherwise just return the plain text.
    #
    return resolved if resolved else nodes.Text(match.group(0))
317
def get_c_namespace(app, docname):
    """Return the C namespace declared in the source of *docname*.

    The source file (located through app.env.doc2path()) is scanned line
    by line for the first line matching RE_namespace.

    Returns the namespace name, or '' when the file declares none.
    """
    source = app.env.doc2path(docname)
    #
    # Decode explicitly instead of relying on the locale's preferred
    # encoding, which may not be UTF-8.  "utf-8-sig" also drops a leading
    # BOM, and undecodable bytes are replaced rather than raising: they
    # cannot be part of the directive being searched for.
    #
    with open(source, encoding='utf-8-sig', errors='replace') as f:
        for line in f:
            match = RE_namespace.search(line)
            if match:
                return match.group(1)
    return ''
326
def markup_git(docname, app, match):
    """Turn a "commit <rev>" mention into a link to git.kernel.org.

    *docname* and *app* are not used.  The link text is the full matched
    text; the URL is built from the "rev" group of *match*.
    """
    # While we could probably assume that we are running in a git
    # repository, we can't know for sure, so let's just mechanically
    # turn them into git.kernel.org links without checking their
    # validity. (Maybe we can do something in the future to warn about
    # these references if this is explicitly requested.)
    rev = match.group('rev')
    url = f'https://git.kernel.org/torvalds/c/{rev}'
    return nodes.reference('', nodes.Text(match.group(0)), refuri=url)
337
def auto_markup(app, doctree, name):
    """Add automatic cross-references to the paragraphs of *doctree*.

    Connected to the 'doctree-resolved' event by setup().  The module-level
    c_namespace is refreshed for the document *name* before any text is
    processed.
    """
    global c_namespace
    c_namespace = get_c_namespace(app, name)

    def text_but_not_a_reference(node):
        # Only text nodes are candidates for automatic markup.
        if not isinstance(node, nodes.Text):
            return False
        # The nodes.literal test catches ``literal text``, its purpose is to
        # avoid adding cross-references to functions that have been
        # explicitly marked with :c:func:.
        if isinstance(node.parent, nodes.literal):
            return False

        # Text anywhere below a reference node is left untouched.
        ancestor = node.parent
        while ancestor:
            if isinstance(ancestor, nodes.Referential):
                return False
            ancestor = ancestor.parent
        return True

    #
    # This loop could eventually be improved on.  Someday maybe we
    # want a proper tree traversal with a lot of awareness of which
    # kinds of nodes to prune.  But this works well for now.
    #
    for para in doctree.traverse(nodes.paragraph):
        for text_node in para.traverse(condition=text_but_not_a_reference):
            replacement = markup_refs(name, app, text_node)
            text_node.parent.replace(text_node, replacement)
365
def setup(app):
    """Extension entry point: run auto_markup on 'doctree-resolved'.

    Returns the extension metadata, flagging the extension as safe for
    parallel reading and writing.
    """
    app.connect('doctree-resolved', auto_markup)
    metadata = {
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return metadata
372