# SPDX-License-Identifier: GPL-2.0
# Copyright 2019 Jonathan Corbet <corbet@lwn.net>
#
# Apply kernel-specific tweaks after the initial document processing
# has been done.
#
from docutils import nodes
import sphinx
from sphinx import addnodes
from sphinx.errors import NoUri
import re
from itertools import chain

#
# Python 2 lacks re.ASCII...
#
try:
    ascii_p3 = re.ASCII
except AttributeError:
    ascii_p3 = 0

#
# Regex nastiness.  Of course.
# Try to identify "function()" that's not already marked up some
# other way.  Sphinx doesn't like a lot of stuff right after a
# :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last
# bit tries to restrict matches to things that won't create trouble.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3)

#
# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef
#
RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
                             flags=ascii_p3)

#
# Sphinx 3 uses a different C role for each one of struct, union, enum and
# typedef
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)

#
# Detects a reference to a documentation page: an optional leading
# "Documentation/", a path, and a mandatory ".rst" or ".txt" extension.
#
RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')

#
# Detects a ".. c:namespace:: <name>" directive line.  The two leading dots
# are escaped so that only a literal ".." introduces the directive.
#
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')

#
# Reserved C words that we should skip when cross-referencing
#
Skipnames = [ 'for', 'if', 'register', 'sizeof', 'struct', 'unsigned' ]


#
# Many places in the docs refer to common system calls.  It is
# pointless to try to cross-reference them and, as has been known
# to happen, somebody defining a function by these names can lead
# to the creation of incorrect and confusing cross references.  So
# just don't even try with these names.
#
Skipfuncs = [ 'open', 'close', 'read', 'write', 'fcntl', 'mmap',
              'select', 'poll', 'fork', 'execve', 'clone', 'ioctl',
              'socket' ]

#
# C namespace of the document currently being processed; set by
# auto_markup() and read by the C cross-reference helpers.
#
c_namespace = ''

#
# Detect references to commits.
#
RE_git = re.compile(r'commit\s+(?P<rev>[0-9a-f]{12,40})(?:\s+\(".*?"\))?',
                    flags=re.IGNORECASE | re.DOTALL)

#
# CSS class and C-domain reference type used for each regex handled by
# markup_c_ref().
#
_C_CLASS_STR = {# Sphinx 2 only
                RE_function: 'c-func',
                RE_generic_type: 'c-type',
                # Sphinx 3+ only
                RE_struct: 'c-struct',
                RE_union: 'c-union',
                RE_enum: 'c-enum',
                RE_typedef: 'c-type',
                }
_C_REFTYPE_STR = {# Sphinx 2 only
                  RE_function: 'function',
                  RE_generic_type: 'type',
                  # Sphinx 3+ only
                  RE_struct: 'struct',
                  RE_union: 'union',
                  RE_enum: 'enum',
                  RE_typedef: 'type',
                  }

#
# Split the text of a node into a list of nodes in which every recognized
# reference (documentation path, C function/type, git commit) has been
# replaced by the result of its markup function and everything else is
# kept as plain Text nodes.
#
def markup_refs(docname, app, node):
    t = node.astext()
    done = 0
    repl = [ ]
    #
    # Associate each regex with the function that will markup its matches
    #
    if sphinx.version_info[0] >= 3:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_func_ref_sphinx3,
                       RE_struct: markup_c_ref,
                       RE_union: markup_c_ref,
                       RE_enum: markup_c_ref,
                       RE_typedef: markup_c_ref,
                       RE_git: markup_git}
    else:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_c_ref,
                       RE_generic_type: markup_c_ref}

    match_iterators = [regex.finditer(t) for regex in markup_func]
    #
    # Sort all references by the starting position in text; when two
    # matches start at the same place, consider the longest one first.
    #
    sorted_matches = sorted(chain(*match_iterators),
                            key=lambda m: (m.start(), -m.end()))
    for m in sorted_matches:
        #
        # Matches coming from different regexes can overlap (a "foo()"
        # inside the quoted title of a commit reference, for example).
        # The text up to "done" has already been emitted, so a match
        # starting before it must be dropped or its text would show up
        # twice in the output.
        #
        if m.start() < done:
            continue
        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(t[done:m.start()]))

        #
        # Call the function associated with the regex that matched this text and
        # append its return to the text
        #
        repl.append(markup_func[m.re](docname, app, m))

        done = m.end()
    if done < len(t):
        repl.append(nodes.Text(t[done:]))
    return repl

#
# Keep track of cross-reference lookups that failed so we don't have to
# do them again.
#
failed_lookups = { }
def failure_seen(target):
    return (target) in failed_lookups
def note_failure(target):
    failed_lookups[target] = True

#
# Go through the dance of getting an xref out of the C domain for a single
# target.  Returns whatever the domain's resolve_xref() returns, or None if
# the builder raised NoUri.
#
def _resolve_c_xref(docname, app, reftype, class_name, target, target_text):
    cdom = app.env.domains['c']
    lit_text = nodes.literal(classes=['xref', 'c', class_name])
    lit_text += target_text
    pxref = addnodes.pending_xref('', refdomain = 'c',
                                  reftype = reftype,
                                  reftarget = target,
                                  modname = None,
                                  classname = None)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        return cdom.resolve_xref(app.env, docname, app.builder,
                                 reftype, target, pxref, lit_text)
    except NoUri:
        return None

#
# Build the list of targets to try for a C name: the name inside the
# document's namespace first (if the document has one), then the bare name.
#
def _c_possible_targets(base_target):
    possible_targets = [base_target]
    if c_namespace:
        possible_targets.insert(0, c_namespace + "." + base_target)
    return possible_targets

#
# In sphinx3 we can cross-reference to C macro and function, each one with its
# own C role, but both match the same regex, so we try both.
#
# Returns the resolved cross-reference node, or the matched text as a plain
# Text node when nothing could be resolved.  Targets that fail to resolve
# are remembered so that they are not looked up again.
#
def markup_func_ref_sphinx3(docname, app, match):
    base_target = match.group(2)
    target_text = nodes.Text(match.group(0))

    if base_target not in Skipnames:
        for target in _c_possible_targets(base_target):
            if (target not in Skipfuncs) and not failure_seen(target):
                xref = _resolve_c_xref(docname, app, 'function', 'c-func',
                                       target, target_text)
                if xref:
                    return xref
                note_failure(target)

    return target_text

#
# Try to turn a match of one of the C regexes (function for Sphinx 2,
# struct/union/enum/typedef otherwise) into a C-domain cross reference.
# Returns the resolved node, or the matched text as a plain Text node.
#
def markup_c_ref(docname, app, match):
    base_target = match.group(2)
    target_text = nodes.Text(match.group(0))

    if base_target not in Skipnames:
        is_function = match.re == RE_function
        for target in _c_possible_targets(base_target):
            # Only function references are subject to the Skipfuncs list
            if is_function and target in Skipfuncs:
                continue
            xref = _resolve_c_xref(docname, app, _C_REFTYPE_STR[match.re],
                                   _C_CLASS_STR[match.re], target,
                                   target_text)
            if xref:
                return xref

    return target_text

#
# Try to replace a documentation reference of the form Documentation/... with a
# cross reference to that page
#
def markup_doc_ref(docname, app, match):
    stddom = app.env.domains['std']
    #
    # Go through the dance of getting an xref out of the std domain
    #
    target = match.group(2)
    # A leading "Documentation/" makes the target absolute
    if match.group(1):
        target = "/" + target
    pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc',
                                  reftarget = target, modname = None,
                                  classname = None, refexplicit = False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        xref = stddom.resolve_xref(app.env, docname, app.builder, 'doc',
                                   target, pxref, None)
    except NoUri:
        xref = None
    #
    # Return the xref if we got it; otherwise just return the plain text.
    #
    if xref:
        return xref
    return nodes.Text(match.group(0))

#
# Return the name given to the first ".. c:namespace::" directive found in
# the source file of the given document, or '' if there is none.
#
def get_c_namespace(app, docname):
    source = app.env.doc2path(docname)
    #
    # Read the file as UTF-8 explicitly rather than depending on the
    # locale's preferred encoding, which may not be able to decode it.
    #
    with open(source, encoding='utf-8') as f:
        for l in f:
            match = RE_namespace.search(l)
            if match:
                return match.group(1)
    return ''

def markup_git(docname, app, match):
    # While we could probably assume that we are running in a git
    # repository, we can't know for sure, so let's just mechanically
    # turn them into git.kernel.org links without checking their
    # validity. (Maybe we can do something in the future to warn about
    # these references if this is explicitly requested.)
    text = match.group(0)
    rev = match.group('rev')
    return nodes.reference('', nodes.Text(text),
        refuri=f'https://git.kernel.org/torvalds/c/{rev}')

#
# Handler for the 'doctree-resolved' event: record the document's C
# namespace, then replace every eligible text node found in a paragraph
# with the nodes produced by markup_refs().
#
def auto_markup(app, doctree, name):
    global c_namespace
    c_namespace = get_c_namespace(app, name)
    def text_but_not_a_reference(node):
        # The nodes.literal test catches ``literal text``, its purpose is to
        # avoid adding cross-references to functions that have been explicitly
        # marked with :c:func:.
        if not isinstance(node, nodes.Text) or isinstance(node.parent, nodes.literal):
            return False

        # Reject text that has a Referential node among its ancestors
        parent = node.parent
        while parent:
            if isinstance(parent, nodes.Referential):
                return False
            parent = parent.parent
        return True

    #
    # This loop could eventually be improved on.  Someday maybe we
    # want a proper tree traversal with a lot of awareness of which
    # kinds of nodes to prune.  But this works well for now.
    #
    for para in doctree.traverse(nodes.paragraph):
        for node in para.traverse(condition=text_but_not_a_reference):
            node.parent.replace(node, markup_refs(name, app, node))

def setup(app):
    app.connect('doctree-resolved', auto_markup)
    return {
        'parallel_read_safe': True,
        'parallel_write_safe': True,
        }