# SPDX-License-Identifier: GPL-2.0
# Copyright 2019 Jonathan Corbet <corbet@lwn.net>
#
# Apply kernel-specific tweaks after the initial document processing
# has been done.
#
from docutils import nodes
import sphinx
from sphinx import addnodes
from sphinx.errors import NoUri
import re
from itertools import chain

#
# Python 2 lacks re.ASCII...
#
try:
    ascii_p3 = re.ASCII
except AttributeError:
    ascii_p3 = 0

#
# Regex nastiness.  Of course.
# Try to identify "function()" that's not already marked up some
# other way.  Sphinx doesn't like a lot of stuff right after a
# :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last
# bit tries to restrict matches to things that won't create trouble.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3)

#
# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef
#
RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
                             flags=ascii_p3)

#
# Sphinx 3 uses a different C role for each one of struct, union, enum and
# typedef
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)

#
# Detects a reference to a documentation page of the form Documentation/... with
# an optional extension
#
RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')

#
# Matches a ".. c:namespace:: <name>" directive line; group 1 is the
# namespace name.  The two dots are escaped so that only a literal ".."
# directive marker is accepted rather than any two characters.
#
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')

#
# Reserved C words that we should skip when cross-referencing
#
Skipnames = [ 'for', 'if', 'register', 'sizeof', 'struct', 'unsigned' ]


#
# Many places in the docs refer to common system calls.
# It is pointless to try to cross-reference them and, as has been known
# to happen, somebody defining a function by these names can lead
# to the creation of incorrect and confusing cross references.  So
# just don't even try with these names.
#
Skipfuncs = [ 'open', 'close', 'read', 'write', 'fcntl', 'mmap',
              'select', 'poll', 'fork', 'execve', 'clone', 'ioctl',
              'socket' ]

#
# C namespace of the document currently being processed; '' when there
# is none.
#
c_namespace = ''

#
# Detect references to commits.
#
RE_git = re.compile(r'commit\s+(?P<rev>[0-9a-f]{12,40})(?:\s+\(".*?"\))?',
                    flags=re.IGNORECASE | re.DOTALL)

#
# Matches coming from different regexes can cover overlapping stretches of
# text; a commit title such as ("fix foo()") contains something that looks
# like a function reference, for example.  Emitting both would duplicate
# text in the output, so keep only the matches that start at or after the
# end of the previously kept one.
#
# @matches must already be sorted by starting position.  Returns a list.
#
def drop_overlapping_matches(matches):
    kept = []
    covered = 0
    for m in matches:
        if m.start() < covered:
            # Lies (at least partly) inside text that is already claimed.
            continue
        kept.append(m)
        covered = m.end()
    return kept

#
# Split the text of @node into a list of nodes: plain Text nodes for the
# stretches that match nothing, and whatever the per-regex markup function
# returns for each match.
#
def markup_refs(docname, app, node):
    t = node.astext()
    done = 0
    repl = [ ]
    #
    # Associate each regex with the function that will markup its matches
    #
    markup_func_sphinx2 = {RE_doc: markup_doc_ref,
                           RE_function: markup_c_ref,
                           RE_generic_type: markup_c_ref}

    markup_func_sphinx3 = {RE_doc: markup_doc_ref,
                           RE_function: markup_func_ref_sphinx3,
                           RE_struct: markup_c_ref,
                           RE_union: markup_c_ref,
                           RE_enum: markup_c_ref,
                           RE_typedef: markup_c_ref,
                           RE_git: markup_git}

    if sphinx.version_info[0] >= 3:
        markup_func = markup_func_sphinx3
    else:
        markup_func = markup_func_sphinx2

    match_iterators = [regex.finditer(t) for regex in markup_func]
    #
    # Sort all references by the starting position in text
    #
    sorted_matches = sorted(chain(*match_iterators), key=lambda m: m.start())
    for m in drop_overlapping_matches(sorted_matches):
        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(t[done:m.start()]))

        #
        # Call the function associated with the regex that matched this text and
        # append its return to the text
        #
        repl.append(markup_func[m.re](docname, app, m))

        done = m.end()
    if done < len(t):
        repl.append(nodes.Text(t[done:]))
    return repl

#
# Keep track of cross-reference lookups that failed so we don't have to
# do them again.
#
failed_lookups = { }

# Return True if a lookup of @target has already been recorded as failed.
def failure_seen(target):
    return (target) in failed_lookups

# Record that a lookup of @target failed.
def note_failure(target):
    failed_lookups[target] = True

#
# Build the list of names to try for @base_target.  If this document has
# a namespace, the namespaced name is tried first.
#
def possible_c_targets(base_target):
    targets = [base_target]
    if c_namespace:
        targets.insert(0, c_namespace + "." + base_target)
    return targets

#
# Go through the dance of getting an xref out of the C domain: wrap
# @target_text in a literal node carrying @css_class, build a pending_xref
# of type @reftype for @target and ask the C domain to resolve it.
# Returns whatever the domain returned, or None on NoUri.
#
def resolve_c_xref(app, docname, reftype, css_class, target, target_text):
    cdom = app.env.domains['c']
    lit_text = nodes.literal(classes=['xref', 'c', css_class])
    lit_text += target_text
    pxref = addnodes.pending_xref('', refdomain = 'c',
                                  reftype = reftype,
                                  reftarget = target,
                                  modname = None,
                                  classname = None)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        return cdom.resolve_xref(app.env, docname, app.builder,
                                 reftype, target, pxref, lit_text)
    except NoUri:
        return None

#
# In sphinx3 C macros and functions each have their own C role, but both
# match the same regex.  Only a single lookup, of type 'function', is done
# here.
# NOTE(review): presumably the C domain resolves macros through that same
# lookup - confirm against the Sphinx C domain.
#
# Returns the resolved xref, or the plain matched text if the name is to
# be skipped or nothing could be resolved.  Failed targets are remembered
# so that they are not looked up again.
#
def markup_func_ref_sphinx3(docname, app, match):
    base_target = match.group(2)
    target_text = nodes.Text(match.group(0))

    if base_target not in Skipnames:
        for target in possible_c_targets(base_target):
            if (target not in Skipfuncs) and not failure_seen(target):
                xref = resolve_c_xref(app, docname, 'function', 'c-func',
                                      target, target_text)
                if xref:
                    return xref
                note_failure(target)

    return target_text

#
# Turn a match of one of the C regexes (function or type) into a cross
# reference; the regex that produced the match selects the reference type
# and the CSS class.  Returns the resolved xref, or the plain matched text
# if the name is to be skipped or nothing could be resolved.
#
def markup_c_ref(docname, app, match):
    class_str = {# Sphinx 2 only
                 RE_function: 'c-func',
                 RE_generic_type: 'c-type',
                 # Sphinx 3+ only
                 RE_struct: 'c-struct',
                 RE_union: 'c-union',
                 RE_enum: 'c-enum',
                 RE_typedef: 'c-type',
                 }
    reftype_str = {# Sphinx 2 only
                   RE_function: 'function',
                   RE_generic_type: 'type',
                   # Sphinx 3+ only
                   RE_struct: 'struct',
                   RE_union: 'union',
                   RE_enum: 'enum',
                   RE_typedef: 'type',
                   }

    base_target = match.group(2)
    target_text = nodes.Text(match.group(0))

    if base_target not in Skipnames:
        for target in possible_c_targets(base_target):
            # Skipfuncs only applies to function references.
            if not (match.re == RE_function and target in Skipfuncs):
                xref = resolve_c_xref(app, docname, reftype_str[match.re],
                                      class_str[match.re], target,
                                      target_text)
                if xref:
                    return xref

    return target_text

#
# Try to replace a documentation reference of the form Documentation/...
with a 241# cross reference to that page 242# 243def markup_doc_ref(docname, app, match): 244 stddom = app.env.domains['std'] 245 # 246 # Go through the dance of getting an xref out of the std domain 247 # 248 absolute = match.group(1) 249 target = match.group(2) 250 if absolute: 251 target = "/" + target 252 xref = None 253 pxref = addnodes.pending_xref('', refdomain = 'std', reftype = 'doc', 254 reftarget = target, modname = None, 255 classname = None, refexplicit = False) 256 # 257 # XXX The Latex builder will throw NoUri exceptions here, 258 # work around that by ignoring them. 259 # 260 try: 261 xref = stddom.resolve_xref(app.env, docname, app.builder, 'doc', 262 target, pxref, None) 263 except NoUri: 264 xref = None 265 # 266 # Return the xref if we got it; otherwise just return the plain text. 267 # 268 if xref: 269 return xref 270 else: 271 return nodes.Text(match.group(0)) 272 273def get_c_namespace(app, docname): 274 source = app.env.doc2path(docname) 275 with open(source) as f: 276 for l in f: 277 match = RE_namespace.search(l) 278 if match: 279 return match.group(1) 280 return '' 281 282def markup_git(docname, app, match): 283 # While we could probably assume that we are running in a git 284 # repository, we can't know for sure, so let's just mechanically 285 # turn them into git.kernel.org links without checking their 286 # validity. (Maybe we can do something in the future to warn about 287 # these references if this is explicitly requested.) 288 text = match.group(0) 289 rev = match.group('rev') 290 return nodes.reference('', nodes.Text(text), 291 refuri=f'https://git.kernel.org/torvalds/c/{rev}') 292 293def auto_markup(app, doctree, name): 294 global c_namespace 295 c_namespace = get_c_namespace(app, name) 296 def text_but_not_a_reference(node): 297 # The nodes.literal test catches ``literal text``, its purpose is to 298 # avoid adding cross-references to functions that have been explicitly 299 # marked with cc:func:. 
300 if not isinstance(node, nodes.Text) or isinstance(node.parent, nodes.literal): 301 return False 302 303 child_of_reference = False 304 parent = node.parent 305 while parent: 306 if isinstance(parent, nodes.Referential): 307 child_of_reference = True 308 break 309 parent = parent.parent 310 return not child_of_reference 311 312 # 313 # This loop could eventually be improved on. Someday maybe we 314 # want a proper tree traversal with a lot of awareness of which 315 # kinds of nodes to prune. But this works well for now. 316 # 317 for para in doctree.traverse(nodes.paragraph): 318 for node in para.traverse(condition=text_but_not_a_reference): 319 node.parent.replace(node, markup_refs(name, app, node)) 320 321def setup(app): 322 app.connect('doctree-resolved', auto_markup) 323 return { 324 'parallel_read_safe': True, 325 'parallel_write_safe': True, 326 } 327