#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707


"""
Implementation of the ``kernel-include`` reST-directive.

:copyright:  Copyright (C) 2016  Markus Heiser
:license:    GPL Version 2, June 1991 see linux/COPYING for details.

The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations inside the
Kernel source tree.

.. hint::

    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the
    filesystem tree where the reST document with the include directive is
    placed.

Substrings of the form $name or ${name} are replaced by the value of
environment variable name. Malformed variable names and references to
non-existing variables are left unchanged.

**Supported Sphinx Include Options**:

:param literal:
    If present, the included file is inserted as a literal block.

:param code:
    Specify the language for syntax highlighting (e.g., 'c', 'python').

:param encoding:
    Specify the encoding of the included file (defaults to the document's
    ``input_encoding`` setting).

:param tab-width:
    Specify the number of spaces that a tab represents.

:param start-line:
    Index of the first line to include. For plain includes this follows
    Python slice semantics (the first line has index 0). With ``toc``, TOC
    entries whose line number is lower than this value are dropped.

:param end-line:
    Index of the line at which to stop. For plain includes this follows
    Python slice semantics (the line itself is *not* included). With
    ``toc``, TOC entries whose line number is higher than this value are
    dropped.

:param start-after:
    Include the content after the first occurrence of this text.

:param end-before:
    Include the content before the first occurrence of this text.

:param number-lines:
    Number the included lines (integer specifies start number).
    Only effective with 'literal' or 'code' options.

:param class:
    Specify HTML class attribute for the included content.

**Kernel-specific Extensions**:

:param generate-cross-refs:
    If present, instead of directly including the file, it calls
    ParseDataStructs() to convert C data structures into cross-references
    that link to comprehensive documentation in other ReST files.

:param exception-file:
    (Used with generate-cross-refs)

    Path to a file containing rules for handling special cases:
    - Ignore specific C data structures
    - Use alternative reference names
    - Specify different reference types

:param warn-broken:
    (Used with generate-cross-refs)

    Enables warnings when auto-generated cross-references don't point to
    existing documentation targets.

:param toc:
    (Used with generate-cross-refs)

    Instead of the cross-referenced source code, output the table of
    contents produced by the parser.
"""

# ==============================================================================
# imports
# ==============================================================================

import os.path
import re
import sys

from docutils import io, nodes, statemachine
from docutils.statemachine import ViewList
from docutils.parsers.rst import Directive, directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines

from sphinx.util import logging

# The parser library lives inside the Kernel tree, so the module search path
# has to be extended before it can be imported.
srctree = os.path.abspath(os.environ["srctree"])
sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))

from parse_data_structs import ParseDataStructs

__version__ = "1.0"
logger = logging.getLogger(__name__)

RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")


def ErrorString(exc):  # Shamelessly stolen from docutils
    """Return ``exc`` formatted as "<ExceptionClass>: <message>"."""
    return f'{exc.__class__.__name__}: {exc}'


# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive

    Most of the stuff here came from Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, overriding that class doesn't bring any benefit: the original class
    only has run() and an argument list. Not all of its arguments are
    implemented here when checked against the latest Sphinx version, as more
    arguments were added over time.

    So, keep our own list of supported arguments.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the content of ``path`` decoded with ``encoding``.

        The file is also recorded as a docutils dependency. Problems opening
        or decoding the file are reported as a "severe" directive error.
        """
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=self.state.document.settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % path)
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handles start-line, end-line, start-after and end-before parameters.

        Returns the part of ``rawtext`` selected by those options; the text
        is returned unchanged when none of them is set. A "severe" directive
        error is raised when the start-after/end-before text is not found.
        """

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = rawtext.splitlines()
                rawtext = '\n'.join(lines[startline:endline])
        except UnicodeError as error:
            # Use the local helper, like the other error paths in this class
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + ErrorString(error))

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]

        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated text is fed back into the state machine, so an empty
        node list is returned.
        """
        parser = ParseDataStructs()

        if 'exception-file' in self.options:
            # The exceptions file is relative to the document that contains
            # the directive.
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
        else:
            exceptions_file = None

        parser.parse_file(path, exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # Apply the range options to the generated text itself, before
            # the directive header is prepended: this way start-line or
            # start-after can't strip the ".. parsed-literal::" line.
            rawtext = ".. parsed-literal::\n\n" + self.apply_range(parser.gen_output())

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)

        relpath = os.path.relpath(path, srctree)

        result = ViewList()
        for line in parser.gen_toc().split("\n"):
            match = RE_LINENO_REF.match(line)
            if not match:
                result.append(line, path)
                continue

            ln, ref = match.groups()
            ln = int(ln)

            # Filter line range if needed
            if startline and (ln < startline):
                continue

            if endline and (ln > endline):
                continue

            # Sphinx numerates starting with zero, but text editors
            # and other tools start from one
            realln = ln + 1
            result.append(f"- {ref}: {relpath}#{realln}", path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block.

        Returns a list with a single ``literal_block`` node holding
        ``rawtext``, optionally with line numbers (``number-lines`` option).
        """

        # Convert tabs to spaces, if `tab_width` is positive.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext

        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)

        if "number-lines" not in self.options:
            literal_block += nodes.Text(text, text)
            return [literal_block]

        try:
            startline = int(self.options["number-lines"] or 1)
        except ValueError:
            raise self.error(":number-lines: with non-integer start value")

        # The last line number depends on how many lines are included
        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        endline = startline + len(include_lines)

        if text.endswith("\n"):
            text = text[:-1]

        tokens = NumberLines([([], text)], startline, endline)
        for classes, value in tokens:
            if classes:
                literal_block += nodes.inline(value, value,
                                              classes=classes)
            else:
                literal_block += nodes.Text(value, value)

        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block.

        ``rawtext`` is split into lines and handed to the docutils
        ``CodeBlock`` directive, using the ``code`` option as its language
        argument. Returns whatever ``CodeBlock.run()`` returns.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """Include a file as part of the content of this reST file."""
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree.  The logic does check it to prevent
        # directory traverse issues.
        #

        base = os.path.abspath(os.environ["srctree"])

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(base, path)

        if not os.path.isfile(src_path):
            raise self.warning('File "%s" doesn\'t exist' % path)

        path = src_path

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)

        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))

        # The argument is already an absolute path inside the source tree,
        # so docutils' "<standard include>" syntax can't show up here and
        # the join below only normalizes the path.
        path = directives.path(self.arguments[0])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)


# ==============================================================================

# (source, message) pairs already reported by check_missing_refs()
reported = set()

# Maps each Sphinx domain name to the list of its object types
DOMAIN_INFO = {}


def fill_domain_info(env):
    """
    Get supported reference types for each Sphinx domain and C namespaces.

    The result is cached at DOMAIN_INFO: once it is filled, further calls
    return immediately.
    """
    if DOMAIN_INFO:
        return

    for domain_name, domain_instance in env.domains.items():
        try:
            DOMAIN_INFO[domain_name] = list(domain_instance.object_types.keys())
        except AttributeError:
            # Ignore domains that we can't retrieve object types, if any
            pass


def get_suggestions(app, env, node,
                    original_target, original_domain, original_reftype):
    """
    Check if target exists in the other domain or with different reftypes.

    Returns a list of strings, one per matching object found in the domains
    listed at DOMAIN_INFO. The comparison is case-insensitive and also tries
    the target with "-" and "_" swapped.
    """
    original_target = original_target.lower()

    # Remove namespace if present
    if '.' in original_target:
        original_target = original_target.split(".")[-1]

    targets = {
        original_target,
        original_target.replace("-", "_"),
        original_target.replace("_", "-"),
    }

    # Propose some suggestions, if possible
    # The code below checks not only variants of the target, but also it
    # works when .. c:namespace:: targets setting a different C namespace
    # is in place

    suggestions = []
    for target in sorted(targets):
        for domain in DOMAIN_INFO:
            domain_obj = env.get_domain(domain)
            for name, _dispname, objtype, docname, _anchor, _prio in domain_obj.get_objects():
                candidate = name.lower()

                if domain == "c":
                    # The name may belong to a different C namespace: if so,
                    # compare only the part after the last dot
                    match = RE_SPLIT_DOMAIN.match(name)
                    if match:
                        candidate = match.group(2).lower()

                if target != candidate:
                    continue

                suggestions.append(f"\t{domain}:{objtype}:`{name}` (from {docname})")

    return suggestions


def check_missing_refs(app, env, node, contnode):
    """
    Check broken refs for the files it creates xrefs.

    Handler for the "missing-reference" event: emits a warning for each
    unresolved reference that comes from a file registered at
    env._xref_files. Always returns None, so the reference stays unresolved.
    """
    if not node.source:
        return None

    try:
        xref_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if node.source not in xref_files:
        return None

    fill_domain_info(env)

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"Invalid xref: {domain}:{reftype}:`{target}`"

    # Don't duplicate warnings
    data = (node.source, msg)
    if data in reported:
        return None
    reported.add(data)

    suggestions = get_suggestions(app, env, node, target, domain, reftype)
    if suggestions:
        msg += ". Possible alternatives:\n" + '\n'.join(suggestions)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None


def merge_xref_info(app, env, docnames, other):
    """
    As each process modifies env._xref_files, we need to merge them back.
    """
    if not hasattr(other, "_xref_files"):
        return

    env._xref_files.update(other._xref_files)


def init_xref_docs(app, env, docnames):
    """Initialize the set of files that we're generating cross references for"""
    app.env._xref_files = set()


# ==============================================================================

def setup(app):
    """Setup Sphinx extension"""

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }