#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707


"""
Implementation of the ``kernel-include`` reST-directive.

:copyright:  Copyright (C) 2016  Markus Heiser
:license:    GPL Version 2, June 1991 see linux/COPYING for details.

The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations.

.. hint::

    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the
    filesystem tree where the reST document with the include directive is
    placed.

Substrings of the form $name or ${name} are replaced by the value of
environment variable name. Malformed variable names and references to
non-existing variables are left unchanged.

**Supported Sphinx Include Options**:

:param literal:
    If present, the included file is inserted as a literal block.

:param code:
    Specify the language for syntax highlighting (e.g., 'c', 'python').

:param encoding:
    Specify the encoding of the included file (defaults to the document's
    input encoding setting).

:param tab-width:
    Specify the number of spaces that a tab represents.

:param start-line:
    Index of the first line to include. Lines are sliced Python-style, so
    the first line of the file has index 0.

:param end-line:
    Index of the line at which inclusion stops (this line itself is not
    included).

:param start-after:
    Include only the content that follows the first occurrence of this text.

:param end-before:
    Include only the content that precedes the first occurrence of this text.

:param number-lines:
    Number the included lines (integer specifies start number).
    Only effective with 'literal' or 'code' options.

:param class:
    Specify HTML class attribute for the included content.

**Kernel-specific Extensions**:

:param generate-cross-refs:
    If present, instead of directly including the file, it calls
    ParseDataStructs() to convert C data structures into cross-references
    that link to comprehensive documentation in other ReST files.

:param exception-file:
    (Used with generate-cross-refs)

    Path to a file containing rules for handling special cases:
    - Ignore specific C data structures
    - Use alternative reference names
    - Specify different reference types

:param warn-broken:
    (Used with generate-cross-refs)

    Enables warnings when auto-generated cross-references don't point to
    existing documentation targets.

:param toc:
    (Used with generate-cross-refs)

    If present, a TOC table containing the cross references is emitted
    instead of the cross-referenced source code.
"""

# ==============================================================================
# imports
# ==============================================================================

import os.path
import re
import sys

from docutils import io, nodes, statemachine
from docutils.statemachine import ViewList
from docutils.utils.error_reporting import SafeString, ErrorString
from docutils.parsers.rst import Directive, directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines

from sphinx.util import logging

srctree = os.path.abspath(os.environ["srctree"])
sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))

from parse_data_structs import ParseDataStructs

__version__ = "1.0"
logger = logging.getLogger(__name__)

RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')


# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive

    Most of the stuff here came from Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, subclassing it doesn't bring any benefit: the original class
    only has run() and the argument list. Not all of its arguments are
    implemented here when checked against the latest Sphinx version, as
    more arguments were added over time.

    So, keep its own list of supported arguments
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the content of ``path`` decoded with ``encoding``.

        The path is also recorded as a dependency of the document.

        Raises a "severe" directive error if the path can't be encoded,
        the file can't be opened or its content can't be decoded.
        """
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=self.state.document.settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % SafeString(path))
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handles start-line, end-line, start-after and end-before parameters

        Returns the part of ``rawtext`` selected by those options. The line
        options are applied first, as a Python slice over the lines; the
        text options are then applied to whatever is left.

        Raises a "severe" directive error if the text given to
        ``start-after`` or ``end-before`` is not found.
        """

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = rawtext.splitlines()
                rawtext = '\n'.join(lines[startline:endline])
        except UnicodeError as error:
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + io.error_string(error))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated text is fed back to the state machine as input, so
        the directive itself produces no nodes and an empty list is
        returned.
        """
        parser = ParseDataStructs()
        parser.parse_file(path)

        if 'exception-file' in self.options:
            # The exception file is looked up relative to the directory of
            # the document that contains the directive
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
            parser.process_exceptions(exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:

            # Apply the range options to the generated text only: the
            # result used to be thrown away, and slicing the text together
            # with the directive header could cut the header itself.
            #
            # NOTE(review): this assumes gen_output() returns text already
            # indented as a parsed-literal body; a start-after match in the
            # middle of a line leaves the remainder of that line without
            # indentation - confirm against ParseDataStructs.
            body = self.apply_range(parser.gen_output())
            rawtext = ".. parsed-literal::\n\n" + body

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        rawtext = parser.gen_toc()

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        # Append line numbers data

        startline = self.options.get('start-line', None)

        result = ViewList()
        if startline and startline > 0:
            offset = startline - 1
        else:
            offset = 0

        for ln, line in enumerate(include_lines, start=offset):
            result.append(line, path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block

        Returns a list with a single ``literal_block`` node holding
        ``rawtext``. When ``number-lines`` is set, the text is split into
        numbered tokens by NumberLines.

        Raises a directive error if ``number-lines`` is not an integer.
        """

        # Convert tabs to spaces, unless `tab_width` is negative.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext
        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)
        if "number-lines" in self.options:
            try:
                startline = int(self.options["number-lines"] or 1)
            except ValueError:
                raise self.error(":number-lines: with non-integer start value")
            # The line count is needed to compute the last line number; it
            # was previously read from a name that didn't exist in this scope
            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)
            endline = startline + len(include_lines)
            if text.endswith("\n"):
                text = text[:-1]
            tokens = NumberLines([([], text)], startline, endline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline(value, value,
                                                  classes=classes)
                else:
                    # The second (rawsource) argument of nodes.Text is
                    # deprecated in docutils, so only the data is passed
                    literal_block += nodes.Text(value)
        else:
            literal_block += nodes.Text(text)
        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block

        ``rawtext`` is split into lines and handed to the docutils
        CodeBlock directive, using the value of the ``code`` option as its
        argument. Returns whatever CodeBlock.run() returns.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """Include a file as part of the content of this reST file."""
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree. The logic does check it to prevent
        # directory traversal issues.
        #

        srctree = os.path.abspath(os.environ["srctree"])

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(srctree, path)

        if not os.path.isfile(src_path):
            # Directive.warning() takes a single, already formatted message
            raise self.warning('File "%s" doesn\'t exist' % path)

        base = srctree
        path = src_path

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        # NOTE(review): self.arguments[0] was replaced above by a path joined
        # to srctree, so this "<...>" form looks unreachable; the attribute
        # standard_include_path is not defined in this file - confirm before
        # relying on it.
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)

# ==============================================================================

reported = set()

def check_missing_refs(app, env, node, contnode):
    """
    Check broken refs for the files it creates xrefs

    Handler for the "missing-reference" event. A warning is logged once
    per (source, message) pair, and only for nodes whose source is one of
    the files stored at env._xref_files. It always returns None, so the
    reference is never resolved here.
    """
    if not node.source:
        return None

    try:
        xref_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if node.source not in xref_files:
        return None

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"can't link to: {domain}:{reftype}:: {target}"

    # Don't duplicate warnings
    data = (node.source, msg)
    if data in reported:
        return None
    reported.add(data)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None

def merge_xref_info(app, env, docnames, other):
    """
    As each process modifies env._xref_files, we need to merge them back.

    Handler for the "env-merge-info" event: the files stored at ``other``
    are added to the ones stored at ``env``.
    """
    if not hasattr(other, "_xref_files"):
        return
    env._xref_files.update(other._xref_files)

def init_xref_docs(app, env, docnames):
    """Initialize the set of files that we're generating cross references"""
    app.env._xref_files = set()

# ==============================================================================

def setup(app):
    """Setup Sphinx extension"""

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }