#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707


"""
Implementation of the ``kernel-include`` reST-directive.

:copyright:  Copyright (C) 2016  Markus Heiser
:license:    GPL Version 2, June 1991 see linux/COPYING for details.

The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations inside the
Kernel source tree.

.. hint::

    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the
    filesystem tree where the reST document with the include directive is
    placed.

Substrings of the form $name or ${name} are replaced by the value of
environment variable name. Malformed variable names and references to
non-existing variables are left unchanged.

**Supported Sphinx Include Options**:

:param literal:
    If present, the included file is inserted as a literal block.

:param code:
    Specify the language for syntax highlighting (e.g., 'c', 'python').

:param encoding:
    Specify the encoding of the included file (default: 'utf-8').

:param tab-width:
    Specify the number of spaces that a tab represents.

:param start-line:
    Index of the first line to include. As with Python slices, the first
    line of the file has index 0, so ``start-line: N`` skips the first
    N lines.

:param end-line:
    Index of the line at which to stop including the file (0-based,
    exclusive: the line with this index is not included).

:param start-after:
    Include the content after the first occurrence of this text.

:param end-before:
    Include the content before the first occurrence of this text.

:param number-lines:
    Number the included lines (integer specifies start number).
    Only effective with 'literal' or 'code' options.

:param class:
    Specify HTML class attribute for the included content.

**Kernel-specific Extensions**:

:param generate-cross-refs:
    If present, instead of directly including the file, it calls
    ParseDataStructs() to convert C data structures into cross-references
    that link to comprehensive documentation in other ReST files.

:param exception-file:
    (Used with generate-cross-refs)

    Path to a file containing rules for handling special cases:
    - Ignore specific C data structures
    - Use alternative reference names
    - Specify different reference types

:param warn-broken:
    (Used with generate-cross-refs)

    Enables warnings when auto-generated cross-references don't point to
    existing documentation targets.

:param toc:
    (Used with generate-cross-refs)

    If present, a TOC table containing the cross references is produced
    instead of the C source code with cross-references.
"""

# ==============================================================================
# imports
# ==============================================================================

import os.path
import re
import sys

from docutils import io, nodes, statemachine
from docutils.statemachine import ViewList
from docutils.parsers.rst import Directive, directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines

from sphinx.util import logging

srctree = os.path.abspath(os.environ["srctree"])
sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))

from parse_data_structs import ParseDataStructs

__version__ = "1.0"
logger = logging.getLogger(__name__)

RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')


def ErrorString(exc):  # Shamelessly stolen from docutils
    """Return ``exc`` formatted as ``<ExceptionClassName>: <message>``."""
    return f'{exc.__class__.__name__}: {exc}'


# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive

    Most of the stuff here came from Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, overriding that class doesn't have any benefits: the original class
    only has run() and an argument list. Not all of the arguments are
    implemented when checked against the latest Sphinx version, as more
    arguments were added over time.

    So, keep its own list of supported arguments
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read and return the content of ``path``, decoded with ``encoding``.

        The path is also recorded as a docutils dependency. Encoding and
        I/O problems are reported as "severe" directive errors.
        """
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=self.state.document.settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % path)
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handles start-line, end-line, start-after and end-before parameters

        Returns the selected part of ``rawtext``; the input string itself
        is not modified, so callers must use the return value.
        """

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                # Python slice semantics: 0-based start, exclusive end
                lines = rawtext.splitlines()
                rawtext = '\n'.join(lines[startline:endline])
        except UnicodeError as error:
            # Use the local helper, like the other error paths of this class
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + ErrorString(error))

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]

        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated lines are pushed into the state machine input, so an
        empty node list is returned.
        """
        parser = ParseDataStructs()
        parser.parse_file(path)

        if 'exception-file' in self.options:
            # The exception file is relative to the including reST document
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
            parser.process_exceptions(exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # apply_range() returns the selected text instead of modifying
            # its argument, so its result must be used. The range is applied
            # to the generated body only, so that the parsed-literal header
            # below can't be sliced away by start-line/start-after.
            body = self.apply_range(parser.gen_output())
            rawtext = ".. parsed-literal::\n\n" + body

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        rawtext = parser.gen_toc()

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        # Append line numbers data

        startline = self.options.get('start-line', None)

        result = ViewList()
        if startline and startline > 0:
            offset = startline - 1
        else:
            offset = 0

        for ln, line in enumerate(include_lines, start=offset):
            result.append(line, path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block

        Returns a list with a single ``literal_block`` node holding
        ``rawtext``, optionally with line numbers (``number-lines`` option).
        """

        # Convert tabs to spaces, unless `tab_width` is negative.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext

        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)

        if "number-lines" in self.options:
            try:
                startline = int(self.options["number-lines"] or 1)
            except ValueError:
                raise self.error(":number-lines: with non-integer start value")

            # The number of included lines defines the last line number
            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)
            endline = startline + len(include_lines)

            if text.endswith("\n"):
                text = text[:-1]

            tokens = NumberLines([([], text)], startline, endline)
            for classes, value in tokens:
                if classes:
                    literal_block += nodes.inline(value, value,
                                                  classes=classes)
                else:
                    # The "rawsource" argument of nodes.Text is deprecated
                    literal_block += nodes.Text(value)
        else:
            literal_block += nodes.Text(text)

        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output a code block

        ``rawtext`` is split into lines and handed to the docutils CodeBlock
        directive, using the ``code`` option as its language argument.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """Include a file as part of the content of this reST file."""
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree.  The logic does check it to prevent
        # directory traverse issues.
        #

        srctree = os.path.abspath(os.environ["srctree"])

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(srctree, path)

        if os.path.isfile(src_path):
            base = srctree
            path = src_path
        else:
            # self.warning() takes a single, already formatted, message
            raise self.warning('File "%s" doesn\'t exist' % path)

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        # NOTE(review): at this point the path was already joined with the
        # absolute srctree, so the "<...>" form looks unreachable - confirm
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        # NOTE(review): a literal block is produced even when the "literal"
        # flag is absent - confirm this is the intended default
        return self.literal(path, tab_width, rawtext)

# ==============================================================================

reported = set()


def check_missing_refs(app, env, node, contnode):
    """
    Check broken refs for the files it creates xrefs

    Handler for the "missing-reference" event. It only logs a warning (once
    per source/message pair) and always returns None.
    """
    if not node.source:
        return None

    try:
        xref_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if node.source not in xref_files:
        return None

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"can't link to: {domain}:{reftype}:: {target}"

    # Don't duplicate warnings
    data = (node.source, msg)
    if data in reported:
        return None
    reported.add(data)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None


def merge_xref_info(app, env, docnames, other):
    """
    As each process modifies env._xref_files, we need to merge them back.
    """
    if not hasattr(other, "_xref_files"):
        return

    # Be tolerant with an environment whose set wasn't initialized yet
    if not hasattr(env, "_xref_files"):
        env._xref_files = set()

    env._xref_files.update(other._xref_files)


def init_xref_docs(app, env, docnames):
    """Initialize the set of files that we're generating cross references"""
    app.env._xref_files = set()

# ==============================================================================


def setup(app):
    """Setup Sphinx extension"""

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }