#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707


"""
Implementation of the ``kernel-include`` reST-directive.

:copyright:  Copyright (C) 2016  Markus Heiser
:license:    GPL Version 2, June 1991 see linux/COPYING for details.

The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and allows including files from arbitrary locations.

.. hint::

    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the
    filesystem tree where the reST document with the include directive is
    placed.

Substrings of the form $name or ${name} are replaced by the value of
environment variable name. Malformed variable names and references to
non-existing variables are left unchanged.

**Supported Sphinx Include Options**:

:param literal:
    If present, the included file is inserted as a literal block.

:param code:
    Specify the language for syntax highlighting (e.g., 'c', 'python').

:param encoding:
    Specify the encoding of the included file (default: 'utf-8').

:param tab-width:
    Specify the number of spaces that a tab represents.

:param start-line:
    Index of the first line to include. The value is used as the start of a
    Python slice, so counting begins at 0 (``start-line`` lines are skipped).

:param end-line:
    Index of the line at which to stop including the file. The value is used
    as the end of a Python slice, so the line itself is not included.

:param start-after:
    Include the content after the first occurrence of this text.

:param end-before:
    Include the content before the first occurrence of this text.

:param number-lines:
    Number the included lines (integer specifies start number).
    Only effective with 'literal' or 'code' options.

:param class:
    Specify HTML class attribute for the included content.

**Kernel-specific Extensions**:

:param generate-cross-refs:
    If present, instead of directly including the file, it calls
    ParseDataStructs() to convert C data structures into cross-references
    that link to comprehensive documentation in other ReST files.

:param exception-file:
    (Used with generate-cross-refs)

    Path to a file containing rules for handling special cases:

    - Ignore specific C data structures
    - Use alternative reference names
    - Specify different reference types

:param warn-broken:
    (Used with generate-cross-refs)

    Enables warnings when auto-generated cross-references don't point to
    existing documentation targets.

:param toc:
    (Used with generate-cross-refs)

    Instead of the C source with cross-references, output a TOC table
    containing the cross references.
"""

# ==============================================================================
# imports
# ==============================================================================

import os.path
import re
import sys

from docutils import io, nodes, statemachine
from docutils.statemachine import ViewList
from docutils.utils.error_reporting import SafeString, ErrorString
from docutils.parsers.rst import Directive, directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines

from sphinx.util import logging

# The build is expected to export "srctree"; the helper library used below
# is looked up relative to it.
srctree = os.path.abspath(os.environ["srctree"])
sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))

from parse_data_structs import ParseDataStructs

__version__ = "1.0"
logger = logging.getLogger(__name__)

RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')


# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive

    Most of the stuff here came from the Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, overriding that class doesn't bring any benefit: the original class
    only has run() and an argument list. Not all of its arguments are
    implemented here when checked against the latest Sphinx version, as more
    arguments were added over time.

    So, keep its own list of supported arguments.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the content of ``path`` decoded with ``encoding``.

        The path is also recorded as a dependency of the current document.
        Failures to open or decode the file are reported as a "severe"
        directive error.
        """
        try:
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=self.state.document.settings.input_encoding_error_handler)
        except UnicodeEncodeError:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % SafeString(path))
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' % ErrorString(error))

    def apply_range(self, rawtext):
        """
        Handle the start-line, end-line, start-after and end-before options.

        Returns the part of ``rawtext`` selected by those options. A "severe"
        directive error is raised when the text given to ``start-after`` or
        ``end-before`` is not found.
        """

        # Line-based selection: plain Python slice over the list of lines
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        if startline or (endline is not None):
            lines = rawtext.splitlines()
            rawtext = '\n'.join(lines[startline:endline])

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]

        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated text is fed back to the reST state machine, so the
        method always returns an empty node list.
        """
        parser = ParseDataStructs()
        parser.parse_file(path)

        if 'exception-file' in self.options:
            # The exception file is relative to the including document
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
            parser.process_exceptions(exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # Apply the range options to the generated body only, so that the
            # ".. parsed-literal::" header can never be sliced away.
            rawtext = ".. parsed-literal::\n\n" + self.apply_range(parser.gen_output())

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        rawtext = parser.gen_toc()

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        # Append line numbers data

        startline = self.options.get('start-line', None)
        if startline and startline > 0:
            offset = startline - 1
        else:
            offset = 0

        result = ViewList()
        for ln, line in enumerate(include_lines, start=offset):
            result.append(line, path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output ``rawtext`` as a literal block.

        Returns a list with the single literal_block node. When the
        ``number-lines`` option is present, line numbers are added, starting
        at the option value (default: 1).
        """

        # Convert tabs to spaces, unless `tab_width` is negative.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext

        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)

        if "number-lines" not in self.options:
            literal_block += nodes.Text(text)
            return [literal_block]

        try:
            startline = int(self.options["number-lines"] or 1)
        except ValueError:
            raise self.error(":number-lines: with non-integer start value")

        # The number of included lines defines the last line number
        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        endline = startline + len(include_lines)

        if text.endswith("\n"):
            text = text[:-1]

        tokens = NumberLines([([], text)], startline, endline)
        for classes, value in tokens:
            if classes:
                literal_block += nodes.inline(value, value, classes=classes)
            else:
                literal_block += nodes.Text(value)

        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output ``rawtext`` as a code block.

        The work is delegated to the docutils CodeBlock directive, using the
        value of the ``code`` option as its language argument.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """Include a file as part of the content of this reST file."""
        env = self.state.document.settings.env
        path = os.path.realpath(os.path.expandvars(self.arguments[0]))

        # to get a bit security back, prohibit /etc:
        if path.startswith(os.sep + "etc"):
            raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        env.note_dependency(os.path.abspath(path))

        # HINT: I had to copy&paste the whole Include.run method. I'm not happy
        # with this, but due to security reasons, the Include.run method does
        # not allow absolute or relative pathnames pointing to locations *above*
        # the filesystem tree where the reST document is placed.

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        source = self.state_machine.input_lines.source(self.lineno -
                                                       self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Cross-references generation has its own reader and output logic
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "literal" in self.options:
            return self.literal(path, tab_width, rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        # Default: let the included content be parsed as reST
        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        self.state_machine.insert_input(include_lines, path)
        return []

# ==============================================================================

# (source, message) pairs already warned about
reported = set()

def check_missing_refs(app, env, node, contnode):
    """
    Warn about broken references inside files that had xrefs generated.

    Connected to the "missing-reference" event. It never resolves the
    reference itself, so it always returns None.
    """
    if not node.source:
        return None

    try:
        xref_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if node.source not in xref_files:
        return None

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"can't link to: {domain}:{reftype}:: {target}"

    # Don't duplicate warnings
    data = (node.source, msg)
    if data in reported:
        return None
    reported.add(data)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None

def merge_xref_info(app, env, docnames, other):
    """
    As each process modifies env._xref_files, we need to merge them back.
    """
    other_files = getattr(other, "_xref_files", None)
    if other_files:
        env._xref_files.update(other_files)

def init_xref_docs(app, env, docnames):
    """Initialize the set of files for which cross references are generated"""
    app.env._xref_files = set()

# ==============================================================================

def setup(app):
    """Setup Sphinx extension"""

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }