#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707


"""
Implementation of the ``kernel-include`` reST-directive.

:copyright:  Copyright (C) 2016  Markus Heiser
:license:    GPL Version 2, June 1991 see linux/COPYING for details.

The ``kernel-include`` reST-directive is a replacement for the ``include``
directive. The ``kernel-include`` directive expands environment variables in
the path name and resolves the result relative to the Kernel source tree
(``$srctree``); paths that end up outside of that tree are rejected.

.. hint::

    Including files from arbitrary locations (e.g. from ``/etc``) is a
    security risk for builders. This is why the ``include`` directive from
    docutils *prohibits* pathnames pointing to locations *above* the
    filesystem tree where the reST document with the include directive is
    placed.

Substrings of the form $name or ${name} are replaced by the value of
environment variable name. Malformed variable names and references to
non-existing variables are left unchanged.

**Supported Sphinx Include Options**:

:param literal:
    If present, the included file is inserted as a literal block.

:param code:
    Specify the language for syntax highlighting (e.g., 'c', 'python').

:param encoding:
    Specify the encoding of the included file (defaults to the document's
    ``input_encoding`` setting).

:param tab-width:
    Specify the number of spaces that a tab represents.

:param start-line:
    Index of the first line to include. Lines are sliced Python-style, so
    the first line of the file has index 0.

:param end-line:
    Index of the line at which to stop including. Lines are sliced
    Python-style, so this line itself is not included.

:param start-after:
    Include only the text after the first occurrence of this text.

:param end-before:
    Include only the text before the first occurrence of this text.

:param number-lines:
    Number the included lines (integer specifies start number).
    Only effective with 'literal' or 'code' options.

:param class:
    Specify HTML class attribute for the included content.

**Kernel-specific Extensions**:

:param generate-cross-refs:
    If present, instead of directly including the file, it calls
    ParseDataStructs() to convert C data structures into cross-references
    that link to comprehensive documentation in other ReST files.

:param exception-file:
    (Used with generate-cross-refs)

    Path to a file containing rules for handling special cases:
    - Ignore specific C data structures
    - Use alternative reference names
    - Specify different reference types

:param warn-broken:
    (Used with generate-cross-refs)

    Enables warnings when auto-generated cross-references don't point to
    existing documentation targets.

:param toc:
    (Used with generate-cross-refs)

    Instead of the C source with cross-references, output a TOC list
    containing the cross references.
"""

# ==============================================================================
# imports
# ==============================================================================

import os.path
import re
import sys

from difflib import get_close_matches

from docutils import io, nodes, statemachine
from docutils.statemachine import ViewList
from docutils.parsers.rst import Directive, directives
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines

from sphinx.util import logging

# The kdoc library lives inside the Kernel tree, so the tree location must be
# known (and added to the module search path) before it can be imported.
srctree = os.path.abspath(os.environ["srctree"])
sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))

from kdoc.parse_data_structs import ParseDataStructs

__version__ = "1.0"
logger = logging.getLogger(__name__)

RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")


def ErrorString(exc):  # Shamelessly stolen from docutils
    """Return *exc* formatted as ``ExceptionClassName: message``."""
    return f'{exc.__class__.__name__}: {exc}'


# ==============================================================================
class KernelInclude(Directive):
    """
    KernelInclude (``kernel-include``) directive

    Most of the stuff here came from the Include directive defined at:
        docutils/parsers/rst/directives/misc.py

    Yet, overriding that class doesn't bring any benefit: the original class
    only has run() and the argument list. Not all arguments are implemented
    here when checked against the latest Sphinx version, as more arguments
    were added over time.

    So, keep its own list of supported arguments.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'literal': directives.flag,
        'code': directives.unchanged,
        'encoding': directives.encoding,
        'tab-width': int,
        'start-line': int,
        'end-line': int,
        'start-after': directives.unchanged_required,
        'end-before': directives.unchanged_required,
        # ignored except for 'literal' or 'code':
        'number-lines': directives.unchanged,  # integer or None
        'class': directives.class_option,

        # Arguments that aren't from Sphinx Include directive
        'generate-cross-refs': directives.flag,
        'warn-broken': directives.flag,
        'toc': directives.flag,
        'exception-file': directives.unchanged,
    }

    def read_rawtext(self, path, encoding):
        """
        Read the whole content of *path*, decoded with *encoding*.

        The file is also recorded as a dependency of the current document.
        Failures to open or decode the file are reported as "severe"
        directive errors.
        """
        settings = self.state.document.settings
        try:
            settings.record_dependencies.add(path)
            include_file = io.FileInput(
                source_path=path,
                encoding=encoding,
                error_handler=settings.input_encoding_error_handler)
        except UnicodeEncodeError as error:
            raise self.severe('Problems with directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' % path) from error
        except IOError as error:
            raise self.severe('Problems with directive path:\n%s.' %
                              ErrorString(error)) from error

        try:
            return include_file.read()
        except UnicodeError as error:
            raise self.severe('Problem with directive:\n%s' %
                              ErrorString(error)) from error

    def apply_range(self, rawtext):
        """
        Handles start-line, end-line, start-after and end-before parameters.

        Returns the part of *rawtext* selected by those options. A
        start-after/end-before text that cannot be found is reported as a
        "severe" directive error.
        """

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                lines = rawtext.splitlines()
                rawtext = '\n'.join(lines[startline:endline])
        except UnicodeError as error:
            # Use the local helper, as done by the other error paths of
            # this class, instead of depending on docutils' io.error_string
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + ErrorString(error)) from error

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get("start-after", None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[after_index + len(after_text):]

        before_text = self.options.get("end-before", None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  "directive:\nText not found." % self.name)
            rawtext = rawtext[:before_index]

        return rawtext

    def xref_text(self, env, path, tab_width):
        """
        Read and add contents from a C file parsed to have cross references.

        There are two types of supported output here:
        - A C source code with cross-references;
        - a TOC table containing cross references.

        The generated text is inserted into the state machine input, so
        the returned node list is always empty.
        """
        parser = ParseDataStructs()

        if 'exception-file' in self.options:
            # The exception file is relative to the document using the
            # directive
            source_dir = os.path.dirname(os.path.abspath(
                self.state_machine.input_lines.source(
                    self.lineno - self.state_machine.input_offset - 1)))
            exceptions_file = os.path.join(source_dir,
                                           self.options['exception-file'])
        else:
            exceptions_file = None

        parser.parse_file(path, exceptions_file)

        # Store references on a symbol dict to be used at check time
        if 'warn-broken' in self.options:
            env._xref_files.add(path)

        if "toc" not in self.options:
            # Apply the range options to the generated text only, and keep
            # the result: the directive header must always stay in place.
            body = self.apply_range(parser.gen_output())
            rawtext = ".. parsed-literal::\n\n" + body

            include_lines = statemachine.string2lines(rawtext, tab_width,
                                                      convert_whitespace=True)

            # Sphinx always blame the ".. <directive>", so placing
            # line numbers here won't make any difference

            self.state_machine.insert_input(include_lines, path)
            return []

        # TOC output is a ReST file, not a literal. So, we can add line
        # numbers

        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)

        relpath = os.path.relpath(path, srctree)

        result = ViewList()
        for line in parser.gen_toc().split("\n"):
            match = RE_LINENO_REF.match(line)
            if not match:
                result.append(line, path)
                continue

            ln, ref = match.groups()
            ln = int(ln)

            # Filter line range if needed
            if startline and (ln < startline):
                continue

            if endline and (ln > endline):
                continue

            # Sphinx numerates starting with zero, but text editors
            # and other tools start from one
            realln = ln + 1
            result.append(f"- {ref}: {relpath}#{realln}", path, ln)

        self.state_machine.insert_input(result, path)

        return []

    def literal(self, path, tab_width, rawtext):
        """
        Output a literal block with the content of *rawtext*.

        Returns a list with a single literal_block node. An invalid
        ``number-lines`` value is reported as a directive error.
        """

        # Convert tabs to spaces, if `tab_width` is positive.
        if tab_width >= 0:
            text = rawtext.expandtabs(tab_width)
        else:
            text = rawtext

        literal_block = nodes.literal_block(rawtext, source=path,
                                            classes=self.options.get("class", []))
        literal_block.line = 1
        self.add_name(literal_block)

        if "number-lines" not in self.options:
            literal_block += nodes.Text(text)
            return [literal_block]

        try:
            startline = int(self.options["number-lines"] or 1)
        except ValueError:
            raise self.error(":number-lines: with non-integer start value")

        # The amount of included lines defines the last line number
        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        endline = startline + len(include_lines)
        if text.endswith("\n"):
            text = text[:-1]

        tokens = NumberLines([([], text)], startline, endline)
        for classes, value in tokens:
            if classes:
                literal_block += nodes.inline(value, value, classes=classes)
            else:
                literal_block += nodes.Text(value)

        return [literal_block]

    def code(self, path, tab_width, rawtext):
        """
        Output *rawtext* as a code block.

        The work is delegated to the docutils CodeBlock directive, using
        the value of the ``code`` option as its argument. Returns whatever
        CodeBlock.run() returns.
        """

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)

        self.options["source"] = path
        codeblock = CodeBlock(self.name,
                              [self.options.pop("code")],  # arguments
                              self.options,
                              include_lines,
                              self.lineno,
                              self.content_offset,
                              self.block_text,
                              self.state,
                              self.state_machine)
        return codeblock.run()

    def run(self):
        """Include a file as part of the content of this reST file."""
        env = self.state.document.settings.env

        #
        # The include logic accepts only paths relative to the
        # Kernel source tree. The logic does check it to prevent
        # directory traverse issues.
        #

        path = os.path.expandvars(self.arguments[0])
        src_path = os.path.join(srctree, path)

        if not os.path.isfile(src_path):
            raise self.warning(f'File "{path}" doesn\'t exist')

        base = srctree
        path = src_path

        abs_base = os.path.abspath(base)
        abs_full_path = os.path.abspath(os.path.join(base, path))

        try:
            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
                                  (self.name, path))
        except ValueError:
            # Paths don't have the same drive (Windows) or other incompatibility
            raise self.severe('Problems with "%s" directive, invalid path: %s' %
                              (self.name, path))

        self.arguments[0] = path

        #
        # Add path location to Sphinx dependencies to ensure proper cache
        # invalidation check.
        #

        env.note_dependency(os.path.abspath(path))

        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)

        source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])

        # NOTE(review): this branch looks unreachable, as the argument was
        # replaced above by a path joined to the absolute srctree. It is
        # kept as inherited from the docutils Include directive - confirm
        # that standard_include_path is never needed here.
        if path.startswith("<") and path.endswith(">"):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))

        # HINT: this is the only line I had to change / commented out:
        # path = utils.relative_path(None, path)

        encoding = self.options.get("encoding",
                                    self.state.document.settings.input_encoding)
        tab_width = self.options.get("tab-width",
                                     self.state.document.settings.tab_width)

        # Get optional arguments related to cross-references generation
        if "generate-cross-refs" in self.options:
            return self.xref_text(env, path, tab_width)

        rawtext = self.read_rawtext(path, encoding)
        rawtext = self.apply_range(rawtext)

        if "code" in self.options:
            return self.code(path, tab_width, rawtext)

        return self.literal(path, tab_width, rawtext)

# ==============================================================================

# (source, message) pairs already warned about, to avoid duplicated warnings
reported = set()
# Maps each domain name to the list of object types it supports
DOMAIN_INFO = {}
# Maps a lower-case reference name to the descriptions of matching objects
all_refs = {}


def fill_domain_info(env):
    """
    Get supported reference types for each Sphinx domain and C namespaces.

    Fills DOMAIN_INFO and all_refs. It does nothing if DOMAIN_INFO is
    already filled.
    """
    if DOMAIN_INFO:
        return

    for domain_name, domain_instance in env.domains.items():
        try:
            DOMAIN_INFO[domain_name] = list(domain_instance.object_types.keys())
        except AttributeError:
            # Ignore domains that we can't retrieve object types, if any
            pass

    for domain in DOMAIN_INFO:
        domain_obj = env.get_domain(domain)
        for name, _dispname, objtype, docname, _anchor, _prio in domain_obj.get_objects():
            ref_name = name.lower()

            # Remove namespace if present
            if domain == "c" and '.' in ref_name:
                ref_name = ref_name.split(".")[-1]

            all_refs.setdefault(ref_name, []).append(
                f"\t{domain}:{objtype}:`{name}` (from {docname})")


def get_suggestions(app, env, node,
                    original_target, original_domain, original_reftype):
    """
    Check if target exists in the other domain or with different reftypes.

    Returns a list of strings describing the possible alternatives, which
    is empty when nothing similar is known.
    """
    original_target = original_target.lower()

    # Remove namespace if present
    if original_domain == "c" and '.' in original_target:
        original_target = original_target.split(".")[-1]

    # If name exists, propose exact name match on different domains
    if original_target in all_refs:
        return all_refs[original_target]

    # If not found, get a close match, using difflib.
    # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
    # for a close match within a certain distance. We're using the defaults
    # here, e.g. cutoff=0.6, proposing 3 alternatives
    suggestions = []
    for match in get_close_matches(original_target, all_refs.keys()):
        suggestions += all_refs[match]

    return suggestions


def check_missing_refs(app, env, node, contnode):
    """
    Check broken refs for the files it creates xrefs.

    Handler for the "missing-reference" event. It only emits a warning,
    at most once per source file and reference, so it always returns None.
    """
    if not node.source:
        return None

    try:
        xref_files = env._xref_files
    except AttributeError:
        logger.critical("FATAL: _xref_files not initialized!")
        raise

    # Only show missing references for kernel-include reference-parsed files
    if node.source not in xref_files:
        return None

    fill_domain_info(env)

    target = node.get('reftarget', '')
    domain = node.get('refdomain', 'std')
    reftype = node.get('reftype', '')

    msg = f"Invalid xref: {domain}:{reftype}:`{target}`"

    # Don't duplicate warnings
    data = (node.source, msg)
    if data in reported:
        return None
    reported.add(data)

    suggestions = get_suggestions(app, env, node, target, domain, reftype)
    if suggestions:
        msg += ". Possible alternatives:\n" + '\n'.join(suggestions)

    logger.warning(msg, location=node, type='ref', subtype='missing')

    return None


def merge_xref_info(app, env, docnames, other):
    """
    As each process modifies env._xref_files, we need to merge them back.
    """
    if not hasattr(other, "_xref_files"):
        return

    env._xref_files.update(other._xref_files)


def init_xref_docs(app, env, docnames):
    """Initialize the set of files that we're generating cross references"""
    app.env._xref_files = set()

# ==============================================================================


def setup(app):
    """Setup Sphinx extension"""

    app.connect("env-before-read-docs", init_xref_docs)
    app.connect("env-merge-info", merge_xref_info)
    app.add_directive("kernel-include", KernelInclude)
    app.connect("missing-reference", check_missing_refs)

    return {
        "version": __version__,
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }