xref: /linux/Documentation/sphinx/kfigure.py (revision 63740349eba78f242bcbf60d5244d7f2b2600853)
1# -*- coding: utf-8; mode: python -*-
2# SPDX-License-Identifier: GPL-2.0
3# pylint: disable=C0103, R0903, R0912, R0915
4"""
5    scalable figure and image handling
6    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7
8    Sphinx extension which implements scalable image handling.
9
10    :copyright:  Copyright (C) 2016  Markus Heiser
11    :license:    GPL Version 2, June 1991 see Linux/COPYING for details.
12
    The build for image formats depends on the image's source format and the
    output's destination format. This extension implements methods to simplify
    image handling from the author's POV. Directives like ``kernel-figure``
    implement methods *to* always get the best output-format even if some tools
    are not installed. For more details take a look at ``convert_image(...)``
    which is the core of all conversions.
19
20    * ``.. kernel-image``: for image handling / a ``.. image::`` replacement
21
22    * ``.. kernel-figure``: for figure handling / a ``.. figure::`` replacement
23
24    * ``.. kernel-render``: for render markup / a concept to embed *render*
25      markups (or languages). Supported markups (see ``RENDER_MARKUP_EXT``)
26
      - ``DOT``: render embedded Graphviz's **DOT**
28      - ``SVG``: render embedded Scalable Vector Graphics (**SVG**)
29      - ... *developable*
30
31    Used tools:
32
33    * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
34      available, the DOT language is inserted as literal-block.
35      For conversion to PDF, ``rsvg-convert(1)`` of librsvg
36      (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
37
    * SVG to PDF: To generate PDF, you need at least one of these tools:
39
40      - ``convert(1)``: ImageMagick (https://www.imagemagick.org)
41      - ``inkscape(1)``: Inkscape (https://inkscape.org/)
42
43    List of customizations:
44
45    * generate PDF from SVG / used by PDF (LaTeX) builder
46
47    * generate SVG (html-builder) and PDF (latex-builder) from DOT files.
48      DOT: see https://www.graphviz.org/content/dot-language
49
50    """
51
52import os
53from os import path
54import subprocess
55from hashlib import sha1
56import re
57from docutils import nodes
58from docutils.statemachine import ViewList
59from docutils.parsers.rst import directives
60from docutils.parsers.rst.directives import images
61import sphinx
62from sphinx.util.nodes import clean_astext
63from sphinx.util import logging
64
# shortcut for docutils' figure directive; base class of the ``KernelFigure``
# and ``KernelRender`` directives below
Figure = images.Figure

# version of this extension, reported to Sphinx by ``setup()``
__version__  = '1.0.0'

# logger (from ``sphinx.util.logging``) used for all messages of this extension
logger = logging.getLogger('kfigure')
70
71# simple helper
72# -------------
73
def which(cmd):
    """Searches the ``cmd`` in the ``PATH`` environment.

    This *which* searches the PATH for executable ``cmd``. The first match is
    returned; if nothing is found, ``None`` is returned.

    * ``cmd`` file name of the command to look up (e.g. ``dot``)

    Only regular files with execute permission are considered a match. Empty
    ``PATH`` entries are skipped. If ``PATH`` is unset or empty, ``os.defpath``
    is searched instead.
    """
    envpath = os.environ.get('PATH', None) or os.defpath
    for folder in envpath.split(os.pathsep):
        if not folder:
            # an empty entry must not be turned into a lookup of '/<cmd>'
            continue
        fname = path.join(folder, cmd)
        # a non-executable file of that name cannot be run by subprocess
        if path.isfile(fname) and os.access(fname, os.X_OK):
            return fname
    return None
85
def mkdir(folder, mode=0o775):
    """Create directory ``folder`` (including parents) if it does not exist.

    * ``folder`` pathname of the directory to create
    * ``mode``   permission bits passed to ``os.makedirs``

    ``exist_ok=True`` is used instead of a separate ``isdir`` check, so two
    processes creating the same folder at the same time do not fail with
    ``FileExistsError``.
    """
    os.makedirs(folder, mode, exist_ok=True)
89
def file2literal(fname):
    """Read file ``fname`` and return its text as a ``literal_block`` node."""
    with open(fname, "r") as src:
        text = src.read()
    return nodes.literal_block(text, text)
95
def isNewer(path1, path2):
    """Returns True if ``path1`` is newer than ``path2``

    ``True`` is returned if ``path1`` exists and its ``st_ctime`` is greater
    than the ``st_ctime`` of ``path2``, otherwise ``False``.
    """
    if not path.exists(path1):
        return False
    return os.stat(path1).st_ctime > os.stat(path2).st_ctime
104
def pass_handle(self, node):           # pylint: disable=W0613
    """Do nothing; second element of the handler pairs registered in ``setup()``."""
    return None
107
108# setup conversion tools and sphinx extension
109# -------------------------------------------
110
# The values below are defaults; ``setupTools()`` overwrites them once the
# builder is initiated.

# Graphviz's dot(1) support: pathname of the command or None
dot_cmd = None
# true when 'dot -Tpdf' should be used for DOT -> PDF conversion
dot_Tpdf = False

# ImageMagick's convert(1) support: pathname of the command or None
convert_cmd = None

# librsvg's rsvg-convert(1) support: pathname of the command or None
rsvg_convert_cmd = None

# Inkscape's inkscape(1) support: pathname of the command or None
inkscape_cmd = None
# Inkscape prior to 1.0 uses different command options; true when the
# output of 'inkscape --version' contains "Inkscape 1"
inkscape_ver_one = False
126
127
def setup(app):
    """Register the directives, nodes and event handlers of this extension.

    Returns the extension metadata (version and parallel-build flags).
    """
    # check toolchain first
    app.connect('builder-inited', setupTools)

    # every node gets the same (visit, depart) pair for each of these builders
    builder_formats = ('html', 'latex', 'texinfo', 'text', 'man')

    # image, figure and render handling -- registered in this order
    registrations = (
        ("kernel-image", KernelImage, kernel_image, visit_kernel_image),
        ("kernel-figure", KernelFigure, kernel_figure, visit_kernel_figure),
        ('kernel-render', KernelRender, kernel_render, visit_kernel_render),
    )
    for directive_name, directive_cls, node_cls, visitor in registrations:
        app.add_directive(directive_name, directive_cls)
        handlers = {fmt: (visitor, pass_handle) for fmt in builder_formats}
        app.add_node(node_cls, **handlers)

    app.connect('doctree-read', add_kernel_figure_to_std_domain)

    return {
        'version': __version__,
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
166
167
def setupTools(app):
    """
    Check available build tools and log some *verbose* messages.

    This function is called once, when the builder is initiated.

    The module globals ``dot_cmd``, ``convert_cmd``, ``rsvg_convert_cmd`` and
    ``inkscape_cmd`` are set to the pathname found by ``which()`` (or
    ``None``). ``dot_Tpdf`` and ``inkscape_ver_one`` are set to plain booleans.
    If Inkscape is found it takes precedence: ``convert_cmd`` and
    ``rsvg_convert_cmd`` are reset to ``None`` and ``dot_Tpdf`` to ``False``.
    """
    global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
    global inkscape_cmd, inkscape_ver_one  # pylint: disable=W0603
    logger.verbose("kfigure: check installed tools ...")

    dot_cmd = which('dot')
    convert_cmd = which('convert')
    rsvg_convert_cmd = which('rsvg-convert')
    inkscape_cmd = which('inkscape')

    if dot_cmd:
        logger.verbose("use dot(1) from: " + dot_cmd)

        try:
            dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            # a non-zero exit status is tolerated, the captured output is
            # still searched for the format name
            dot_Thelp_list = err.output or b''

        dot_Tpdf = bool(re.search(b'pdf', dot_Thelp_list))
    else:
        logger.warning(
            "dot(1) not found, for better output quality install graphviz from https://www.graphviz.org"
        )
    if inkscape_cmd:
        logger.verbose("use inkscape(1) from: " + inkscape_cmd)
        try:
            inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
                                                   stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as err:
            # same policy as for dot(1) above: do not abort the build because
            # the version query returned a non-zero exit status
            inkscape_ver = err.output or b''
        inkscape_ver_one = bool(re.search(b'Inkscape 1', inkscape_ver))
        # Inkscape is preferred; disable the alternative conversion paths
        convert_cmd = None
        rsvg_convert_cmd = None
        dot_Tpdf = False

    else:
        if convert_cmd:
            logger.verbose("use convert(1) from: " + convert_cmd)
        else:
            logger.verbose(
                "Neither inkscape(1) nor convert(1) found.\n"
                "For SVG to PDF conversion, install either Inkscape (https://inkscape.org/) (preferred) or\n"
                "ImageMagick (https://www.imagemagick.org)"
            )

        if rsvg_convert_cmd:
            logger.verbose("use rsvg-convert(1) from: " + rsvg_convert_cmd)
            logger.verbose("use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
            dot_Tpdf = False
        else:
            logger.verbose(
                "rsvg-convert(1) not found.\n"
                "  SVG rendering of convert(1) is done by ImageMagick-native renderer."
            )
            if dot_Tpdf:
                logger.verbose("use 'dot -Tpdf' for DOT -> PDF conversion")
            else:
                logger.verbose("use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
232
233
234# integrate conversion tools
235# --------------------------
236
# Maps the markup name given as argument of the ``kernel-render`` directive to
# the file extension under which the markup is saved for conversion.
RENDER_MARKUP_EXT = {
    # The '.ext' must be handled by convert_image(..) function's *in_ext* input.
    # <name> : <.ext>
    'DOT' : '.dot',
    'SVG' : '.svg'
}
243
def convert_image(img_node, translator, src_fname=None):
    """Convert a image node for the builder.

    Different builder prefer different image formats, e.g. *latex* builder
    prefer PDF while *html* builder prefer SVG format for images.

    This function handles output image formats in dependence of the source
    format (of the image) and the translator's output format.

    * ``img_node``   image node; its ``uri`` and ``candidates`` are rewritten
      to the converted file, or the node is replaced by a literal block
      holding the raw source if no conversion is possible or it fails
    * ``translator`` the translator visiting the node, its builder decides
      about the output format
    * ``src_fname``  pathname of the source file; if ``None`` the ``uri`` is
      looked up in the builder's ``srcdir`` first and then in its ``outdir``
    """
    app = translator.builder.app

    fname, in_ext = path.splitext(path.basename(img_node['uri']))
    if src_fname is None:
        src_fname = path.join(translator.builder.srcdir, img_node['uri'])
        if not path.exists(src_fname):
            src_fname = path.join(translator.builder.outdir, img_node['uri'])

    dst_fname = None

    # in kernel builds, use 'make SPHINXOPTS=-v' to see verbose messages

    logger.verbose('assert best format for: ' + img_node['uri'])

    if in_ext == '.dot':

        if not dot_cmd:
            logger.verbose("dot from graphviz not available / include DOT raw.")
            img_node.replace_self(file2literal(src_fname))

        elif translator.builder.format == 'latex':
            dst_fname = path.join(translator.builder.outdir, fname + '.pdf')
            img_node['uri'] = fname + '.pdf'
            img_node['candidates'] = {'*': fname + '.pdf'}


        elif translator.builder.format == 'html':
            dst_fname = path.join(
                translator.builder.outdir,
                translator.builder.imagedir,
                fname + '.svg')
            img_node['uri'] = path.join(
                translator.builder.imgpath, fname + '.svg')
            img_node['candidates'] = {
                '*': path.join(translator.builder.imgpath, fname + '.svg')}

        else:
            # all other builder formats will include DOT as raw
            img_node.replace_self(file2literal(src_fname))

    elif in_ext == '.svg':

        if translator.builder.format == 'latex':
            if not inkscape_cmd and convert_cmd is None:
                logger.warning(
                    "no SVG to PDF conversion available / include SVG raw.\n"
                    "Including large raw SVGs can cause xelatex error.\n"
                    "Install Inkscape (preferred) or ImageMagick."
                )
                img_node.replace_self(file2literal(src_fname))
            else:
                dst_fname = path.join(translator.builder.outdir, fname + '.pdf')
                img_node['uri'] = fname + '.pdf'
                img_node['candidates'] = {'*': fname + '.pdf'}

    if dst_fname:
        # the builder needs not to copy one more time, so pop it if exists.
        translator.builder.images.pop(img_node['uri'], None)
        # destination pathname relative to the builder's outdir (for logging)
        _name = dst_fname[len(str(translator.builder.outdir)) + 1:]

        if isNewer(dst_fname, src_fname):
            logger.verbose("convert: {out}/%s already exists and is newer" % _name)

        else:
            ok = False
            mkdir(path.dirname(dst_fname))

            if in_ext == '.dot':
                logger.verbose('convert DOT to: {out}/' + _name)
                if translator.builder.format == 'latex' and not dot_Tpdf:
                    # two steps: DOT -> SVG -> PDF. The second step is only
                    # run if the first one succeeded, there is no point in
                    # converting the SVG of a failed dot(1) run.
                    svg_fname = path.join(translator.builder.outdir, fname + '.svg')
                    ok = (dot2format(app, src_fname, svg_fname)
                          and svg2pdf_by_rsvg(app, svg_fname, dst_fname))

                else:
                    ok = dot2format(app, src_fname, dst_fname)

            elif in_ext == '.svg':
                logger.verbose('convert SVG to: {out}/' + _name)
                ok = svg2pdf(app, src_fname, dst_fname)

            if not ok:
                # conversion failed / include the source raw
                img_node.replace_self(file2literal(src_fname))
337
338
def dot2format(app, dot_fname, out_fname):
    """Converts DOT file to ``out_fname`` using ``dot(1)``.

    * ``dot_fname`` pathname of the input DOT file, including extension ``.dot``
    * ``out_fname`` pathname of the output file, including format extension

    The *format extension* depends on the ``dot`` command (see ``man dot``
    option ``-Txxx``). Normally you will use one of the following extensions:

    - ``.ps`` for PostScript,
    - ``.svg`` or ``svgz`` for Scalable Vector Graphics,
    - ``.fig`` for XFIG graphics and
    - ``.png`` or ``gif`` for common bitmap graphics.

    Returns ``True`` on success and ``False`` if ``dot(1)`` exits with a
    non-zero status. On failure a warning is logged and the incomplete output
    file is removed.
    """
    out_format = path.splitext(out_fname)[1][1:]
    cmd = [dot_cmd, '-T%s' % out_format, dot_fname]

    with open(out_fname, "w") as out:
        exit_code = subprocess.call(cmd, stdout = out)

    if exit_code != 0:
        logger.warning(
                      "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
        # The output file has been created by open() even though dot(1)
        # failed. Remove it, otherwise isNewer() would take the broken file
        # for an up-to-date conversion result in the next build.
        try:
            os.remove(out_fname)
        except OSError:
            # best effort: the failure itself has already been reported above
            pass
    return exit_code == 0
364
def svg2pdf(app, svg_fname, pdf_fname):
    """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.

    Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
    from ImageMagick (https://www.imagemagick.org) for conversion.
    Returns ``True`` on success and ``False`` if an error occurred or if
    neither of the two tools is available.

    * ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
    * ``pdf_fname`` pathname of the output PDF file with extension (``.pdf``)

    """
    if inkscape_cmd:
        cmd_name = 'inkscape(1)'
        if inkscape_ver_one:
            cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
        else:
            cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
    elif convert_cmd:
        cmd_name = 'convert(1)'
        cmd = [convert_cmd, svg_fname, pdf_fname]
    else:
        # Without this guard the command list would start with None and
        # subprocess would raise a TypeError.
        logger.warning(
            "no SVG to PDF conversion available for: %s\n"
            "Install Inkscape (preferred) or ImageMagick." % svg_fname)
        return False

    try:
        warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        exit_code = 0
    except subprocess.CalledProcessError as err:
        warning_msg = err.output
        exit_code = err.returncode

    # the tool's output is only logged, undecodable bytes must not abort
    # the build
    warning_txt = warning_msg.decode('utf-8', errors='replace') if warning_msg else ''

    if exit_code != 0:
        logger.warning("Error #%d when calling: %s" %
                            (exit_code, " ".join(cmd)))
        if warning_txt:
            logger.warning( "Warning msg from %s: %s" %
                                (cmd_name, warning_txt))
    elif warning_txt:
        logger.verbose("Warning msg from %s (likely harmless):\n%s" %
                            (cmd_name, warning_txt))

    return exit_code == 0
405
def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
    """Convert SVG to PDF with ``rsvg-convert(1)`` command.

    * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
    * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``

    The input SVG file should be the one generated by ``dot2format()``.
    Without ``rsvg-convert(1)`` the conversion is delegated to ``svg2pdf()``.

    Returns ``True`` on success, otherwise ``False``.
    """
    if rsvg_convert_cmd is None:
        return svg2pdf(app, svg_fname, pdf_fname)

    rsvg_call = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
    # use stdout and stderr from parent
    status = subprocess.call(rsvg_call)
    if status != 0:
        logger.warning("Error #%d when calling: %s" %
                            (status, " ".join(rsvg_call)))
    return status == 0
431
432
433# image handling
434# ---------------------
435
def visit_kernel_image(self, node):    # pylint: disable=W0613
    """Visitor of the ``kernel_image`` Node.

    Passes the first child (the wrapped ``image`` node) to
    ``convert_image(...)``.
    """
    convert_image(node[0], self)
443
class kernel_image(nodes.image):
    """Wrapper node created by the ``kernel-image`` directive."""
447
class KernelImage(images.Image):
    """KernelImage directive

    Behaves like the ``.. image::`` directive, but rejects *remote URI* and
    *glob* pattern. The resulting image node is wrapped into a
    kernel_image node. See ``visit_kernel_image``.
    """

    def run(self):
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s": glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = images.Image.run(self)
        if len(result) == 2 or isinstance(result[0], nodes.system_message):
            # hand back what the base directive returned, unwrapped
            return result

        [image_node] = result
        # wrap image node into a kernel_image node / see visitors
        return [kernel_image('', image_node)]
469
470# figure handling
471# ---------------------
472
def visit_kernel_figure(self, node):   # pylint: disable=W0613
    """Visitor of the ``kernel_figure`` Node.

    Passes the first child of the node's first child to
    ``convert_image(...)``.
    """
    figure_node = node[0]
    convert_image(figure_node[0], self)
480
class kernel_figure(nodes.figure):
    """Wrapper node created by the ``kernel-figure`` directive."""
483
class KernelFigure(Figure):
    """KernelFigure directive

    Behaves like the ``.. figure::`` directive, but rejects *remote URI* and
    *glob* pattern.  The resulting figure node is wrapped into a kernel_figure
    node. See ``visit_kernel_figure``.
    """

    def run(self):
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s":'
                ' glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = Figure.run(self)
        if len(result) == 2 or isinstance(result[0], nodes.system_message):
            # hand back what the base directive returned, unwrapped
            return result

        [figure_node] = result
        # wrap figure node into a kernel_figure node / see visitors
        return [kernel_figure('', figure_node)]
506
507
508# render handling
509# ---------------------
510
def visit_kernel_render(self, node):
    """Visitor of the ``kernel_render`` Node.

    If rendering tools available, save the markup of the ``literal_block`` child
    node into a file and replace the ``literal_block`` node with a new created
    ``image`` node, pointing to the saved markup file. Afterwards, handle the
    image child-node with the ``convert_image(...)``.

    The node is left untouched (markup is included raw) if its ``srclang`` is
    not listed in ``RENDER_MARKUP_EXT`` or if the markup is DOT and ``dot(1)``
    is not available.
    """
    srclang = node.get('srclang')

    logger.verbose('visit kernel-render node lang: "%s"' % srclang)

    tmp_ext = RENDER_MARKUP_EXT.get(srclang, None)
    if tmp_ext is None:
        logger.warning( 'kernel-render: "%s" unknown / include raw.' % srclang)
        return

    if not dot_cmd and tmp_ext == '.dot':
        logger.verbose("dot from graphviz not available / include raw.")
        return

    literal_block = node[0]

    code      = literal_block.astext()
    # the file name is derived from the markup's content, identical markup
    # ends up in the same file
    hashobj   = code.encode('utf-8') #  str(node.attributes)
    fname     = '%s-%s' % (srclang, sha1(hashobj).hexdigest())

    tmp_fname = path.join(
        self.builder.outdir, self.builder.imagedir, fname + tmp_ext)

    if not path.isfile(tmp_fname):
        mkdir(path.dirname(tmp_fname))
        with open(tmp_fname, "w") as out:
            out.write(code)

    img_node = nodes.image(node.rawsource, **node.attributes)
    img_node['uri'] = path.join(self.builder.imgpath, fname + tmp_ext)
    img_node['candidates'] = {
        '*': path.join(self.builder.imgpath, fname + tmp_ext)}

    literal_block.replace_self(img_node)
    convert_image(img_node, self, tmp_fname)
554
555
class kernel_render(nodes.General, nodes.Inline, nodes.Element):
    """Wrapper node created by the ``kernel-render`` directive."""
559
class KernelRender(Figure):
    """KernelRender directive

    Render content by external tool.  Has all the options known from the
    *figure*  directive, plus option ``caption``.  If ``caption`` has a
    value, a figure node with the *caption* is inserted. If not, a image node is
    inserted.

    The KernelRender directive wraps the text of the directive into a
    literal_block node and wraps it into a kernel_render node. See
    ``visit_kernel_render``.
    """
    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = False

    # earn options from 'figure'
    option_spec = Figure.option_spec.copy()
    option_spec['caption'] = directives.unchanged

    def run(self):
        """Return the list of nodes built for this directive."""
        result = self.build_node()
        if isinstance(result, list):
            # build_node() reports errors as a list of system messages;
            # wrapping it again would return a list inside a list
            return result
        return [result]

    def build_node(self):
        """Build the node for the directive's content.

        Returns a ``kernel_render`` node, or a ``figure`` node wrapping it
        when option ``caption`` is given. If the source language is unknown or
        the directive has no content, a list holding one warning is returned
        instead.
        """

        srclang = self.arguments[0].strip()
        if srclang not in RENDER_MARKUP_EXT:
            return [self.state_machine.reporter.warning(
                'Unknown source language "%s", use one of: %s.' % (
                    srclang, ",".join(RENDER_MARKUP_EXT)),
                line=self.lineno)]

        code = '\n'.join(self.content)
        if not code.strip():
            return [self.state_machine.reporter.warning(
                'Ignoring "%s" directive without content.' % (
                    self.name),
                line=self.lineno)]

        node = kernel_render()
        node['alt'] = self.options.get('alt','')
        node['srclang'] = srclang
        literal_node = nodes.literal_block(code, code)
        node += literal_node

        caption = self.options.get('caption')
        if caption:
            # parse caption's content
            parsed = nodes.Element()
            self.state.nested_parse(
                ViewList([caption], source=''), self.content_offset, parsed)
            caption_node = nodes.caption(
                parsed[0].rawsource, '', *parsed[0].children)
            caption_node.source = parsed[0].source
            caption_node.line = parsed[0].line

            figure_node = nodes.figure('', node)
            for k,v in self.options.items():
                figure_node[k] = v
            figure_node += caption_node

            node = figure_node

        return node
625
def add_kernel_figure_to_std_domain(app, doctree):
    """Add kernel-figure anchors to 'std' domain.

    The ``StandardDomain.process_doc(..)`` method does not know how to resolve
    the caption (label) of ``kernel-figure`` directive (it only knows about
    standard nodes, e.g. table, figure etc.). Without any additional handling
    this will result in a 'undefined label' for kernel-figures.

    This handler registers, for every explicit target name that points to a
    ``kernel_figure`` node, a label made of the document name, the target's id
    and the caption's text in the 'std' domain labels.
    """

    std_domain = app.env.domains["std"]
    current_doc = app.env.docname
    std_labels = std_domain.data["labels"]

    for name, explicit in doctree.nametypes.items():
        if not explicit:
            continue
        labelid = doctree.nameids[name]
        if labelid is None:
            continue

        target = doctree.ids[labelid]
        if target.tagname != 'kernel_figure':
            continue

        for child in target.next_node():
            if child.tagname == 'caption':
                # add label to std domain
                std_labels[name] = current_doc, labelid, clean_astext(child)
                break
655                    break
656