xref: /linux/Documentation/sphinx/kfigure.py (revision e7d759f31ca295d589f7420719c311870bb3166f)
1# -*- coding: utf-8; mode: python -*-
2# pylint: disable=C0103, R0903, R0912, R0915
3u"""
4    scalable figure and image handling
5    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6
7    Sphinx extension which implements scalable image handling.
8
9    :copyright:  Copyright (C) 2016  Markus Heiser
10    :license:    GPL Version 2, June 1991 see Linux/COPYING for details.
11
12    The build for image formats depends on the image's source format and the
13    output's destination format. This extension implements methods to simplify image
14    handling from the author's POV. Directives like ``kernel-figure`` implement
15    methods *to* always get the best output-format even if some tools are not
16    installed. For more details take a look at ``convert_image(...)`` which is
17    the core of all conversions.
18
19    * ``.. kernel-image``: for image handling / a ``.. image::`` replacement
20
21    * ``.. kernel-figure``: for figure handling / a ``.. figure::`` replacement
22
23    * ``.. kernel-render``: for render markup / a concept to embed *render*
24      markups (or languages). Supported markups (see ``RENDER_MARKUP_EXT``)
25
26      - ``DOT``: render embedded Graphviz's **DOT**
27      - ``SVG``: render embedded Scalable Vector Graphics (**SVG**)
28      - ... *developable*
29
30    Used tools:
31
32    * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
33      available, the DOT language is inserted as literal-block.
34      For conversion to PDF, ``rsvg-convert(1)`` of librsvg
35      (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
36
37    * SVG to PDF: To generate PDF, you need at least one of these tools:
38
39      - ``convert(1)``: ImageMagick (https://www.imagemagick.org)
40      - ``inkscape(1)``: Inkscape (https://inkscape.org/)
41
42    List of customizations:
43
44    * generate PDF from SVG / used by PDF (LaTeX) builder
45
46    * generate SVG (html-builder) and PDF (latex-builder) from DOT files.
47      DOT: see https://www.graphviz.org/content/dot-language
48
49    """
50
51import os
52from os import path
53import subprocess
54from hashlib import sha1
55import re
56from docutils import nodes
57from docutils.statemachine import ViewList
58from docutils.parsers.rst import directives
59from docutils.parsers.rst.directives import images
60import sphinx
61from sphinx.util.nodes import clean_astext
62import kernellog
63
64Figure = images.Figure
65
66__version__  = '1.0.0'
67
68# simple helper
69# -------------
70
def which(cmd):
    """Search the ``PATH`` environment for the executable ``cmd``.

    The folders listed in ``PATH`` (or in ``os.defpath`` when ``PATH`` is unset
    or empty) are scanned in order.  The pathname of the first regular file
    named ``cmd`` which the current user is allowed to execute is returned.  If
    nothing is found, ``None`` is returned.
    """
    envpath = os.environ.get('PATH', None) or os.defpath
    for folder in envpath.split(os.pathsep):
        # path.join() keeps an empty PATH entry relative to the current folder
        # instead of turning it into "/<cmd>" (the filesystem root).
        fname = path.join(folder, cmd)
        # A file which only has the right name but can not be executed is of
        # no use for the subprocess calls made with the returned pathname.
        if path.isfile(fname) and os.access(fname, os.X_OK):
            return fname
    return None
82
def mkdir(folder, mode=0o775):
    """Create ``folder`` and all missing parent folders.

    * ``folder`` pathname of the folder to create
    * ``mode``   permission bits handed to ``os.makedirs``

    An already existing folder is not an error.  ``exist_ok`` is used instead of
    a preceding ``path.isdir()`` test, so that two writers creating the same
    folder at the same time do not fail on each other.
    """
    os.makedirs(folder, mode, exist_ok=True)
86
def file2literal(fname):
    """Return a ``literal_block`` node holding the content of file ``fname``.

    The file is read in text mode; its whole content is used both as the raw
    source and as the text of the new node.
    """
    with open(fname, "r") as handle:
        text = handle.read()
    return nodes.literal_block(text, text)
92
def isNewer(path1, path2):
    """Tell whether ``path1`` is newer than ``path2``.

    Returns ``True`` if ``path1`` exists and its ``st_ctime`` is greater than
    the ``st_ctime`` of ``path2``, otherwise ``False``.  ``path2`` is only
    examined when ``path1`` exists.
    """
    if not path.exists(path1):
        return False
    ctime1 = os.stat(path1).st_ctime
    ctime2 = os.stat(path2).st_ctime
    return ctime1 > ctime2
101
def pass_handle(self, node):           # pylint: disable=W0613
    """Do nothing; used as the *depart* handler of the kfigure nodes."""
104
105# setup conversion tools and sphinx extension
106# -------------------------------------------
107
# Graphviz's dot(1) support: pathname of the executable as found by
# setupTools(), None if it is not available
dot_cmd = None
# True if 'dot -Tpdf' should be used for the DOT -> PDF conversion
dot_Tpdf = False

# ImageMagick's convert(1) support: pathname of the executable or None
convert_cmd = None

# librsvg's rsvg-convert(1) support: pathname of the executable or None
rsvg_convert_cmd = None

# Inkscape's inkscape(1) support: pathname of the executable or None
inkscape_cmd = None
# Inkscape prior to 1.0 uses different command options; a true value selects
# the command options of Inkscape 1.x
inkscape_ver_one = False
123
124
def setup(app):
    """Register the kfigure directives, nodes and event handlers.

    For each of ``kernel-image``, ``kernel-figure`` and ``kernel-render`` the
    directive and its node class are registered; the node gets the same visitor
    (and ``pass_handle`` as depart handler) for the html, latex, texinfo, text
    and man builders.  Returns the extension metadata (version and parallel
    read/write safety flags).
    """
    # check toolchain first
    app.connect('builder-inited', setupTools)

    builder_formats = ('html', 'latex', 'texinfo', 'text', 'man')
    registrations = (
        # image handling
        ("kernel-image", KernelImage, kernel_image, visit_kernel_image),
        # figure handling
        ("kernel-figure", KernelFigure, kernel_figure, visit_kernel_figure),
        # render handling
        ('kernel-render', KernelRender, kernel_render, visit_kernel_render),
    )
    for directive_name, directive_cls, node_cls, visitor in registrations:
        app.add_directive(directive_name, directive_cls)
        handlers = {fmt: (visitor, pass_handle) for fmt in builder_formats}
        app.add_node(node_cls, **handlers)

    app.connect('doctree-read', add_kernel_figure_to_std_domain)

    return {
        'version': __version__,
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
163
164
def setupTools(app):
    u"""
    Check available build tools and log some *verbose* messages.

    This function is called once, when the builder is initiated.

    The module level variables ``dot_cmd``, ``convert_cmd``,
    ``rsvg_convert_cmd`` and ``inkscape_cmd`` are set to the pathname found by
    ``which()`` (or ``None``).  ``dot_Tpdf`` tells whether the output of
    ``dot -Thelp`` mentions ``pdf`` and ``inkscape_ver_one`` tells whether
    ``inkscape --version`` reports ``Inkscape 1``.

    If inkscape(1) is available, convert(1), rsvg-convert(1) and ``dot -Tpdf``
    are disabled.  Otherwise an available rsvg-convert(1) disables
    ``dot -Tpdf``.
    """
    global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
    global inkscape_cmd, inkscape_ver_one  # pylint: disable=W0603
    kernellog.verbose(app, "kfigure: check installed tools ...")

    dot_cmd = which('dot')
    convert_cmd = which('convert')
    rsvg_convert_cmd = which('rsvg-convert')
    inkscape_cmd = which('inkscape')

    if dot_cmd:
        kernellog.verbose(app, "use dot(1) from: " + dot_cmd)

        try:
            dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            # a non-zero exit status is tolerated: whatever the command has
            # printed (stdout and stderr) is still searched below
            dot_Thelp_list = err.output or b''

        dot_Tpdf_ptn = b'pdf'
        # store a plain boolean, like the module level default
        dot_Tpdf = bool(re.search(dot_Tpdf_ptn, dot_Thelp_list))
    else:
        kernellog.warn(app, "dot(1) not found, for better output quality install "
                       "graphviz from https://www.graphviz.org")
    if inkscape_cmd:
        kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd)
        try:
            inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
                                                   stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as err:
            # Same policy as for the dot(1) probe above: a failing version
            # query must not abort the whole build, the captured output is
            # used for the version detection.
            kernellog.verbose(app, "inkscape(1) --version failed with exit code %d"
                              % err.returncode)
            inkscape_ver = err.output or b''
        ver_one_ptn = b'Inkscape 1'
        inkscape_ver_one = bool(re.search(ver_one_ptn, inkscape_ver))
        # inkscape(1) is preferred: switch off all other PDF converters
        convert_cmd = None
        rsvg_convert_cmd = None
        dot_Tpdf = False

    else:
        if convert_cmd:
            kernellog.verbose(app, "use convert(1) from: " + convert_cmd)
        else:
            kernellog.verbose(app,
                "Neither inkscape(1) nor convert(1) found.\n"
                "For SVG to PDF conversion, "
                "install either Inkscape (https://inkscape.org/) (preferred) or\n"
                "ImageMagick (https://www.imagemagick.org)")

        if rsvg_convert_cmd:
            kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd)
            kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
            dot_Tpdf = False
        else:
            kernellog.verbose(app,
                "rsvg-convert(1) not found.\n"
                "  SVG rendering of convert(1) is done by ImageMagick-native renderer.")
            if dot_Tpdf:
                kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion")
            else:
                kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
227
228
229# integrate conversion tools
230# --------------------------
231
# Maps the markup name given as argument of the ``kernel-render`` directive to
# the file extension used when the markup is saved into a file.
RENDER_MARKUP_EXT = {
    # The '.ext' must be handled by convert_image(..) function's *in_ext* input.
    # <name> : <.ext>
    'DOT' : '.dot',
    'SVG' : '.svg'
}
238
def convert_image(img_node, translator, src_fname=None):
    """Convert an image node for the builder.

    Different builders prefer different image formats, e.g. the *latex* builder
    prefers PDF while the *html* builder prefers SVG format for images.

    This function handles the output image format depending on the source
    format (of the image) and the translator's output format.

    * ``img_node``   image node; its ``uri`` and ``candidates`` are rewritten
      to the converted file, or the node is replaced by a literal block of the
      source when no conversion is possible or the conversion fails
    * ``translator`` the translator visiting the node (gives access to the
      builder)
    * ``src_fname``  pathname of the image source; if ``None`` the ``uri`` is
      looked up below the builder's ``srcdir`` and then below its ``outdir``

    Conversions:

    * ``.dot``: PDF for the latex builder, SVG for the html builder, literal
      block for all other builders or if dot(1) is not available
    * ``.svg``: PDF for the latex builder (literal block if neither inkscape(1)
      nor convert(1) is available), untouched for all other builders

    An existing output file which is newer than the source is reused.
    """
    app = translator.builder.app

    fname, in_ext = path.splitext(path.basename(img_node['uri']))
    if src_fname is None:
        src_fname = path.join(translator.builder.srcdir, img_node['uri'])
        if not path.exists(src_fname):
            src_fname = path.join(translator.builder.outdir, img_node['uri'])

    dst_fname = None

    # in kernel builds, use 'make SPHINXOPTS=-v' to see verbose messages

    kernellog.verbose(app, 'assert best format for: ' + img_node['uri'])

    if in_ext == '.dot':

        if not dot_cmd:
            kernellog.verbose(app,
                              "dot from graphviz not available / include DOT raw.")
            img_node.replace_self(file2literal(src_fname))

        elif translator.builder.format == 'latex':
            dst_fname = path.join(translator.builder.outdir, fname + '.pdf')
            img_node['uri'] = fname + '.pdf'
            img_node['candidates'] = {'*': fname + '.pdf'}


        elif translator.builder.format == 'html':
            dst_fname = path.join(
                translator.builder.outdir,
                translator.builder.imagedir,
                fname + '.svg')
            img_node['uri'] = path.join(
                translator.builder.imgpath, fname + '.svg')
            img_node['candidates'] = {
                '*': path.join(translator.builder.imgpath, fname + '.svg')}

        else:
            # all other builder formats will include DOT as raw
            img_node.replace_self(file2literal(src_fname))

    elif in_ext == '.svg':

        if translator.builder.format == 'latex':
            if not inkscape_cmd and convert_cmd is None:
                kernellog.warn(app,
                                  "no SVG to PDF conversion available / include SVG raw."
                                  "\nIncluding large raw SVGs can cause xelatex error."
                                  "\nInstall Inkscape (preferred) or ImageMagick.")
                img_node.replace_self(file2literal(src_fname))
            else:
                dst_fname = path.join(translator.builder.outdir, fname + '.pdf')
                img_node['uri'] = fname + '.pdf'
                img_node['candidates'] = {'*': fname + '.pdf'}

    if dst_fname:
        # the builder needs not to copy one more time, so pop it if exists.
        translator.builder.images.pop(img_node['uri'], None)
        # pathname of the output relative to the builder's outdir (for logs)
        _name = dst_fname[len(str(translator.builder.outdir)) + 1:]

        if isNewer(dst_fname, src_fname):
            kernellog.verbose(app,
                              "convert: {out}/%s already exists and is newer" % _name)

        else:
            ok = False
            mkdir(path.dirname(dst_fname))

            if in_ext == '.dot':
                kernellog.verbose(app, 'convert DOT to: {out}/' + _name)
                if translator.builder.format == 'latex' and not dot_Tpdf:
                    # two steps: DOT -> SVG -> PDF; the second step is skipped
                    # when the first one has failed, there is no usable SVG
                    svg_fname = path.join(translator.builder.outdir, fname + '.svg')
                    ok = (dot2format(app, src_fname, svg_fname)
                          and svg2pdf_by_rsvg(app, svg_fname, dst_fname))

                else:
                    ok = dot2format(app, src_fname, dst_fname)

            elif in_ext == '.svg':
                kernellog.verbose(app, 'convert SVG to: {out}/' + _name)
                ok = svg2pdf(app, src_fname, dst_fname)

            if not ok:
                # A failed conversion may leave an empty or truncated output
                # file behind.  Remove it, otherwise the isNewer() test above
                # takes it for a valid result in the next build.
                if path.exists(dst_fname):
                    os.remove(dst_fname)
                img_node.replace_self(file2literal(src_fname))
333
334
def dot2format(app, dot_fname, out_fname):
    """Convert a DOT file to ``out_fname`` by calling ``dot(1)``.

    * ``dot_fname`` pathname of the input DOT file, including extension ``.dot``
    * ``out_fname`` pathname of the output file, including format extension

    The *format extension* (without the leading dot) is handed to the ``dot``
    command as ``-Txxx`` option (see ``man dot``).  Commonly used extensions:

    - ``.ps`` for PostScript,
    - ``.svg`` or ``svgz`` for Scalable Vector Graphics,
    - ``.fig`` for XFIG graphics and
    - ``.png`` or ``gif`` for common bitmap graphics.

    The output of ``dot`` is written to ``out_fname``.  Returns ``True`` if the
    command exits with status 0, otherwise a warning is logged and ``False`` is
    returned.
    """
    _, extension = path.splitext(out_fname)
    cmd = [dot_cmd, '-T%s' % extension[1:], dot_fname]

    with open(out_fname, "w") as out:
        exit_code = subprocess.call(cmd, stdout = out)
        succeeded = exit_code == 0
        if not succeeded:
            kernellog.warn(app,
                          "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
    return succeeded
360
def svg2pdf(app, svg_fname, pdf_fname):
    """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.

    Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
    from ImageMagick (https://www.imagemagick.org) for conversion.
    ``inkscape(1)`` is preferred if both are available.
    Returns ``True`` on success and ``False`` if an error occurred or if none
    of the two commands is available.

    * ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
    * ``pdf_fname`` pathname of the output PDF file with extension (``.pdf``)

    """
    if inkscape_cmd:
        cmd_name = 'inkscape(1)'
        if inkscape_ver_one:
            cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
        else:
            cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
    elif convert_cmd:
        cmd_name = 'convert(1)'
        cmd = [convert_cmd, svg_fname, pdf_fname]
    else:
        # Without this guard the command list would start with ``None`` and
        # the subprocess call would raise instead of reporting a failure.
        kernellog.warn(app, "no SVG to PDF conversion available / can't convert: %s"
                       % svg_fname)
        return False

    try:
        warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        exit_code = 0
    except subprocess.CalledProcessError as err:
        warning_msg = err.output
        exit_code = err.returncode

    if exit_code != 0:
        kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
        if warning_msg:
            # the tool's output is only logged: never fail on odd bytes in it
            kernellog.warn(app, "Warning msg from %s: %s"
                           % (cmd_name, str(warning_msg, 'utf-8', 'replace')))
    elif warning_msg:
        kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s"
                          % (cmd_name, str(warning_msg, 'utf-8', 'replace')))

    return exit_code == 0
400
def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
    """Convert SVG to PDF, preferably with the ``rsvg-convert(1)`` command.

    * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
    * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``

    The input SVG file should be the one generated by ``dot2format()``.

    Without ``rsvg-convert(1)`` the conversion is delegated to ``svg2pdf()``.
    Returns ``True`` on success, otherwise ``False``.

    """
    if rsvg_convert_cmd is None:
        return svg2pdf(app, svg_fname, pdf_fname)

    cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
    # use stdout and stderr from parent
    exit_code = subprocess.call(cmd)
    if exit_code == 0:
        return True

    kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
    return False
425
426
427# image handling
428# ---------------------
429
def visit_kernel_image(self, node):    # pylint: disable=W0613
    """Visitor of the ``kernel_image`` Node.

    The first child of ``node`` is the wrapped ``image`` node; it is handed to
    ``convert_image(...)`` together with the visiting translator.
    """
    convert_image(node[0], self)
437
class kernel_image(nodes.image):
    """Node for ``kernel-image`` directive.

    Wraps the image node built by the ``KernelImage`` directive.
    """
441
class KernelImage(images.Image):
    u"""KernelImage directive

    Inherits everything from the ``.. image::`` directive, except *remote URI*
    and *glob* pattern, which are rejected. The KernelImage wraps the image
    node into a kernel_image node. See ``visit_kernel_image``.
    """

    def run(self):
        """Build the image node and wrap it into a ``kernel_image`` node.

        Raises a *severe* directive error for a glob pattern (``.*`` suffix)
        or a remote URI (containing ``://``).  The result of the base class is
        returned unchanged if it has two items or starts with a system
        message.
        """
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s": glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = images.Image.run(self)
        if len(result) == 2 or isinstance(result[0], nodes.system_message):
            return result

        (image_node,) = result
        # wrap image node into a kernel_image node / see visitors
        return [kernel_image('', image_node)]
463
464# figure handling
465# ---------------------
466
def visit_kernel_figure(self, node):   # pylint: disable=W0613
    """Visitor of the ``kernel_figure`` Node.

    The first child of ``node`` is the wrapped ``figure`` node, whose first
    child is the ``image`` node; the latter is handed to
    ``convert_image(...)`` together with the visiting translator.
    """
    figure_node = node[0]
    convert_image(figure_node[0], self)
474
class kernel_figure(nodes.figure):
    """Node for ``kernel-figure`` directive.

    Wraps the figure node built by the ``KernelFigure`` directive.
    """
477
class KernelFigure(Figure):
    u"""KernelFigure directive

    Inherits everything from the ``.. figure::`` directive, except *remote URI*
    and *glob* pattern, which are rejected.  The KernelFigure wraps the figure
    node into a kernel_figure node. See ``visit_kernel_figure``.
    """

    def run(self):
        """Build the figure node and wrap it into a ``kernel_figure`` node.

        Raises a *severe* directive error for a glob pattern (``.*`` suffix)
        or a remote URI (containing ``://``).  The result of the base class is
        returned unchanged if it has two items or starts with a system
        message.
        """
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s":'
                ' glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = Figure.run(self)
        if len(result) == 2 or isinstance(result[0], nodes.system_message):
            return result

        (figure_node,) = result
        # wrap figure node into a kernel_figure node / see visitors
        return [kernel_figure('', figure_node)]
500
501
502# render handling
503# ---------------------
504
def visit_kernel_render(self, node):
    """Visitor of the ``kernel_render`` Node.

    If the markup is known and the rendering tool is available, the markup of
    the ``literal_block`` child node is saved into a file below the builder's
    image folder.  The ``literal_block`` node is then replaced by a newly
    created ``image`` node pointing to that file, and the image node is handed
    to ``convert_image(...)``.  Otherwise the node is left untouched, which
    keeps the raw markup.

    The file name is built from the markup name and the SHA1 digest of the
    markup; an already existing file is not written again.
    """
    app = self.builder.app
    srclang = node.get('srclang')

    kernellog.verbose(app, 'visit kernel-render node lang: "%s"' % (srclang))

    tmp_ext = RENDER_MARKUP_EXT.get(srclang, None)
    if tmp_ext is None:
        kernellog.warn(app, 'kernel-render: "%s" unknown / include raw.' % (srclang))
        return

    if tmp_ext == '.dot' and not dot_cmd:
        kernellog.verbose(app, "dot from graphviz not available / include raw.")
        return

    literal_block = node[0]
    code = literal_block.astext()

    digest = sha1(code.encode('utf-8')).hexdigest()
    img_file = '%s-%s' % (srclang, digest) + tmp_ext
    tmp_fname = path.join(self.builder.outdir, self.builder.imagedir, img_file)

    if not path.isfile(tmp_fname):
        mkdir(path.dirname(tmp_fname))
        with open(tmp_fname, "w") as out:
            out.write(code)

    img_node = nodes.image(node.rawsource, **node.attributes)
    img_uri = path.join(self.builder.imgpath, img_file)
    img_node['uri'] = img_uri
    img_node['candidates'] = {'*': img_uri}

    literal_block.replace_self(img_node)
    convert_image(img_node, self, tmp_fname)
548
549
class kernel_render(nodes.General, nodes.Inline, nodes.Element):
    """Node for ``kernel-render`` directive.

    Wraps the ``literal_block`` node holding the markup to render.
    """
553
class KernelRender(Figure):
    u"""KernelRender directive

    Render content by external tool.  Has all the options known from the
    *figure*  directive, plus option ``caption``.  If ``caption`` has a
    value, a figure node with the *caption* is inserted. If not, a
    kernel_render node is inserted.

    The KernelRender directive wraps the text of the directive into a
    literal_block node and wraps it into a kernel_render node. See
    ``visit_kernel_render``.
    """
    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = False

    # inherit options from 'figure'
    option_spec = Figure.option_spec.copy()
    option_spec['caption'] = directives.unchanged

    def run(self):
        """Return the list of nodes built by ``build_node()``.

        ``build_node()`` reports an unknown source language or missing content
        as a list holding one system message.  Such a list is returned as it
        is; wrapping it once more would hand a nested list to docutils.
        """
        result = self.build_node()
        if isinstance(result, list):
            return result
        return [result]

    def build_node(self):
        """Build the node for the directive.

        Returns a ``kernel_render`` node (wrapped into a ``figure`` node with
        caption if option ``caption`` is set).  If the source language is
        unknown or the directive has no content, a list with one warning
        (system message) is returned instead.
        """

        srclang = self.arguments[0].strip()
        if srclang not in RENDER_MARKUP_EXT:
            return [self.state_machine.reporter.warning(
                'Unknown source language "%s", use one of: %s.' % (
                    srclang, ",".join(RENDER_MARKUP_EXT)),
                line=self.lineno)]

        code = '\n'.join(self.content)
        if not code.strip():
            return [self.state_machine.reporter.warning(
                'Ignoring "%s" directive without content.' % (
                    self.name),
                line=self.lineno)]

        node = kernel_render()
        node['alt'] = self.options.get('alt','')
        node['srclang'] = srclang
        literal_node = nodes.literal_block(code, code)
        node += literal_node

        caption = self.options.get('caption')
        if caption:
            # parse caption's content
            parsed = nodes.Element()
            self.state.nested_parse(
                ViewList([caption], source=''), self.content_offset, parsed)
            caption_node = nodes.caption(
                parsed[0].rawsource, '', *parsed[0].children)
            caption_node.source = parsed[0].source
            caption_node.line = parsed[0].line

            # the figure node takes over all options of the directive
            figure_node = nodes.figure('', node)
            for k,v in self.options.items():
                figure_node[k] = v
            figure_node += caption_node

            node = figure_node

        return node
619
def add_kernel_figure_to_std_domain(app, doctree):
    """Add kernel-figure anchors to 'std' domain.

    The ``StandardDomain.process_doc(..)`` method does not know how to resolve
    the caption (label) of ``kernel-figure`` directive (it only knows about
    standard nodes, e.g. table, figure etc.). Without any additional handling
    this will result in a 'undefined label' for kernel-figures.

    This handler adds the explicit labels of kernel-figures to the labels of
    the 'std' domain, using the text of the figure's caption as title.
    """

    std = app.env.domains["std"]
    docname = app.env.docname
    labels = std.data["labels"]

    for name, explicit in doctree.nametypes.items():
        # only explicit names with an ID are of interest
        labelid = doctree.nameids[name] if explicit else None
        if labelid is None:
            continue

        node = doctree.ids[labelid]
        if node.tagname != 'kernel_figure':
            continue

        # NOTE(review): next_node() presumably returns the wrapped figure node,
        # whose children are iterated here -- confirm against docutils.
        caption = next(
            (n for n in node.next_node() if n.tagname == 'caption'), None)
        if caption is not None:
            # add label to std domain
            labels[name] = docname, labelid, clean_astext(caption)
650