src/common/tuklib_mbstr_wrap.h

*128836d3SXin LI// SPDX-License-Identifier: 0BSD
*128836d3SXin LI
*128836d3SXin LI///////////////////////////////////////////////////////////////////////////////
*128836d3SXin LI//
*128836d3SXin LI/// \file       tuklib_mbstr_wrap.h
*128836d3SXin LI/// \brief      Word wrapping for multibyte strings
*128836d3SXin LI///
*128836d3SXin LI/// The word wrapping functions are intended to be usable, for example,
*128836d3SXin LI/// for printing --help text in command line tools. While manually-wrapped
*128836d3SXin LI/// --help text allows precise formatting, such freedom requires translators
*128836d3SXin LI/// to count spaces and determine where line breaks should occur. It's
*128836d3SXin LI/// tedious and error-prone, and experience has shown that only some
*128836d3SXin LI/// translators do it well. Automatic word wrapping is less flexible but
*128836d3SXin LI/// results in a polished-enough look with less effort from everyone.
*128836d3SXin LI/// Right-to-left languages and languages that don't use spaces between
*128836d3SXin LI/// words will still need extra effort though.
*128836d3SXin LI//
*128836d3SXin LI//  Author:     Lasse Collin
*128836d3SXin LI//
*128836d3SXin LI///////////////////////////////////////////////////////////////////////////////
*128836d3SXin LI
*128836d3SXin LI#ifndef TUKLIB_MBSTR_WRAP_H
*128836d3SXin LI#define TUKLIB_MBSTR_WRAP_H
*128836d3SXin LI
*128836d3SXin LI#include "tuklib_common.h"
*128836d3SXin LI#include <stdio.h>
*128836d3SXin LI
*128836d3SXin LITUKLIB_DECLS_BEGIN
*128836d3SXin LI
*128836d3SXin LI/// One or more output lines exceeded right_margin.
*128836d3SXin LI/// This is only a warning; everything was still printed successfully.
*128836d3SXin LI#define TUKLIB_WRAP_WARN_OVERLONG   0x01
*128836d3SXin LI
*128836d3SXin LI/// Error writing to the output FILE. The error flag in the FILE
*128836d3SXin LI/// should have been set as well.
*128836d3SXin LI#define TUKLIB_WRAP_ERR_IO          0x02
*128836d3SXin LI
*128836d3SXin LI/// Invalid options in struct tuklib_wrap_opt.
*128836d3SXin LI/// Nothing was printed.
*128836d3SXin LI#define TUKLIB_WRAP_ERR_OPT         0x04
*128836d3SXin LI
*128836d3SXin LI/// Invalid or unsupported multibyte character in the input string:
*128836d3SXin LI/// either mbrtowc() failed or wcwidth() returned a negative value.
*128836d3SXin LI#define TUKLIB_WRAP_ERR_STR         0x08
*128836d3SXin LI
*128836d3SXin LI/// Only tuklib_wrapf(): Error in converting the format string.
*128836d3SXin LI/// It's either a memory allocation failure or something bad with the
*128836d3SXin LI/// format string or arguments.
*128836d3SXin LI#define TUKLIB_WRAP_ERR_FORMAT      0x10
*128836d3SXin LI
*128836d3SXin LI/// Options for tuklib_wraps() and tuklib_wrapf()
*128836d3SXin LIstruct tuklib_wrap_opt {
*128836d3SXin LI	/// Indentation of the first output line after `\n` or `\r`.
*128836d3SXin LI	/// This can be anything less than right_margin.
*128836d3SXin LI	unsigned short left_margin;
*128836d3SXin LI
*128836d3SXin LI	/// Column where word-wrapped continuation lines start.
*128836d3SXin LI	/// This can be anything less than right_margin.
*128836d3SXin LI	unsigned short left_cont;
*128836d3SXin LI
*128836d3SXin LI	/// Column where the text after `\v` will start, either on the current
*128836d3SXin LI	/// line (when there is room to add at least one space) or on a new
*128836d3SXin LI	/// empty line.
*128836d3SXin LI	unsigned short left2_margin;
*128836d3SXin LI
*128836d3SXin LI	/// Like left_cont but for text after a `\v`. However, this must
*128836d3SXin LI	/// be greater than or equal to left2_margin in addition to being
*128836d3SXin LI	/// less than right_margin.
*128836d3SXin LI	unsigned short left2_cont;
*128836d3SXin LI
*128836d3SXin LI	/// Right margin; TUKLIB_WRAP_WARN_OVERLONG is reported if an output
*128836d3SXin LI	/// line exceeds it. For 80-column terminals, 79 is recommended for
*128836d3SXin LI	/// maximum portability: 80 usually works but results in unwanted
*128836d3SXin LI	/// empty lines on the rare terminal that moves the cursor to the
*128836d3SXin LI	/// next line immediately when the last column has been used.
*128836d3SXin LI	unsigned short right_margin;
*128836d3SXin LI};
*128836d3SXin LI
*128836d3SXin LI#define tuklib_wraps TUKLIB_SYMBOL(tuklib_wraps)
*128836d3SXin LIextern int tuklib_wraps(FILE *stream, const struct tuklib_wrap_opt *opt,
*128836d3SXin LI		const char *str);
*128836d3SXin LI///<
*128836d3SXin LI/// \brief      Word wrap a multibyte string and write it to a FILE
*128836d3SXin LI///
*128836d3SXin LI/// Word wrapping is done only at spaces and at the special control characters
*128836d3SXin LI/// described below. Multiple consecutive spaces are handled properly: strings
*128836d3SXin LI/// that have two (or more) spaces after a full sentence will look good even
*128836d3SXin LI/// when the spaces occur at a word wrapping boundary. Trailing spaces are
*128836d3SXin LI/// ignored at the end of a line or at the end of a string.
*128836d3SXin LI///
*128836d3SXin LI/// The following control characters have been repurposed:
*128836d3SXin LI///
*128836d3SXin LI///   - `\t` = Zero-width space allows a line break without producing any
*128836d3SXin LI///            output by itself. This can be useful after hard hyphens as
*128836d3SXin LI///            hyphens aren't otherwise used for line breaking. This can also
*128836d3SXin LI///            be useful in languages that don't use spaces between words.
*128836d3SXin LI///            (The Unicode character U+200B isn't supported.)
*128836d3SXin LI///   - `\b` = Text between a pair of `\b` characters is treated as an
*128836d3SXin LI///            unbreakable block (not wrapped even if there are spaces).
*128836d3SXin LI///            For example, a non-breaking space can be done like
*128836d3SXin LI///            in `"123\b \bMiB"`. Control characters (like `\n` or `\t`)
*128836d3SXin LI///            aren't allowed before the closing `\b`. If closing `\b` is
*128836d3SXin LI///            missing, the block extends to the end of the string. Empty
*128836d3SXin LI///            blocks are treated as zero-width characters. If line breaks
*128836d3SXin LI///            are possible around an empty block (like in `"foo \b\b bar"`
*128836d3SXin LI///            or `"foo \b"`), it can result in weird output.
*128836d3SXin LI///   - `\v` = Change to alternative indentation (left2_margin).
*128836d3SXin LI///   - `\r` = Reset back to the initial indentation and add a newline.
*128836d3SXin LI///            The next line will be indented by left_margin.
*128836d3SXin LI///   - `\n` = Add a newline without resetting the effect of `\v`. The
*128836d3SXin LI///            next line will be indented by left_margin or left2_margin
*128836d3SXin LI///            (not left_cont or left2_cont).
*128836d3SXin LI///
*128836d3SXin LI/// Only `\n` should appear in translatable strings. `\t` works too but
*128836d3SXin LI/// even that might confuse some translators even if there is a TRANSLATORS
*128836d3SXin LI/// comment explaining its meaning.
*128836d3SXin LI///
*128836d3SXin LI/// To use the other control characters in messages, one should use
*128836d3SXin LI/// tuklib_wrapf() with an appropriate printf format string to combine
*128836d3SXin LI/// translatable strings with non-translatable portions. For example:
*128836d3SXin LI///
*128836d3SXin LI/// \code{.c}
*128836d3SXin LI/// static const struct tuklib_wrap_opt wrap2 = { 2,  2, 22, 22, 79 };
*128836d3SXin LI/// int e = 0;
*128836d3SXin LI/// ...
*128836d3SXin LI/// e |= tuklib_wrapf(stdout, &wrap2,
*128836d3SXin LI///                   "-h, --help\v%s\r"
*128836d3SXin LI///                   "    --version\v%s",
*128836d3SXin LI///                   W_("display this help and exit"),
*128836d3SXin LI///                   W_("display version information and exit"));
*128836d3SXin LI/// ...
*128836d3SXin LI/// if (e != 0) {
*128836d3SXin LI///     // Handle warning or error.
*128836d3SXin LI///     ...
*128836d3SXin LI/// }
*128836d3SXin LI/// \endcode
*128836d3SXin LI///
*128836d3SXin LI/// Control characters other than `\n` and `\t` are unusable in
*128836d3SXin LI/// translatable strings:
*128836d3SXin LI///
*128836d3SXin LI///   - Gettext tools show annoying warnings if C escape sequences other
*128836d3SXin LI///     than `\n` or `\t` are seen. (Otherwise they still work perfectly
*128836d3SXin LI///     fine though.)
*128836d3SXin LI///
*128836d3SXin LI///   - While at least Poedit and Lokalize support all escapes, some
*128836d3SXin LI///     editors only support `\n` and `\t`.
*128836d3SXin LI///
*128836d3SXin LI///   - They could confuse some translators, resulting in broken
*128836d3SXin LI///     translations.
*128836d3SXin LI///
*128836d3SXin LI/// Using non-control characters would solve some issues but it wouldn't
*128836d3SXin LI/// help with the unfortunate real-world issue that some translators would
*128836d3SXin LI/// likely have trouble understanding a new syntax. The Gettext manual
*128836d3SXin LI/// specifically warns about this, see the subheading "No unusual markup"
*128836d3SXin LI/// in `info (gettext)Preparing Strings`. (While using `\t` for zero-width
*128836d3SXin LI/// space is such custom markup, most translators will never need it.)
*128836d3SXin LI///
*128836d3SXin LI/// Translators can use the Unicode character U+00A0 (or U+202F) if they
*128836d3SXin LI/// need a non-breaking space. For example, in French a non-breaking space
*128836d3SXin LI/// may be needed before colons and question marks (U+00A0 is common in
*128836d3SXin LI/// real-world French PO files).
*128836d3SXin LI///
*128836d3SXin LI/// Using a non-ASCII char in a string in the C code (like `"123\u00A0MiB"`)
*128836d3SXin LI/// can work if one tells xgettext that input encoding is UTF-8, one
*128836d3SXin LI/// ensures that the C compiler uses UTF-8 as the input charset, and one
*128836d3SXin LI/// is certain that the program is *always* run under a UTF-8 locale.
*128836d3SXin LI/// Unfortunately a portable program cannot make these kinds of assumptions,
*128836d3SXin LI/// which means that there is no pretty way to have a non-breaking space in
*128836d3SXin LI/// a translatable string.
*128836d3SXin LI///
*128836d3SXin LI/// Optional: To tell translators which strings are automatically word
*128836d3SXin LI/// wrapped, see the macro `W_` in tuklib_gettext.h.
*128836d3SXin LI///
*128836d3SXin LI/// \param      stream      Output FILE stream. For decent performance, it
*128836d3SXin LI///                         should be in buffered mode because this function
*128836d3SXin LI///                         writes the output one byte at a time with fputc().
*128836d3SXin LI/// \param      opt         Word wrapping options; see struct tuklib_wrap_opt.
*128836d3SXin LI/// \param      str         Null-terminated multibyte string that is in
*128836d3SXin LI///                         the encoding used by the current locale.
*128836d3SXin LI///
*128836d3SXin LI/// \return     Returns 0 on success. If an error or warning occurs, one of
*128836d3SXin LI///             the TUKLIB_WRAP_* codes is returned. Those codes are powers
*128836d3SXin LI///             of two. When warning/error detection can be delayed, the
*128836d3SXin LI///             return values can be accumulated from multiple calls using
*128836d3SXin LI///             bitwise-or into a single variable which can be checked after
*128836d3SXin LI///             all strings have (hopefully) been printed.
*128836d3SXin LI
*128836d3SXin LI#define tuklib_wrapf TUKLIB_SYMBOL(tuklib_wrapf)
*128836d3SXin LItuklib_attr_format_printf(3, 4)
*128836d3SXin LIextern int tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
*128836d3SXin LI		const char *fmt, ...);
*128836d3SXin LI///<
*128836d3SXin LI/// \brief      Format and word-wrap a multibyte string and write it to a FILE
*128836d3SXin LI///
*128836d3SXin LI/// This is like tuklib_wraps() except that the string to be wrapped is
*128836d3SXin LI/// created from a printf format string and its arguments.
*128836d3SXin LI///
*128836d3SXin LI/// \note       On platforms that lack vasprintf(), the intermediate
*128836d3SXin LI///             result from vsnprintf() must fit into a 128 KiB buffer;
*128836d3SXin LI///             if it doesn't, TUKLIB_WRAP_ERR_FORMAT is returned. This
*128836d3SXin LI///             limit doesn't apply on platforms that have vasprintf().
*128836d3SXin LI
*128836d3SXin LITUKLIB_DECLS_END
*128836d3SXin LI#endif