// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_mbstr_wrap.h
/// \brief      Word wrapping for multibyte strings
///
/// The word wrapping functions are intended to be usable, for example,
/// for printing --help text in command line tools. While manually-wrapped
/// --help text allows precise formatting, such freedom requires translators
/// to count spaces and determine where line breaks should occur. It's
/// tedious and error prone, and experience has shown that only some
/// translators do it well. Automatic word wrapping is less flexible but
/// results in polished-enough look with less effort from everyone.
/// Right-to-left languages and languages that don't use spaces between
/// words will still need extra effort though.
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_MBSTR_WRAP_H
#define TUKLIB_MBSTR_WRAP_H

#include "tuklib_common.h"
#include <stdio.h>

// NOTE(review): TUKLIB_DECLS_BEGIN/TUKLIB_DECLS_END, TUKLIB_SYMBOL, and
// tuklib_attr_format_printf are not defined in this header; presumably
// they come from tuklib_common.h -- confirm there.
TUKLIB_DECLS_BEGIN

/// One or more output lines exceeded right_margin.
/// This is only a warning; everything was still printed successfully.
#define TUKLIB_WRAP_WARN_OVERLONG 0x01

/// Error writing to the output FILE. The error flag in the FILE
/// should have been set as well.
#define TUKLIB_WRAP_ERR_IO 0x02

/// Invalid options in struct tuklib_wrap_opt.
/// Nothing was printed.
#define TUKLIB_WRAP_ERR_OPT 0x04

/// Invalid or unsupported multibyte character in the input string:
/// either mbrtowc() failed or wcwidth() returned a negative value.
#define TUKLIB_WRAP_ERR_STR 0x08

/// Only tuklib_wrapf(): Error in converting the format string.
/// It's either a memory allocation failure or something bad with the
/// format string or arguments.
#define TUKLIB_WRAP_ERR_FORMAT 0x10

/// Options for tuklib_wraps() and tuklib_wrapf()
struct tuklib_wrap_opt {
	/// Indentation of the first output line after `\n` or `\r`.
	/// This can be anything less than right_margin.
	unsigned short left_margin;

	/// Column where word-wrapped continuation lines start.
	/// This can be anything less than right_margin.
	unsigned short left_cont;

	/// Column where the text after `\v` will start, either on the current
	/// line (when there is room to add at least one space) or on a new
	/// empty line.
	unsigned short left2_margin;

	/// Like left_cont but for text after a `\v`. However, this must
	/// be greater than or equal to left2_margin in addition to being
	/// less than right_margin.
	unsigned short left2_cont;

	/// Right margin for the output. If one or more output lines
	/// exceed it, TUKLIB_WRAP_WARN_OVERLONG is reported.
	///
	/// For 80-column terminals, it is recommended to use 79 here for
	/// maximum portability. 80 will work most of the time but it will
	/// result in unwanted empty lines in the rare case where a terminal
	/// moves the cursor to the beginning of the next line immediately
	/// when the last column has been used.
	unsigned short right_margin;
};

#define tuklib_wraps TUKLIB_SYMBOL(tuklib_wraps)
extern int tuklib_wraps(FILE *stream, const struct tuklib_wrap_opt *opt,
		const char *str);
///<
/// \brief      Word wrap a multibyte string and write it to a FILE
///
/// Word wrapping is done only at spaces and at the special control characters
/// described below. Multiple consecutive spaces are handled properly: strings
/// that have two (or more) spaces after a full sentence will look good even
/// when the spaces occur at a word wrapping boundary. Trailing spaces are
/// ignored at the end of a line or at the end of a string.
///
/// The following control characters have been repurposed:
///
///   - `\t` = Zero-width space allows a line break without producing any
///            output by itself. This can be useful after hard hyphens as
///            hyphens aren't otherwise used for line breaking. This can also
///            be useful in languages that don't use spaces between words.
///            (The Unicode character U+200B isn't supported.)
///   - `\b` = Text between a pair of `\b` characters is treated as an
///            unbreakable block (not wrapped even if there are spaces).
///            For example, a non-breaking space can be done like
///            in `"123\b \bMiB"`. Control characters (like `\n` or `\t`)
///            aren't allowed before the closing `\b`. If closing `\b` is
///            missing, the block extends to the end of the string. Empty
///            blocks are treated as zero-width characters. If line breaks
///            are possible around an empty block (like in `"foo \b\b bar"`
///            or `"foo \b"`), it can result in weird output.
///   - `\v` = Change to alternative indentation (left2_margin).
///   - `\r` = Reset back to the initial indentation and add a newline.
///            The next line will be indented by left_margin.
///   - `\n` = Add a newline without resetting the effect of `\v`. The
///            next line will be indented by left_margin or left2_margin
///            (not left_cont or left2_cont).
///
/// Only `\n` should appear in translatable strings. `\t` works too but
/// even that might confuse some translators even if there is a TRANSLATORS
/// comment explaining its meaning.
///
/// To use the other control characters in messages, one should use
/// tuklib_wrapf() with appropriate printf format string to combine
/// translatable strings with non-translatable portions. For example:
///
/// \code{.c}
/// static const struct tuklib_wrap_opt wrap2 = { 2, 2, 22, 22, 79 };
/// int e = 0;
/// ...
/// e |= tuklib_wrapf(stdout, &wrap2,
///     "-h, --help\v%s\r"
///     " --version\v%s",
///     W_("display this help and exit"),
///     W_("display version information and exit"));
/// ...
/// if (e != 0) {
///     // Handle warning or error.
///     ...
/// }
/// \endcode
///
/// Control characters other than `\n` and `\t` are unusable in
/// translatable strings:
///
///   - Gettext tools show annoying warnings if C escape sequences other
///     than `\n` or `\t` are seen. (Otherwise they still work perfectly
///     fine though.)
///
///   - While at least Poedit and Lokalize support all escapes, some
///     editors only support `\n` and `\t`.
///
///   - They could confuse some translators, resulting in broken
///     translations.
///
/// Using non-control characters would solve some issues but it wouldn't
/// help with the unfortunate real-world issue that some translators would
/// likely have trouble understanding a new syntax. The Gettext manual
/// specifically warns about this, see the subheading "No unusual markup"
/// in `info (gettext)Preparing Strings`. (While using `\t` for zero-width
/// space is such custom markup, most translators will never need it.)
///
/// Translators can use the Unicode character U+00A0 (or U+202F) if they
/// need a non-breaking space. For example, in French a non-breaking space
/// may be needed before colons and question marks (U+00A0 is common in
/// real-world French PO files).
///
/// Using a non-ASCII char in a string in the C code (like `"123\u00A0MiB"`)
/// can work if one tells xgettext that input encoding is UTF-8, one
/// ensures that the C compiler uses UTF-8 as the input charset, and one
/// is certain that the program is *always* run under a UTF-8 locale.
/// Unfortunately a portable program cannot make these kinds of assumptions,
/// which means that there is no pretty way to have a non-breaking space in
/// a translatable string.
///
/// Optional: To tell translators which strings are automatically word
/// wrapped, see the macro `W_` in tuklib_gettext.h.
///
/// \param      stream      Output FILE stream. For decent performance, it
///                         should be in buffered mode because this function
///                         writes the output one byte at a time with fputc().
/// \param      opt         Word wrapping options.
/// \param      str         Null-terminated multibyte string that is in
///                         the encoding used by the current locale.
///
/// \return     Returns 0 on success. If an error or warning occurs, one of
///             TUKLIB_WRAP_* codes is returned. Those codes are powers
///             of two. When warning/error detection can be delayed, the
///             return values can be accumulated from multiple calls using
///             bitwise-or into a single variable which can be checked after
///             all strings have (hopefully) been printed.

#define tuklib_wrapf TUKLIB_SYMBOL(tuklib_wrapf)
tuklib_attr_format_printf(3, 4)
extern int tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
		const char *fmt, ...);
///<
/// \brief      Format and word-wrap a multibyte string and write it to a FILE
///
/// This is like tuklib_wraps() except that this takes a printf
/// format string.
///
/// \param      stream      Output FILE stream, as in tuklib_wraps().
/// \param      opt         Word wrapping options, as in tuklib_wraps().
/// \param      fmt         printf format string. The formatted result is
///                         what gets word wrapped, so it may contain the
///                         control characters described for tuklib_wraps().
/// \param      ...         Arguments for the format string.
///
/// \return     Like tuklib_wraps(): 0 on success or a TUKLIB_WRAP_* code.
///             In addition, TUKLIB_WRAP_ERR_FORMAT is returned if
///             converting the format string fails.
///
/// \note       On platforms that lack vasprintf(), the intermediate
///             result from vsnprintf() must fit into a 128 KiB buffer.
///             TUKLIB_WRAP_ERR_FORMAT is returned if it doesn't but
///             only on platforms that lack vasprintf().

TUKLIB_DECLS_END
#endif