1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file tuklib_mbstr_wrap.h 6 /// \brief Word wrapping for multibyte strings 7 /// 8 /// The word wrapping functions are intended to be usable, for example, 9 /// for printing --help text in command line tools. While manually-wrapped 10 /// --help text allows precise formatting, such freedom requires translators 11 /// to count spaces and determine where line breaks should occur. It's 12 /// tedious and error prone, and experience has shown that only some 13 /// translators do it well. Automatic word wrapping is less flexible but 14 /// results in polished-enough look with less effort from everyone. 15 /// Right-to-left languages and languages that don't use spaces between 16 /// words will still need extra effort though. 17 // 18 // Author: Lasse Collin 19 // 20 /////////////////////////////////////////////////////////////////////////////// 21 22 #ifndef TUKLIB_MBSTR_WRAP_H 23 #define TUKLIB_MBSTR_WRAP_H 24 25 #include "tuklib_common.h" 26 #include <stdio.h> 27 28 TUKLIB_DECLS_BEGIN 29 30 /// One or more output lines exceeded right_margin. 31 /// This only a warning; everything was still printed successfully. 32 #define TUKLIB_WRAP_WARN_OVERLONG 0x01 33 34 /// Error writing to to the output FILE. The error flag in the FILE 35 /// should have been set as well. 36 #define TUKLIB_WRAP_ERR_IO 0x02 37 38 /// Invalid options in struct tuklib_wrap_opt. 39 /// Nothing was printed. 40 #define TUKLIB_WRAP_ERR_OPT 0x04 41 42 /// Invalid or unsupported multibyte character in the input string: 43 /// either mbrtowc() failed or wcwidth() returned a negative value. 44 #define TUKLIB_WRAP_ERR_STR 0x08 45 46 /// Only tuklib_wrapf(): Error in converting the format string. 47 /// It's either a memory allocation failure or something bad with the 48 /// format string or arguments. 49 #define TUKLIB_WRAP_ERR_FORMAT 0x10 50 51 /// Options for tuklib_wraps() and tuklib_wrapf() 52 struct tuklib_wrap_opt { 53 /// Indentation of the first output line after `\n` or `\r`. 54 /// This can be anything less than right_margin. 55 unsigned short left_margin; 56 57 /// Column where word-wrapped continuation lines start. 58 /// This can be anything less than right_margin. 59 unsigned short left_cont; 60 61 /// Column where the text after `\v` will start, either on the current 62 /// line (when there is room to add at least one space) or on a new 63 /// empty line. 64 unsigned short left2_margin; 65 66 /// Like left_cont but for text after a `\v`. However, this must 67 /// be greater than or equal to left2_margin in addition to being 68 /// less than right_margin. 69 unsigned short left2_cont; 70 71 /// For 80-column terminals, it is recommended to use 79 here for 72 /// maximum portability. 80 will work most of the time but it will 73 /// result in unwanted empty lines in the rare case where a terminal 74 /// moves the cursor to the beginning of the next line immediately 75 /// when the last column has been used. 76 unsigned short right_margin; 77 }; 78 79 #define tuklib_wraps TUKLIB_SYMBOL(tuklib_wraps) 80 extern int tuklib_wraps(FILE *stream, const struct tuklib_wrap_opt *opt, 81 const char *str); 82 ///< 83 /// \brief Word wrap a multibyte string and write it to a FILE 84 /// 85 /// Word wrapping is done only at spaces and at the special control characters 86 /// described below. Multiple consecutive spaces are handled properly: strings 87 /// that have two (or more) spaces after a full sentence will look good even 88 /// when the spaces occur at a word wrapping boundary. Trailing spaces are 89 /// ignored at the end of a line or at the end of a string. 90 /// 91 /// The following control characters have been repurposed: 92 /// 93 /// - `\t` = Zero-width space allows a line break without producing any 94 /// output by itself. This can be useful after hard hyphens as 95 /// hyphens aren't otherwise used for line breaking. This can also 96 /// be useful in languages that don't use spaces between words. 97 /// (The Unicode character U+200B isn't supported.) 98 /// - `\b` = Text between a pair of `\b` characters is treated as an 99 /// unbreakable block (not wrapped even if there are spaces). 100 /// For example, a non-breaking space can be done like 101 /// in `"123\b \bMiB"`. Control characters (like `\n` or `\t`) 102 /// aren't allowed before the closing `\b`. If closing `\b` is 103 /// missing, the block extends to the end of the string. Empty 104 /// blocks are treated as zero-width characters. If line breaks 105 /// are possible around an empty block (like in `"foo \b\b bar"` 106 /// or `"foo \b"`), it can result in weird output. 107 /// - `\v` = Change to alternative indentation (left2_margin). 108 /// - `\r` = Reset back to the initial indentation and add a newline. 109 /// The next line will be indented by left_margin. 110 /// - `\n` = Add a newline without resetting the effect of `\v`. The 111 /// next line will be indented by left_margin or left2_margin 112 /// (not left_cont or left2_cont). 113 /// 114 /// Only `\n` should appear in translatable strings. `\t` works too but 115 /// even that might confuse some translators even if there is a TRANSLATORS 116 /// comment explaining its meaning. 117 /// 118 /// To use the other control characters in messages, one should use 119 /// tuklib_wrapf() with appropriate printf format string to combine 120 /// translatable strings with non-translatable portions. For example: 121 /// 122 /// \code{.c} 123 /// static const struct tuklib_wrap_opt wrap2 = { 2, 2, 22, 22, 79 }; 124 /// int e = 0; 125 /// ... 126 /// e |= tuklib_wrapf(stdout, &wrap2, 127 /// "-h, --help\v%s\r" 128 /// " --version\v%s", 129 /// W_("display this help and exit"), 130 /// W_("display version information and exit")); 131 /// ... 132 /// if (e != 0) { 133 /// // Handle warning or error. 134 /// ... 135 /// } 136 /// \endcode 137 /// 138 /// Control characters other than `\n` and `\t` are unusable in 139 /// translatable strings: 140 /// 141 /// - Gettext tools show annoying warnings if C escape sequences other 142 /// than `\n` or `\t` are seen. (Otherwise they still work perfectly 143 /// fine though.) 144 /// 145 /// - While at least Poedit and Lokalize support all escapes, some 146 /// editors only support `\n` and `\t`. 147 /// 148 /// - They could confuse some translators, resulting in broken 149 /// translations. 150 /// 151 /// Using non-control characters would solve some issues but it wouldn't 152 /// help with the unfortunate real-world issue that some translators would 153 /// likely have trouble understanding a new syntax. The Gettext manual 154 /// specifically warns about this, see the subheading "No unusual markup" 155 /// in `info (gettext)Preparing Strings`. (While using `\t` for zero-width 156 /// space is such custom markup, most translators will never need it.) 157 /// 158 /// Translators can use the Unicode character U+00A0 (or U+202F) if they 159 /// need a non-breaking space. For example, in French a non-breaking space 160 /// may be needed before colons and question marks (U+00A0 is common in 161 /// real-world French PO files). 162 /// 163 /// Using a non-ASCII char in a string in the C code (like `"123\u00A0MiB"`) 164 /// can work if one tells xgettext that input encoding is UTF-8, one 165 /// ensures that the C compiler uses UTF-8 as the input charset, and one 166 /// is certain that the program is *always* run under an UTF-8 locale. 167 /// Unfortunately a portable program cannot make this kind of assumptions, 168 /// which means that there is no pretty way to have a non-breaking space in 169 /// a translatable string. 170 /// 171 /// Optional: To tell translators which strings are automatically word 172 /// wrapped, see the macro `W_` in tuklib_gettext.h. 173 /// 174 /// \param stream Output FILE stream. For decent performance, it 175 /// should be in buffered mode because this function 176 /// writes the output one byte at a time with fputc(). 177 /// \param opt Word wrapping options. 178 /// \param str Null-terminated multibyte string that is in 179 /// the encoding used by the current locale. 180 /// 181 /// \return Returns 0 on success. If an error or warning occurs, one of 182 /// TUKLIB_WRAP_* codes is returned. Those codes are powers 183 /// of two. When warning/error detection can be delayed, the 184 /// return values can be accumulated from multiple calls using 185 /// bitwise-or into a single variable which can be checked after 186 /// all strings have (hopefully) been printed. 187 188 #define tuklib_wrapf TUKLIB_SYMBOL(tuklib_wrapf) 189 tuklib_attr_format_printf(3, 4) 190 extern int tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt, 191 const char *fmt, ...); 192 ///< 193 /// \brief Format and word-wrap a multibyte string and write it to a FILE 194 /// 195 /// This is like tuklib_wraps() except that this takes a printf 196 /// format string. 197 /// 198 /// \note On platforms that lack vasprintf(), the intermediate 199 /// result from vsnprintf() must fit into a 128 KiB buffer. 200 /// TUKLIB_WRAP_ERR_FORMAT is returned if it doesn't but 201 /// only on platforms that lack vasprintf(). 202 203 TUKLIB_DECLS_END 204 #endif 205