xref: /freebsd/contrib/xz/src/common/tuklib_mbstr_wrap.h (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       tuklib_mbstr_wrap.h
6 /// \brief      Word wrapping for multibyte strings
7 ///
8 /// The word wrapping functions are intended to be usable, for example,
9 /// for printing --help text in command line tools. While manually-wrapped
10 /// --help text allows precise formatting, such freedom requires translators
11 /// to count spaces and determine where line breaks should occur. It's
12 /// tedious and error prone, and experience has shown that only some
13 /// translators do it well. Automatic word wrapping is less flexible but
14 /// results in a polished-enough look with less effort from everyone.
15 /// Right-to-left languages and languages that don't use spaces between
16 /// words will still need extra effort though.
17 //
18 //  Author:     Lasse Collin
19 //
20 ///////////////////////////////////////////////////////////////////////////////
21 
22 #ifndef TUKLIB_MBSTR_WRAP_H
23 #define TUKLIB_MBSTR_WRAP_H
24 
25 #include "tuklib_common.h"
26 #include <stdio.h>
27 
28 TUKLIB_DECLS_BEGIN
29 
30 /// One or more output lines exceeded right_margin.
31 /// This is only a warning; everything was still printed successfully.
32 #define TUKLIB_WRAP_WARN_OVERLONG   0x01
33 
34 /// Error writing to the output FILE. The error flag in the FILE
35 /// should have been set as well.
36 #define TUKLIB_WRAP_ERR_IO          0x02
37 
38 /// Invalid options in struct tuklib_wrap_opt.
39 /// Nothing was printed.
40 #define TUKLIB_WRAP_ERR_OPT         0x04
41 
42 /// Invalid or unsupported multibyte character in the input string:
43 /// either mbrtowc() failed or wcwidth() returned a negative value.
44 #define TUKLIB_WRAP_ERR_STR         0x08
45 
46 /// Only tuklib_wrapf(): Error in converting the format string.
47 /// It's either a memory allocation failure or something bad with the
48 /// format string or arguments.
49 #define TUKLIB_WRAP_ERR_FORMAT      0x10
50 
51 /// Options for tuklib_wraps() and tuklib_wrapf()
52 struct tuklib_wrap_opt {
53 	/// Indentation of the first output line after `\n` or `\r`.
54 	/// This can be anything less than right_margin.
55 	unsigned short left_margin;
56 
57 	/// Column where word-wrapped continuation lines start.
58 	/// This can be anything less than right_margin.
59 	unsigned short left_cont;
60 
61 	/// Column where the text after `\v` will start, either on the current
62 	/// line (when there is room to add at least one space) or on a new
63 	/// empty line.
64 	unsigned short left2_margin;
65 
66 	/// Like left_cont but for text after a `\v`. However, this must
67 	/// be greater than or equal to left2_margin in addition to being
68 	/// less than right_margin.
69 	unsigned short left2_cont;
70 
71 	/// Right margin (maximum line width). For 80-column terminals, it is
72 	/// recommended to use 79 here for maximum portability. 80 will work
73 	/// most of the time but it will result in unwanted empty lines in the
74 	/// rare case where a terminal moves the cursor to the beginning of the
75 	/// next line immediately when the last column has been used.
76 	unsigned short right_margin;
77 };
78 
79 #define tuklib_wraps TUKLIB_SYMBOL(tuklib_wraps)
80 extern int tuklib_wraps(FILE *stream, const struct tuklib_wrap_opt *opt,
81 		const char *str);
82 ///<
83 /// \brief      Word wrap a multibyte string and write it to a FILE
84 ///
85 /// Word wrapping is done only at spaces and at the special control characters
86 /// described below. Multiple consecutive spaces are handled properly: strings
87 /// that have two (or more) spaces after a full sentence will look good even
88 /// when the spaces occur at a word wrapping boundary. Trailing spaces are
89 /// ignored at the end of a line or at the end of a string.
90 ///
91 /// The following control characters have been repurposed:
92 ///
93 ///   - `\t` = Zero-width space allows a line break without producing any
94 ///            output by itself. This can be useful after hard hyphens as
95 ///            hyphens aren't otherwise used for line breaking. This can also
96 ///            be useful in languages that don't use spaces between words.
97 ///            (The Unicode character U+200B isn't supported.)
98 ///   - `\b` = Text between a pair of `\b` characters is treated as an
99 ///            unbreakable block (not wrapped even if there are spaces).
100 ///            For example, a non-breaking space can be done like
101 ///            in `"123\b \bMiB"`. Control characters (like `\n` or `\t`)
102 ///            aren't allowed before the closing `\b`. If the closing `\b` is
103 ///            missing, the block extends to the end of the string. Empty
104 ///            blocks are treated as zero-width characters. If line breaks
105 ///            are possible around an empty block (like in `"foo \b\b bar"`
106 ///            or `"foo \b"`), it can result in weird output.
107 ///   - `\v` = Change to alternative indentation (left2_margin).
108 ///   - `\r` = Reset back to the initial indentation and add a newline.
109 ///            The next line will be indented by left_margin.
110 ///   - `\n` = Add a newline without resetting the effect of `\v`. The
111 ///            next line will be indented by left_margin or left2_margin
112 ///            (not left_cont or left2_cont).
113 ///
114 /// Only `\n` should appear in translatable strings. `\t` works too but
115 /// even that might confuse some translators even if there is a TRANSLATORS
116 /// comment explaining its meaning.
117 ///
118 /// To use the other control characters in messages, one should use
119 /// tuklib_wrapf() with an appropriate printf format string to combine
120 /// translatable strings with non-translatable portions. For example:
121 ///
122 /// \code{.c}
123 /// static const struct tuklib_wrap_opt wrap2 = { 2,  2, 22, 22, 79 };
124 /// int e = 0;
125 /// ...
126 /// e |= tuklib_wrapf(stdout, &wrap2,
127 ///                   "-h, --help\v%s\r"
128 ///                   "    --version\v%s",
129 ///                   W_("display this help and exit"),
130 ///                   W_("display version information and exit"));
131 /// ...
132 /// if (e != 0) {
133 ///     // Handle warning or error.
134 ///     ...
135 /// }
136 /// \endcode
137 ///
138 /// Control characters other than `\n` and `\t` are unusable in
139 /// translatable strings:
140 ///
141 ///   - Gettext tools show annoying warnings if C escape sequences other
142 ///     than `\n` or `\t` are seen. (Otherwise they still work perfectly
143 ///     fine though.)
144 ///
145 ///   - While at least Poedit and Lokalize support all escapes, some
146 ///     editors only support `\n` and `\t`.
147 ///
148 ///   - They could confuse some translators, resulting in broken
149 ///     translations.
150 ///
151 /// Using non-control characters would solve some issues but it wouldn't
152 /// help with the unfortunate real-world issue that some translators would
153 /// likely have trouble understanding a new syntax. The Gettext manual
154 /// specifically warns about this, see the subheading "No unusual markup"
155 /// in `info (gettext)Preparing Strings`. (While using `\t` for zero-width
156 /// space is such custom markup, most translators will never need it.)
157 ///
158 /// Translators can use the Unicode character U+00A0 (or U+202F) if they
159 /// need a non-breaking space. For example, in French a non-breaking space
160 /// may be needed before colons and question marks (U+00A0 is common in
161 /// real-world French PO files).
162 ///
163 /// Using a non-ASCII char in a string in the C code (like `"123\u00A0MiB"`)
164 /// can work if one tells xgettext that input encoding is UTF-8, one
165 /// ensures that the C compiler uses UTF-8 as the input charset, and one
166 /// is certain that the program is *always* run under a UTF-8 locale.
167 /// Unfortunately a portable program cannot make this kind of assumption,
168 /// which means that there is no pretty way to have a non-breaking space in
169 /// a translatable string.
170 ///
171 /// Optional: To tell translators which strings are automatically word
172 /// wrapped, see the macro `W_` in tuklib_gettext.h.
173 ///
174 /// \param      stream      Output FILE stream. For decent performance, it
175 ///                         should be in buffered mode because this function
176 ///                         writes the output one byte at a time with fputc().
177 /// \param      opt         Word wrapping options.
178 /// \param      str         Null-terminated multibyte string that is in
179 ///                         the encoding used by the current locale.
180 ///
181 /// \return     Returns 0 on success. If an error or warning occurs, one of
182 ///             the TUKLIB_WRAP_* codes is returned. Those codes are powers
183 ///             of two. When warning/error detection can be delayed, the
184 ///             return values can be accumulated from multiple calls using
185 ///             bitwise-or into a single variable which can be checked after
186 ///             all strings have (hopefully) been printed.
187 
188 #define tuklib_wrapf TUKLIB_SYMBOL(tuklib_wrapf)
189 tuklib_attr_format_printf(3, 4)
190 extern int tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
191 		const char *fmt, ...);
192 ///<
193 /// \brief      Format and word-wrap a multibyte string and write it to a FILE
194 ///
195 /// This is like tuklib_wraps() except that this takes a printf format
196 /// string and its arguments, and may return TUKLIB_WRAP_ERR_FORMAT.
197 ///
198 /// \note       On platforms that lack vasprintf(), the intermediate
199 ///             result from vsnprintf() must fit into a 128 KiB buffer.
200 ///             TUKLIB_WRAP_ERR_FORMAT is returned if it doesn't but
201 ///             only on platforms that lack vasprintf().
202 
203 TUKLIB_DECLS_END
204 #endif
205