1 // SPDX-License-Identifier: 0BSD
2
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file tuklib_mbstr_wrap.c
6 /// \brief Word wraps a string and prints it to a FILE stream
7 ///
8 /// This depends on tuklib_mbstr_width.c.
9 //
10 // Author: Lasse Collin
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13
14 #include "tuklib_mbstr.h"
15 #include "tuklib_mbstr_wrap.h"
16 #include <stdarg.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20
21
22 extern int
tuklib_wraps(FILE * outfile,const struct tuklib_wrap_opt * opt,const char * str)23 tuklib_wraps(FILE *outfile, const struct tuklib_wrap_opt *opt, const char *str)
24 {
25 // left_cont may be less than left_margin. In that case, if the first
26 // word is extremely long, it will stay on the first line even if
27 // the line then gets overlong.
28 //
29 // On the other hand, left2_cont < left2_margin isn't allowed because
30 // it could result in inconsistent behavior when a very long word
31 // comes right after a \v.
32 //
33 // It is fine to have left2_margin < left_margin although it would be
34 // an odd use case.
35 if (!(opt->left_margin < opt->right_margin
36 && opt->left_cont < opt->right_margin
37 && opt->left2_margin <= opt->left2_cont
38 && opt->left2_cont < opt->right_margin))
39 return TUKLIB_WRAP_ERR_OPT;
40
41 // This is set to TUKLIB_WRAP_WARN_OVERLONG if one or more
42 // output lines extend past opt->right_margin columns.
43 int warn_overlong = 0;
44
45 // Indentation of the first output line after \n or \r.
46 // \v sets this to opt->left2_margin.
47 // \r resets this back to the original value.
48 size_t first_indent = opt->left_margin;
49
50 // Indentation of the output lines that occur due to word wrapping.
51 // \v sets this to opt->left2_cont and \r back to the original value.
52 size_t cont_indent = opt->left_cont;
53
54 // If word wrapping occurs, the newline isn't printed unless more
55 // text would be put on the continuation line. This is also used
56 // when \v needs to start on a new line.
57 bool pending_newline = false;
58
59 // Spaces are printed only when there is something else to put
60 // after the spaces on the line. This avoids unwanted empty lines
61 // in the output and makes it possible to ignore possible spaces
62 // before a \v character.
63 size_t pending_spaces = first_indent;
64
65 // Current output column. When cur_col == pending_spaces, nothing
66 // has been actually printed to the current output line.
67 size_t cur_col = pending_spaces;
68
69 while (true) {
70 // Number of bytes until the *next* line-break opportunity.
71 size_t len = 0;
72
73 // Number of columns until the *next* line-break opportunity.
74 size_t width = 0;
75
76 // Text between a pair of \b characters is treated as
77 // an unbreakable block even if it contains spaces.
78 // It must not contain any control characters before
79 // the closing \b.
80 bool unbreakable = false;
81
82 while (true) {
83 // Find the next character that we handle specially.
84 // In an unbreakable block, search only for the
85 // closing \b; if missing, the unbreakable block
86 // extends to the end of the string.
87 const size_t n = strcspn(str + len,
88 unbreakable ? "\b" : " \t\n\r\v\b");
89
90 // Calculate how many columns the characters need.
91 const size_t w = tuklib_mbstr_width_mem(str + len, n);
92 if (w == (size_t)-1)
93 return TUKLIB_WRAP_ERR_STR;
94
95 width += w;
96 len += n;
97
98 // \b isn't a line-break opportunity so it has to
99 // be handled here. For simplicity, empty blocks
100 // are treated as zero-width characters.
101 if (str[len] == '\b') {
102 ++len;
103 unbreakable = !unbreakable;
104 continue;
105 }
106
107 break;
108 }
109
110 // Determine if adding this chunk of text would make the
111 // current output line exceed opt->right_margin columns.
112 const bool too_long = cur_col + width > opt->right_margin;
113
114 // Wrap the line if needed. However:
115 //
116 // - Don't wrap if the current column is less than where
117 // the continuation line would begin. In that case
118 // the chunk wouldn't fit on the next line either so
119 // we just have to produce an overlong line.
120 //
121 // - Don't wrap if so far the line only contains spaces.
122 // Wrapping in that case would leave a weird empty line.
123 // NOTE: This "only contains spaces" condition is the
124 // reason why left2_margin > left2_cont isn't allowed.
125 if (too_long && cur_col > cont_indent
126 && cur_col > pending_spaces) {
127 // There might be trailing spaces or zero-width spaces
128 // which need to be ignored to keep the output pretty.
129 //
130 // Spaces need to be ignored because in some
131 // writing styles there are two spaces after
132 // a full stop. Example string:
133 //
134 // "Foo bar. Abc def."
135 // ^
136 // If the first space after the first full stop
137 // triggers word wrapping, both spaces must be
138 // ignored. Otherwise the next line would be
139 // indented too much.
140 //
141 // Zero-width spaces are ignored the same way
142 // because they are meaningless if an adjacent
143 // character is a space.
144 while (*str == ' ' || *str == '\t')
145 ++str;
146
147 // Don't print the newline here; only mark it as
148 // pending. This avoids an unwanted empty line if
149 // there is a \n or \r or \0 after the spaces have
150 // been ignored.
151 pending_newline = true;
152 pending_spaces = cont_indent;
153 cur_col = pending_spaces;
154
155 // Since str may have been incremented due to the
156 // ignored spaces, the loop needs to be restarted.
157 continue;
158 }
159
160 // Print the current chunk of text before the next
161 // line-break opportunity. If the chunk was empty,
162 // don't print anything so that the pending newline
163 // and pending spaces aren't printed on their own.
164 if (len > 0) {
165 if (pending_newline) {
166 pending_newline = false;
167 if (putc('\n', outfile) == EOF)
168 return TUKLIB_WRAP_ERR_IO;
169 }
170
171 while (pending_spaces > 0) {
172 if (putc(' ', outfile) == EOF)
173 return TUKLIB_WRAP_ERR_IO;
174
175 --pending_spaces;
176 }
177
178 for (size_t i = 0; i < len; ++i) {
179 // Ignore unbreakable block characters (\b).
180 const int c = (unsigned char)str[i];
181 if (c != '\b' && putc(c, outfile) == EOF)
182 return TUKLIB_WRAP_ERR_IO;
183 }
184
185 str += len;
186 cur_col += width;
187
188 // Remember if the line got overlong. If no other
189 // errors occur, we return warn_overlong. It might
190 // help in catching problematic strings.
191 if (too_long)
192 warn_overlong = TUKLIB_WRAP_WARN_OVERLONG;
193 }
194
195 // Handle the special character after the chunk of text.
196 switch (*str) {
197 case ' ':
198 // Regular space.
199 ++cur_col;
200 ++pending_spaces;
201 break;
202
203 case '\v':
204 // Set the alternative indentation settings.
205 first_indent = opt->left2_margin;
206 cont_indent = opt->left2_cont;
207
208 if (first_indent > cur_col) {
209 // Add one or more spaces to reach
210 // the column specified in first_indent.
211 pending_spaces += first_indent - cur_col;
212 } else {
213 // There is no room to add even one space
214 // before reaching the column first_indent.
215 pending_newline = true;
216 pending_spaces = first_indent;
217 }
218
219 cur_col = first_indent;
220 break;
221
222 case '\0': // Implicit newline at the end of the string.
223 case '\r': // Newline that also resets the effect of \v.
224 case '\n': // Newline without resetting the indentation mode.
225 if (putc('\n', outfile) == EOF)
226 return TUKLIB_WRAP_ERR_IO;
227
228 if (*str == '\0')
229 return warn_overlong;
230
231 if (*str == '\r') {
232 first_indent = opt->left_margin;
233 cont_indent = opt->left_cont;
234 }
235
236 pending_newline = false;
237 pending_spaces = first_indent;
238 cur_col = first_indent;
239 break;
240 }
241
242 // Skip the specially-handled character.
243 ++str;
244 }
245 }
246
247
248 extern int
tuklib_wrapf(FILE * stream,const struct tuklib_wrap_opt * opt,const char * fmt,...)249 tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
250 const char *fmt, ...)
251 {
252 va_list ap;
253 char *buf;
254
255 #ifdef HAVE_VASPRINTF
256 va_start(ap, fmt);
257
258 #ifdef __clang__
259 # pragma GCC diagnostic push
260 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
261 #endif
262 const int n = vasprintf(&buf, fmt, ap);
263 #ifdef __clang__
264 # pragma GCC diagnostic pop
265 #endif
266
267 va_end(ap);
268 if (n == -1)
269 return TUKLIB_WRAP_ERR_FORMAT;
270 #else
271 // Fixed buffer size is dumb but in practice one shouldn't need
272 // huge strings for *formatted* output. This simple method is safe
273 // with pre-C99 vsnprintf() implementations too which don't return
274 // the required buffer size (they return -1 or buf_size - 1) or
275 // which might not null-terminate the buffer in case it's too small.
276 const size_t buf_size = 128 * 1024;
277 buf = malloc(buf_size);
278 if (buf == NULL)
279 return TUKLIB_WRAP_ERR_FORMAT;
280
281 va_start(ap, fmt);
282 const int n = vsnprintf(buf, buf_size, fmt, ap);
283 va_end(ap);
284
285 if (n <= 0 || n >= (int)(buf_size - 1)) {
286 free(buf);
287 return TUKLIB_WRAP_ERR_FORMAT;
288 }
289 #endif
290
291 const int ret = tuklib_wraps(stream, opt, buf);
292 free(buf);
293 return ret;
294 }
295