// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file tuklib_mbstr_wrap.c
/// \brief Word wraps a string and prints it to a FILE stream
///
/// This depends on tuklib_mbstr_width.c.
//
// Author: Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////

#include "tuklib_mbstr.h"
#include "tuklib_mbstr_wrap.h"
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>


22*128836d3SXin LI extern int
tuklib_wraps(FILE * outfile,const struct tuklib_wrap_opt * opt,const char * str)23*128836d3SXin LI tuklib_wraps(FILE *outfile, const struct tuklib_wrap_opt *opt, const char *str)
24*128836d3SXin LI {
25*128836d3SXin LI // left_cont may be less than left_margin. In that case, if the first
26*128836d3SXin LI // word is extremely long, it will stay on the first line even if
27*128836d3SXin LI // the line then gets overlong.
28*128836d3SXin LI //
29*128836d3SXin LI // On the other hand, left2_cont < left2_margin isn't allowed because
30*128836d3SXin LI // it could result in inconsistent behavior when a very long word
31*128836d3SXin LI // comes right after a \v.
32*128836d3SXin LI //
33*128836d3SXin LI // It is fine to have left2_margin < left_margin although it would be
34*128836d3SXin LI // an odd use case.
35*128836d3SXin LI if (!(opt->left_margin < opt->right_margin
36*128836d3SXin LI && opt->left_cont < opt->right_margin
37*128836d3SXin LI && opt->left2_margin <= opt->left2_cont
38*128836d3SXin LI && opt->left2_cont < opt->right_margin))
39*128836d3SXin LI return TUKLIB_WRAP_ERR_OPT;
40*128836d3SXin LI
41*128836d3SXin LI // This is set to TUKLIB_WRAP_WARN_OVERLONG if one or more
42*128836d3SXin LI // output lines extend past opt->right_margin columns.
43*128836d3SXin LI int warn_overlong = 0;
44*128836d3SXin LI
45*128836d3SXin LI // Indentation of the first output line after \n or \r.
46*128836d3SXin LI // \v sets this to opt->left2_margin.
47*128836d3SXin LI // \r resets this back to the original value.
48*128836d3SXin LI size_t first_indent = opt->left_margin;
49*128836d3SXin LI
50*128836d3SXin LI // Indentation of the output lines that occur due to word wrapping.
51*128836d3SXin LI // \v sets this to opt->left2_cont and \r back to the original value.
52*128836d3SXin LI size_t cont_indent = opt->left_cont;
53*128836d3SXin LI
54*128836d3SXin LI // If word wrapping occurs, the newline isn't printed unless more
55*128836d3SXin LI // text would be put on the continuation line. This is also used
56*128836d3SXin LI // when \v needs to start on a new line.
57*128836d3SXin LI bool pending_newline = false;
58*128836d3SXin LI
59*128836d3SXin LI // Spaces are printed only when there is something else to put
60*128836d3SXin LI // after the spaces on the line. This avoids unwanted empty lines
61*128836d3SXin LI // in the output and makes it possible to ignore possible spaces
62*128836d3SXin LI // before a \v character.
63*128836d3SXin LI size_t pending_spaces = first_indent;
64*128836d3SXin LI
65*128836d3SXin LI // Current output column. When cur_col == pending_spaces, nothing
66*128836d3SXin LI // has been actually printed to the current output line.
67*128836d3SXin LI size_t cur_col = pending_spaces;
68*128836d3SXin LI
69*128836d3SXin LI while (true) {
70*128836d3SXin LI // Number of bytes until the *next* line-break opportunity.
71*128836d3SXin LI size_t len = 0;
72*128836d3SXin LI
73*128836d3SXin LI // Number of columns until the *next* line-break opportunity.
74*128836d3SXin LI size_t width = 0;
75*128836d3SXin LI
76*128836d3SXin LI // Text between a pair of \b characters is treated as
77*128836d3SXin LI // an unbreakable block even if it contains spaces.
78*128836d3SXin LI // It must not contain any control characters before
79*128836d3SXin LI // the closing \b.
80*128836d3SXin LI bool unbreakable = false;
81*128836d3SXin LI
82*128836d3SXin LI while (true) {
83*128836d3SXin LI // Find the next character that we handle specially.
84*128836d3SXin LI // In an unbreakable block, search only for the
85*128836d3SXin LI // closing \b; if missing, the unbreakable block
86*128836d3SXin LI // extends to the end of the string.
87*128836d3SXin LI const size_t n = strcspn(str + len,
88*128836d3SXin LI unbreakable ? "\b" : " \t\n\r\v\b");
89*128836d3SXin LI
90*128836d3SXin LI // Calculate how many columns the characters need.
91*128836d3SXin LI const size_t w = tuklib_mbstr_width_mem(str + len, n);
92*128836d3SXin LI if (w == (size_t)-1)
93*128836d3SXin LI return TUKLIB_WRAP_ERR_STR;
94*128836d3SXin LI
95*128836d3SXin LI width += w;
96*128836d3SXin LI len += n;
97*128836d3SXin LI
98*128836d3SXin LI // \b isn't a line-break opportunity so it has to
99*128836d3SXin LI // be handled here. For simplicity, empty blocks
100*128836d3SXin LI // are treated as zero-width characters.
101*128836d3SXin LI if (str[len] == '\b') {
102*128836d3SXin LI ++len;
103*128836d3SXin LI unbreakable = !unbreakable;
104*128836d3SXin LI continue;
105*128836d3SXin LI }
106*128836d3SXin LI
107*128836d3SXin LI break;
108*128836d3SXin LI }
109*128836d3SXin LI
110*128836d3SXin LI // Determine if adding this chunk of text would make the
111*128836d3SXin LI // current output line exceed opt->right_margin columns.
112*128836d3SXin LI const bool too_long = cur_col + width > opt->right_margin;
113*128836d3SXin LI
114*128836d3SXin LI // Wrap the line if needed. However:
115*128836d3SXin LI //
116*128836d3SXin LI // - Don't wrap if the current column is less than where
117*128836d3SXin LI // the continuation line would begin. In that case
118*128836d3SXin LI // the chunk wouldn't fit on the next line either so
119*128836d3SXin LI // we just have to produce an overlong line.
120*128836d3SXin LI //
121*128836d3SXin LI // - Don't wrap if so far the line only contains spaces.
122*128836d3SXin LI // Wrapping in that case would leave a weird empty line.
123*128836d3SXin LI // NOTE: This "only contains spaces" condition is the
124*128836d3SXin LI // reason why left2_margin > left2_cont isn't allowed.
125*128836d3SXin LI if (too_long && cur_col > cont_indent
126*128836d3SXin LI && cur_col > pending_spaces) {
127*128836d3SXin LI // There might be trailing spaces or zero-width spaces
128*128836d3SXin LI // which need to be ignored to keep the output pretty.
129*128836d3SXin LI //
130*128836d3SXin LI // Spaces need to be ignored because in some
131*128836d3SXin LI // writing styles there are two spaces after
132*128836d3SXin LI // a full stop. Example string:
133*128836d3SXin LI //
134*128836d3SXin LI // "Foo bar. Abc def."
135*128836d3SXin LI // ^
136*128836d3SXin LI // If the first space after the first full stop
137*128836d3SXin LI // triggers word wrapping, both spaces must be
138*128836d3SXin LI // ignored. Otherwise the next line would be
139*128836d3SXin LI // indented too much.
140*128836d3SXin LI //
141*128836d3SXin LI // Zero-width spaces are ignored the same way
142*128836d3SXin LI // because they are meaningless if an adjacent
143*128836d3SXin LI // character is a space.
144*128836d3SXin LI while (*str == ' ' || *str == '\t')
145*128836d3SXin LI ++str;
146*128836d3SXin LI
147*128836d3SXin LI // Don't print the newline here; only mark it as
148*128836d3SXin LI // pending. This avoids an unwanted empty line if
149*128836d3SXin LI // there is a \n or \r or \0 after the spaces have
150*128836d3SXin LI // been ignored.
151*128836d3SXin LI pending_newline = true;
152*128836d3SXin LI pending_spaces = cont_indent;
153*128836d3SXin LI cur_col = pending_spaces;
154*128836d3SXin LI
155*128836d3SXin LI // Since str may have been incremented due to the
156*128836d3SXin LI // ignored spaces, the loop needs to be restarted.
157*128836d3SXin LI continue;
158*128836d3SXin LI }
159*128836d3SXin LI
160*128836d3SXin LI // Print the current chunk of text before the next
161*128836d3SXin LI // line-break opportunity. If the chunk was empty,
162*128836d3SXin LI // don't print anything so that the pending newline
163*128836d3SXin LI // and pending spaces aren't printed on their own.
164*128836d3SXin LI if (len > 0) {
165*128836d3SXin LI if (pending_newline) {
166*128836d3SXin LI pending_newline = false;
167*128836d3SXin LI if (putc('\n', outfile) == EOF)
168*128836d3SXin LI return TUKLIB_WRAP_ERR_IO;
169*128836d3SXin LI }
170*128836d3SXin LI
171*128836d3SXin LI while (pending_spaces > 0) {
172*128836d3SXin LI if (putc(' ', outfile) == EOF)
173*128836d3SXin LI return TUKLIB_WRAP_ERR_IO;
174*128836d3SXin LI
175*128836d3SXin LI --pending_spaces;
176*128836d3SXin LI }
177*128836d3SXin LI
178*128836d3SXin LI for (size_t i = 0; i < len; ++i) {
179*128836d3SXin LI // Ignore unbreakable block characters (\b).
180*128836d3SXin LI const int c = (unsigned char)str[i];
181*128836d3SXin LI if (c != '\b' && putc(c, outfile) == EOF)
182*128836d3SXin LI return TUKLIB_WRAP_ERR_IO;
183*128836d3SXin LI }
184*128836d3SXin LI
185*128836d3SXin LI str += len;
186*128836d3SXin LI cur_col += width;
187*128836d3SXin LI
188*128836d3SXin LI // Remember if the line got overlong. If no other
189*128836d3SXin LI // errors occur, we return warn_overlong. It might
190*128836d3SXin LI // help in catching problematic strings.
191*128836d3SXin LI if (too_long)
192*128836d3SXin LI warn_overlong = TUKLIB_WRAP_WARN_OVERLONG;
193*128836d3SXin LI }
194*128836d3SXin LI
195*128836d3SXin LI // Handle the special character after the chunk of text.
196*128836d3SXin LI switch (*str) {
197*128836d3SXin LI case ' ':
198*128836d3SXin LI // Regular space.
199*128836d3SXin LI ++cur_col;
200*128836d3SXin LI ++pending_spaces;
201*128836d3SXin LI break;
202*128836d3SXin LI
203*128836d3SXin LI case '\v':
204*128836d3SXin LI // Set the alternative indentation settings.
205*128836d3SXin LI first_indent = opt->left2_margin;
206*128836d3SXin LI cont_indent = opt->left2_cont;
207*128836d3SXin LI
208*128836d3SXin LI if (first_indent > cur_col) {
209*128836d3SXin LI // Add one or more spaces to reach
210*128836d3SXin LI // the column specified in first_indent.
211*128836d3SXin LI pending_spaces += first_indent - cur_col;
212*128836d3SXin LI } else {
213*128836d3SXin LI // There is no room to add even one space
214*128836d3SXin LI // before reaching the column first_indent.
215*128836d3SXin LI pending_newline = true;
216*128836d3SXin LI pending_spaces = first_indent;
217*128836d3SXin LI }
218*128836d3SXin LI
219*128836d3SXin LI cur_col = first_indent;
220*128836d3SXin LI break;
221*128836d3SXin LI
222*128836d3SXin LI case '\0': // Implicit newline at the end of the string.
223*128836d3SXin LI case '\r': // Newline that also resets the effect of \v.
224*128836d3SXin LI case '\n': // Newline without resetting the indentation mode.
225*128836d3SXin LI if (putc('\n', outfile) == EOF)
226*128836d3SXin LI return TUKLIB_WRAP_ERR_IO;
227*128836d3SXin LI
228*128836d3SXin LI if (*str == '\0')
229*128836d3SXin LI return warn_overlong;
230*128836d3SXin LI
231*128836d3SXin LI if (*str == '\r') {
232*128836d3SXin LI first_indent = opt->left_margin;
233*128836d3SXin LI cont_indent = opt->left_cont;
234*128836d3SXin LI }
235*128836d3SXin LI
236*128836d3SXin LI pending_newline = false;
237*128836d3SXin LI pending_spaces = first_indent;
238*128836d3SXin LI cur_col = first_indent;
239*128836d3SXin LI break;
240*128836d3SXin LI }
241*128836d3SXin LI
242*128836d3SXin LI // Skip the specially-handled character.
243*128836d3SXin LI ++str;
244*128836d3SXin LI }
245*128836d3SXin LI }
246*128836d3SXin LI
247*128836d3SXin LI
248*128836d3SXin LI extern int
tuklib_wrapf(FILE * stream,const struct tuklib_wrap_opt * opt,const char * fmt,...)249*128836d3SXin LI tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt,
250*128836d3SXin LI const char *fmt, ...)
251*128836d3SXin LI {
252*128836d3SXin LI va_list ap;
253*128836d3SXin LI char *buf;
254*128836d3SXin LI
255*128836d3SXin LI #ifdef HAVE_VASPRINTF
256*128836d3SXin LI va_start(ap, fmt);
257*128836d3SXin LI
258*128836d3SXin LI #ifdef __clang__
259*128836d3SXin LI # pragma GCC diagnostic push
260*128836d3SXin LI # pragma GCC diagnostic ignored "-Wformat-nonliteral"
261*128836d3SXin LI #endif
262*128836d3SXin LI const int n = vasprintf(&buf, fmt, ap);
263*128836d3SXin LI #ifdef __clang__
264*128836d3SXin LI # pragma GCC diagnostic pop
265*128836d3SXin LI #endif
266*128836d3SXin LI
267*128836d3SXin LI va_end(ap);
268*128836d3SXin LI if (n == -1)
269*128836d3SXin LI return TUKLIB_WRAP_ERR_FORMAT;
270*128836d3SXin LI #else
271*128836d3SXin LI // Fixed buffer size is dumb but in practice one shouldn't need
272*128836d3SXin LI // huge strings for *formatted* output. This simple method is safe
273*128836d3SXin LI // with pre-C99 vsnprintf() implementations too which don't return
274*128836d3SXin LI // the required buffer size (they return -1 or buf_size - 1) or
275*128836d3SXin LI // which might not null-terminate the buffer in case it's too small.
276*128836d3SXin LI const size_t buf_size = 128 * 1024;
277*128836d3SXin LI buf = malloc(buf_size);
278*128836d3SXin LI if (buf == NULL)
279*128836d3SXin LI return TUKLIB_WRAP_ERR_FORMAT;
280*128836d3SXin LI
281*128836d3SXin LI va_start(ap, fmt);
282*128836d3SXin LI const int n = vsnprintf(buf, buf_size, fmt, ap);
283*128836d3SXin LI va_end(ap);
284*128836d3SXin LI
285*128836d3SXin LI if (n <= 0 || n >= (int)(buf_size - 1)) {
286*128836d3SXin LI free(buf);
287*128836d3SXin LI return TUKLIB_WRAP_ERR_FORMAT;
288*128836d3SXin LI }
289*128836d3SXin LI #endif
290*128836d3SXin LI
291*128836d3SXin LI const int ret = tuklib_wraps(stream, opt, buf);
292*128836d3SXin LI free(buf);
293*128836d3SXin LI return ret;
294*128836d3SXin LI }
295