1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file tuklib_mbstr_wrap.c 6 /// \brief Word wraps a string and prints it to a FILE stream 7 /// 8 /// This depends on tuklib_mbstr_width.c. 9 // 10 // Author: Lasse Collin 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #include "tuklib_mbstr.h" 15 #include "tuklib_mbstr_wrap.h" 16 #include <stdarg.h> 17 #include <stdlib.h> 18 #include <stdio.h> 19 #include <string.h> 20 21 22 extern int 23 tuklib_wraps(FILE *outfile, const struct tuklib_wrap_opt *opt, const char *str) 24 { 25 // left_cont may be less than left_margin. In that case, if the first 26 // word is extremely long, it will stay on the first line even if 27 // the line then gets overlong. 28 // 29 // On the other hand, left2_cont < left2_margin isn't allowed because 30 // it could result in inconsistent behavior when a very long word 31 // comes right after a \v. 32 // 33 // It is fine to have left2_margin < left_margin although it would be 34 // an odd use case. 35 if (!(opt->left_margin < opt->right_margin 36 && opt->left_cont < opt->right_margin 37 && opt->left2_margin <= opt->left2_cont 38 && opt->left2_cont < opt->right_margin)) 39 return TUKLIB_WRAP_ERR_OPT; 40 41 // This is set to TUKLIB_WRAP_WARN_OVERLONG if one or more 42 // output lines extend past opt->right_margin columns. 43 int warn_overlong = 0; 44 45 // Indentation of the first output line after \n or \r. 46 // \v sets this to opt->left2_margin. 47 // \r resets this back to the original value. 48 size_t first_indent = opt->left_margin; 49 50 // Indentation of the output lines that occur due to word wrapping. 51 // \v sets this to opt->left2_cont and \r back to the original value. 52 size_t cont_indent = opt->left_cont; 53 54 // If word wrapping occurs, the newline isn't printed unless more 55 // text would be put on the continuation line. This is also used 56 // when \v needs to start on a new line. 57 bool pending_newline = false; 58 59 // Spaces are printed only when there is something else to put 60 // after the spaces on the line. This avoids unwanted empty lines 61 // in the output and makes it possible to ignore possible spaces 62 // before a \v character. 63 size_t pending_spaces = first_indent; 64 65 // Current output column. When cur_col == pending_spaces, nothing 66 // has been actually printed to the current output line. 67 size_t cur_col = pending_spaces; 68 69 while (true) { 70 // Number of bytes until the *next* line-break opportunity. 71 size_t len = 0; 72 73 // Number of columns until the *next* line-break opportunity. 74 size_t width = 0; 75 76 // Text between a pair of \b characters is treated as 77 // an unbreakable block even if it contains spaces. 78 // It must not contain any control characters before 79 // the closing \b. 80 bool unbreakable = false; 81 82 while (true) { 83 // Find the next character that we handle specially. 84 // In an unbreakable block, search only for the 85 // closing \b; if missing, the unbreakable block 86 // extends to the end of the string. 87 const size_t n = strcspn(str + len, 88 unbreakable ? "\b" : " \t\n\r\v\b"); 89 90 // Calculate how many columns the characters need. 91 const size_t w = tuklib_mbstr_width_mem(str + len, n); 92 if (w == (size_t)-1) 93 return TUKLIB_WRAP_ERR_STR; 94 95 width += w; 96 len += n; 97 98 // \b isn't a line-break opportunity so it has to 99 // be handled here. For simplicity, empty blocks 100 // are treated as zero-width characters. 101 if (str[len] == '\b') { 102 ++len; 103 unbreakable = !unbreakable; 104 continue; 105 } 106 107 break; 108 } 109 110 // Determine if adding this chunk of text would make the 111 // current output line exceed opt->right_margin columns. 112 const bool too_long = cur_col + width > opt->right_margin; 113 114 // Wrap the line if needed. However: 115 // 116 // - Don't wrap if the current column is less than where 117 // the continuation line would begin. In that case 118 // the chunk wouldn't fit on the next line either so 119 // we just have to produce an overlong line. 120 // 121 // - Don't wrap if so far the line only contains spaces. 122 // Wrapping in that case would leave a weird empty line. 123 // NOTE: This "only contains spaces" condition is the 124 // reason why left2_margin > left2_cont isn't allowed. 125 if (too_long && cur_col > cont_indent 126 && cur_col > pending_spaces) { 127 // There might be trailing spaces or zero-width spaces 128 // which need to be ignored to keep the output pretty. 129 // 130 // Spaces need to be ignored because in some 131 // writing styles there are two spaces after 132 // a full stop. Example string: 133 // 134 // "Foo bar. Abc def." 135 // ^ 136 // If the first space after the first full stop 137 // triggers word wrapping, both spaces must be 138 // ignored. Otherwise the next line would be 139 // indented too much. 140 // 141 // Zero-width spaces are ignored the same way 142 // because they are meaningless if an adjacent 143 // character is a space. 144 while (*str == ' ' || *str == '\t') 145 ++str; 146 147 // Don't print the newline here; only mark it as 148 // pending. This avoids an unwanted empty line if 149 // there is a \n or \r or \0 after the spaces have 150 // been ignored. 151 pending_newline = true; 152 pending_spaces = cont_indent; 153 cur_col = pending_spaces; 154 155 // Since str may have been incremented due to the 156 // ignored spaces, the loop needs to be restarted. 157 continue; 158 } 159 160 // Print the current chunk of text before the next 161 // line-break opportunity. If the chunk was empty, 162 // don't print anything so that the pending newline 163 // and pending spaces aren't printed on their own. 164 if (len > 0) { 165 if (pending_newline) { 166 pending_newline = false; 167 if (putc('\n', outfile) == EOF) 168 return TUKLIB_WRAP_ERR_IO; 169 } 170 171 while (pending_spaces > 0) { 172 if (putc(' ', outfile) == EOF) 173 return TUKLIB_WRAP_ERR_IO; 174 175 --pending_spaces; 176 } 177 178 for (size_t i = 0; i < len; ++i) { 179 // Ignore unbreakable block characters (\b). 180 const int c = (unsigned char)str[i]; 181 if (c != '\b' && putc(c, outfile) == EOF) 182 return TUKLIB_WRAP_ERR_IO; 183 } 184 185 str += len; 186 cur_col += width; 187 188 // Remember if the line got overlong. If no other 189 // errors occur, we return warn_overlong. It might 190 // help in catching problematic strings. 191 if (too_long) 192 warn_overlong = TUKLIB_WRAP_WARN_OVERLONG; 193 } 194 195 // Handle the special character after the chunk of text. 196 switch (*str) { 197 case ' ': 198 // Regular space. 199 ++cur_col; 200 ++pending_spaces; 201 break; 202 203 case '\v': 204 // Set the alternative indentation settings. 205 first_indent = opt->left2_margin; 206 cont_indent = opt->left2_cont; 207 208 if (first_indent > cur_col) { 209 // Add one or more spaces to reach 210 // the column specified in first_indent. 211 pending_spaces += first_indent - cur_col; 212 } else { 213 // There is no room to add even one space 214 // before reaching the column first_indent. 215 pending_newline = true; 216 pending_spaces = first_indent; 217 } 218 219 cur_col = first_indent; 220 break; 221 222 case '\0': // Implicit newline at the end of the string. 223 case '\r': // Newline that also resets the effect of \v. 224 case '\n': // Newline without resetting the indentation mode. 225 if (putc('\n', outfile) == EOF) 226 return TUKLIB_WRAP_ERR_IO; 227 228 if (*str == '\0') 229 return warn_overlong; 230 231 if (*str == '\r') { 232 first_indent = opt->left_margin; 233 cont_indent = opt->left_cont; 234 } 235 236 pending_newline = false; 237 pending_spaces = first_indent; 238 cur_col = first_indent; 239 break; 240 } 241 242 // Skip the specially-handled character. 243 ++str; 244 } 245 } 246 247 248 extern int 249 tuklib_wrapf(FILE *stream, const struct tuklib_wrap_opt *opt, 250 const char *fmt, ...) 251 { 252 va_list ap; 253 char *buf; 254 255 #ifdef HAVE_VASPRINTF 256 va_start(ap, fmt); 257 258 #ifdef __clang__ 259 # pragma GCC diagnostic push 260 # pragma GCC diagnostic ignored "-Wformat-nonliteral" 261 #endif 262 const int n = vasprintf(&buf, fmt, ap); 263 #ifdef __clang__ 264 # pragma GCC diagnostic pop 265 #endif 266 267 va_end(ap); 268 if (n == -1) 269 return TUKLIB_WRAP_ERR_FORMAT; 270 #else 271 // Fixed buffer size is dumb but in practice one shouldn't need 272 // huge strings for *formatted* output. This simple method is safe 273 // with pre-C99 vsnprintf() implementations too which don't return 274 // the required buffer size (they return -1 or buf_size - 1) or 275 // which might not null-terminate the buffer in case it's too small. 276 const size_t buf_size = 128 * 1024; 277 buf = malloc(buf_size); 278 if (buf == NULL) 279 return TUKLIB_WRAP_ERR_FORMAT; 280 281 va_start(ap, fmt); 282 const int n = vsnprintf(buf, buf_size, fmt, ap); 283 va_end(ap); 284 285 if (n <= 0 || n >= (int)(buf_size - 1)) { 286 free(buf); 287 return TUKLIB_WRAP_ERR_FORMAT; 288 } 289 #endif 290 291 const int ret = tuklib_wraps(stream, opt, buf); 292 free(buf); 293 return ret; 294 } 295