1 // SPDX-License-Identifier: 0BSD
2
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file tuklib_mbstr_width.c
6 /// \brief Calculate width of a multibyte string
7 //
8 // Author: Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11
12 #include "tuklib_mbstr.h"
13 #include <string.h>
14
15 #ifdef HAVE_MBRTOWC
16 # include <wchar.h>
17 #endif
18
19
/// \brief      Get the width of a null-terminated multibyte string
///
/// \param      str     Null-terminated multibyte string. It must not be
///                     NULL because it is passed to strlen().
/// \param      bytes   If not NULL, *bytes is set to strlen(str). The value
///                     is stored before the width is calculated, so it is
///                     set even if the width calculation fails.
///
/// \return     The return value of tuklib_mbstr_width_mem(str, strlen(str)).
extern size_t
tuklib_mbstr_width(const char *str, size_t *bytes)
{
	// Measure the string once and reuse the length both for the optional
	// output argument and for the width calculation.
	const size_t len = strlen(str);
	if (bytes != NULL)
		*bytes = len;

	return tuklib_mbstr_width_mem(str, len);
}
29
30
/// \brief      Get the width of a multibyte string of the given length
///
/// \param      str     Multibyte string. It doesn't need to be
///                     null-terminated; at most len bytes are examined.
/// \param      len     Number of bytes in str
///
/// \return     If HAVE_MBRTOWC isn't defined, len is returned as is.
///             Otherwise the sum of the widths of the characters in
///             the string is returned. (size_t)-1 is returned if
///             the string cannot be decoded with mbrtowc() (invalid or
///             truncated multibyte character), if it contains a null
///             character within the first len bytes, if wcwidth() returns
///             a negative value for a character, or if the string doesn't
///             end in the initial conversion state.
extern size_t
tuklib_mbstr_width_mem(const char *str, size_t len)
{
#ifndef HAVE_MBRTOWC
	// In single-byte mode, the width of the string is the same
	// as its length.
	(void)str;
	return len;

#else
	mbstate_t state;
	memset(&state, 0, sizeof(state));

	size_t width = 0;
	size_t i = 0;

	// Convert one multibyte character at a time to wchar_t
	// and get its width using wcwidth().
	while (i < len) {
		wchar_t wc;
		const size_t ret = mbrtowc(&wc, str + i, len - i, &state);

		// ret == 0 means that a null character was decoded.
		// The error values (size_t)-1 and (size_t)-2 are always
		// greater than the number of bytes left, so the second
		// comparison catches both of them.
		if (ret < 1 || ret > len - i)
			return (size_t)-1;

		i += ret;

#ifdef HAVE_WCWIDTH
		// wcwidth() returns a negative value for characters that
		// have no defined width; treat those as an error.
		const int wc_width = wcwidth(wc);
		if (wc_width < 0)
			return (size_t)-1;

		width += (size_t)wc_width;
#else
		// Without wcwidth() (like in a native Windows build),
		// assume that one multibyte char == one column. With
		// UTF-8, this is less bad than one byte == one column.
		// This way quite a few languages will be handled correctly
		// in practice; CJK chars will be very wrong though.
		++width;
#endif
	}

	// It's good to check that the string ended in the initial state.
	// However, in practice this is redundant:
	//
	//   - No one will use this code with character sets that have
	//     locking shift states.
	//
	//   - We already checked that mbrtowc() didn't return (size_t)-2
	//     which would indicate a partial multibyte character.
	if (!mbsinit(&state))
		return (size_t)-1;

	return width;
#endif
}
87