1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file tuklib_mbstr_width.c 6 /// \brief Calculate width of a multibyte string 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "tuklib_mbstr.h" 13 #include <string.h> 14 15 #ifdef HAVE_MBRTOWC 16 # include <wchar.h> 17 #endif 18 19 20 extern size_t 21 tuklib_mbstr_width(const char *str, size_t *bytes) 22 { 23 const size_t len = strlen(str); 24 if (bytes != NULL) 25 *bytes = len; 26 27 return tuklib_mbstr_width_mem(str, len); 28 } 29 30 31 extern size_t 32 tuklib_mbstr_width_mem(const char *str, size_t len) 33 { 34 #ifndef HAVE_MBRTOWC 35 // In single-byte mode, the width of the string is the same 36 // as its length. 37 (void)str; 38 return len; 39 40 #else 41 mbstate_t state; 42 memset(&state, 0, sizeof(state)); 43 44 size_t width = 0; 45 size_t i = 0; 46 47 // Convert one multibyte character at a time to wchar_t 48 // and get its width using wcwidth(). 49 while (i < len) { 50 wchar_t wc; 51 const size_t ret = mbrtowc(&wc, str + i, len - i, &state); 52 if (ret < 1 || ret > len - i) 53 return (size_t)-1; 54 55 i += ret; 56 57 #ifdef HAVE_WCWIDTH 58 const int wc_width = wcwidth(wc); 59 if (wc_width < 0) 60 return (size_t)-1; 61 62 width += (size_t)wc_width; 63 #else 64 // Without wcwidth() (like in a native Windows build), 65 // assume that one multibyte char == one column. With 66 // UTF-8, this is less bad than one byte == one column. 67 // This way quite a few languages will be handled correctly 68 // in practice; CJK chars will be very wrong though. 69 ++width; 70 #endif 71 } 72 73 // It's good to check that the string ended in the initial state. 74 // However, in practice this is redundant: 75 // 76 // - No one will use this code with character sets that have 77 // locking shift states. 78 // 79 // - We already checked that mbrtowc() didn't return (size_t)-2 80 // which would indicate a partial multibyte character. 81 if (!mbsinit(&state)) 82 return (size_t)-1; 83 84 return width; 85 #endif 86 } 87