xref: /freebsd/contrib/xz/src/common/tuklib_mbstr_width.c (revision 3b35e7ee8de9b0260149a2b77e87a2b9c7a36244)
1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD
2*3b35e7eeSXin LI 
3e0f0e66dSMartin Matuska ///////////////////////////////////////////////////////////////////////////////
4e0f0e66dSMartin Matuska //
5a8675d92SXin LI /// \file       tuklib_mbstr_width.c
6e0f0e66dSMartin Matuska /// \brief      Calculate width of a multibyte string
7e0f0e66dSMartin Matuska //
8e0f0e66dSMartin Matuska //  Author:     Lasse Collin
9e0f0e66dSMartin Matuska //
10e0f0e66dSMartin Matuska ///////////////////////////////////////////////////////////////////////////////
11e0f0e66dSMartin Matuska 
12e0f0e66dSMartin Matuska #include "tuklib_mbstr.h"
13a8675d92SXin LI #include <string.h>
14e0f0e66dSMartin Matuska 
15e0f0e66dSMartin Matuska #if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
16e0f0e66dSMartin Matuska #	include <wchar.h>
17e0f0e66dSMartin Matuska #endif
18e0f0e66dSMartin Matuska 
19e0f0e66dSMartin Matuska 
/// \brief      Get the number of columns needed for a multibyte string
///
/// \param      str     Null-terminated string in the current locale's
///                     multibyte encoding.
/// \param      bytes   If not NULL, *bytes is set to strlen(str). This is
///                     done before the string is validated, so it is set
///                     even when this function returns (size_t)-1.
///
/// \return     When mbrtowc() and wcwidth() are available: the sum of the
///             wcwidth() values of the characters in str, or (size_t)-1 if
///             str contains an invalid or incomplete multibyte sequence,
///             a character for which wcwidth() returns a negative value,
///             or if str doesn't end in the initial shift state.
///             Otherwise: strlen(str).
extern size_t
tuklib_mbstr_width(const char *str, size_t *bytes)
{
	const size_t len = strlen(str);
	if (bytes != NULL)
		*bytes = len;

#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
	// In single-byte mode, the width of the string is the same
	// as its length.
	return len;

#else
	mbstate_t state;
	memset(&state, 0, sizeof(state));

	size_t width = 0;
	size_t i = 0;

	// Convert one multibyte character at a time to wchar_t
	// and get its width using wcwidth().
	while (i < len) {
		wchar_t wc;
		const size_t ret = mbrtowc(&wc, str + i, len - i, &state);

		// ret == 0 means an embedded null character, which cannot
		// occur within strlen(str) bytes. (size_t)-1 and (size_t)-2
		// (invalid and incomplete sequence) are caught by the upper
		// bound. The bound is the number of bytes that were actually
		// passed to mbrtowc(), not the length of the whole string,
		// so that i can never advance past len.
		if (ret < 1 || ret > len - i)
			return (size_t)-1;

		i += ret;

		// wcwidth() returns a negative value for characters
		// that have no defined column width (non-printable).
		const int wc_width = wcwidth(wc);
		if (wc_width < 0)
			return (size_t)-1;

		width += (size_t)wc_width;
	}

	// Require that the string ends in the initial shift state.
	// This way the caller can combine the string with other
	// strings without needing to worry about the shift states.
	if (!mbsinit(&state))
		return (size_t)-1;

	return width;
#endif
}
65